In [1]:
import openreview
import re
import unicodecsv as csv
import os
import requests

In [3]:
# Shared OpenReview client used by the functions below; only the base URL is
# passed explicitly here.
client = openreview.Client(baseurl="https://openreview.net")

# Directory that the dump functions write their CSV files and PDFs into.
outputdir = './tpms-dump-final'

In [None]:
def get_profiles(groupname):
    """Resolve the members of an OpenReview group to their profile notes.

    Every member of ``groupname`` is itself looked up as a group. The first
    member of that group whose id starts with ``~`` is used as the member's
    canonical id, and the note with that id is fetched as the profile.

    Args:
        groupname: id of the OpenReview group whose members are resolved.

    Returns:
        A ``(profiles, missing_users)`` tuple. ``profiles`` holds the
        ``to_json()`` form of each fetched profile note. ``missing_users``
        holds the (stripped) member ids for which no ``~`` id was found or
        whose group lookup raised ``openreview.OpenReviewException``.
    """
    print("groupname: %s" % groupname)
    members = client.get_group(groupname).members

    tilde_pattern = re.compile('~.*')

    canonical_ids = []
    missing_users = []
    for m in members:
        m = m.strip()
        try:
            print("m: %s" % m)
            # A single lookup per member; issuing the same request twice only
            # doubled the number of round trips.
            g = client.get_group(m)
            groupmembers = [member for member in g.members if tilde_pattern.match(member)]

            if not groupmembers:
                print("No canonical IDs found for member  %s" % m)
                missing_users.append(m)
            else:
                if len(groupmembers) > 1:
                    print("More than one canonical ID found for member  %s ; Using first ID in the list." % m)

                canonical_ids.append(groupmembers[0])

        except openreview.OpenReviewException as e:
            # Best effort: record the member as missing and keep going.
            missing_users.append(m)
            print(e)

    profiles = []

    # `profile_id` rather than `id`, so the builtin is not shadowed.
    for profile_id in canonical_ids:
        print("id:  %s" % profile_id)
        profiles.append(client.get_note(profile_id).to_json())

    return profiles,missing_users

In [None]:
def dump_names(profiles,outfilename,missingTuple=None):
    """Write ``email,first,last`` CSV rows for profiles and for missing users.

    The file is created inside ``outputdir`` (which is created if absent).

    Args:
        profiles: profile dicts; each row uses the first entry of
            ``profile['content']['names']`` (its ``'first'`` and ``'last'``
            keys) and the first entry of ``profile['content']['emails']``.
        outfilename: name of the CSV file to create inside ``outputdir``.
        missingTuple: optional ``(emails, name_map)`` pair. For each email a
            row is written from ``name_map[email][0]`` and
            ``name_map[email][1]``; emails absent from ``name_map`` are
            reported on stdout and skipped.
    """
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)

    with open(outputdir+'/'+outfilename,'wb') as outfile:
        csvwriter = csv.writer(outfile, delimiter=',')
        for profile in profiles:
            p = profile['content']
            name = p['names'][0]
            csvwriter.writerow([p['emails'][0],name['first'],name['last']])

        # Identity test: `!= None` would defer to the argument's own __ne__.
        if missingTuple is not None:
            missingList = missingTuple[0]
            missingMap = missingTuple[1]
            for email in missingList:
                try:
                    csvwriter.writerow([email,missingMap[email][0],missingMap[email][1]])
                except KeyError as e:
                    print("Key error found on email  %s :  %s" % (email, e))

In [7]:
def write_pdfs(papers):
    """Download the PDF of each paper into ``outputdir + '/pdfs'``.

    A ``content['pdf']`` value starting with ``http`` is requested as-is. A
    value starting with ``/pdf/`` is requested from
    ``client.baseurl + '/pdf?id=' + <note id>`` using ``client.headers``.
    Papers whose ``pdf`` value matches neither form are reported on stdout
    and skipped.

    Args:
        papers: note dicts providing ``'id'``, ``'number'`` and
            ``content['pdf']``. Each PDF is saved as ``paper<number>.pdf``.
    """
    external = re.compile('http.*')
    # Raw string: same pattern text as before, without relying on Python
    # passing the unknown escape `\/` through unchanged.
    internal = re.compile(r'\/pdf\/.*')

    # The target directory does not depend on the paper, so ensure it once.
    pdf_dir = outputdir+'/pdfs'
    if not os.path.exists(pdf_dir):
        os.makedirs(pdf_dir)

    for s in papers:
        pdf = s['content']['pdf']
        if external.match(pdf):
            r = requests.get(pdf)
        elif internal.match(pdf):
            r = requests.get(client.baseurl+'/pdf?id='+s['id'], headers=client.headers)
        else:
            print("Couldn't get PDF for note  %s" % s['id'])
            # Without this, `r` is either unbound (first paper) or still the
            # previous paper's response, which would be saved under this
            # paper's number.
            continue

        print("number %s response %s" % (s['number'], r))
        with open(pdf_dir+'/paper'+str(s['number'])+'.pdf', 'wb') as f:
            f.write(r.content)

In [None]:
def get_conflicts(profile):
    """Collect the conflict domains of a profile.

    Args:
        profile: dict whose ``'content'`` has a ``'history'`` list (entries
            carrying ``['institution']['domain']``) and an ``'emails'`` list.

    Returns:
        A set with every institution domain from the history plus the part
        following the first ``@`` of every email address.
    """
    content = profile['content']
    institution_domains = [entry['institution']['domain'] for entry in content['history']]
    email_domains = [address.split('@')[1] for address in content['emails']]
    return set(institution_domains) | set(email_domains)

In [None]:
def dump_conflicts(profiles, missing, papers, outfilename):
    """Write ``paper_number,email`` CSV rows for every detected conflict.

    A profile conflicts with a paper when the set returned by
    ``get_conflicts(profile)`` shares at least one element with
    ``paper['content']['conflicts']``. A missing user conflicts with a paper
    when the domain of their email address is in that paper's conflicts.

    Args:
        profiles: profile dicts; the first entry of ``content['emails']`` is
            the address written to the CSV.
        missing: identifiers of users without a profile. Entries without an
            ``@`` are reported on stdout and skipped.
        papers: note dicts providing ``'number'`` and ``content['conflicts']``.
        outfilename: name of the CSV file to create inside ``outputdir``.
    """
    # Same guard as dump_names / write_pdfs, so this function also works when
    # it is the first one to write into the output directory.
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)

    with open(outputdir+'/'+outfilename,'wb') as outfile:
        csvwriter=csv.writer(outfile,delimiter=',')
        for profile in profiles:
            email = profile['content']['emails'][0]
            profile_conflicts = get_conflicts(profile)
            for paper in papers:
                paper_conflicts = paper['content']['conflicts']
                if not profile_conflicts.isdisjoint(paper_conflicts):
                    print("conflict detected: %s %s %s" % (email, profile_conflicts, paper_conflicts))
                    csvwriter.writerow([str(paper['number']),email])

        for email in missing:
            # get_profiles reports any unresolved group member as missing, so
            # an entry is not guaranteed to be an email address; indexing the
            # split result would otherwise abort the dump half-written.
            if '@' not in email:
                print("Skipping conflict check for member without an email address: %s" % email)
                continue
            domain = email.split('@')[1]
            for paper in papers:
                if domain in paper['content']['conflicts']:
                    csvwriter.writerow([str(paper['number']),email])

In [None]:
def dump_missing(list):
    """Write an ``email,first,last`` row to ``missing-dump.csv`` per email.

    Names are read from the module-level ``all_reviewers`` mapping as
    ``all_reviewers[email][0]`` (first) and ``all_reviewers[email][1]`` (last).
    NOTE(review): ``all_reviewers`` is not defined in the cells visible here;
    confirm it is created before this function is called.

    Args:
        list: iterable of email addresses. The parameter name shadows the
            builtin but is kept so existing keyword callers keep working.
    """
    with open(outputdir+'/missing-dump.csv','wb') as outfile:
        # One writer for the whole file instead of a new one per row.
        csvwriter=csv.writer(outfile,delimiter=',')
        for email in list:
            first = all_reviewers[email][0]
            last = all_reviewers[email][1]
            # Previously an empty row was written and the looked-up names were
            # discarded; use the same column order as dump_names.
            csvwriter.writerow([email,first,last])

In [None]:
client.user

In [None]:
# Resolve the reviewer group to profile dicts; members that could not be
# resolved are returned separately in missing_reviewers.
reviewer_profiles,missing_reviewers = get_profiles("ICLR.cc/2017/conference/reviewers")

In [None]:
reviewer_profiles

In [None]:
# Resolve the area-chair group to profile dicts; members that could not be
# resolved are returned separately in missing_areachairs.
areachair_profiles,missing_areachairs = get_profiles("ICLR.cc/2017/areachairs")


In [None]:
missing_areachairs

In [None]:
def _load_name_map(path):
    """Read a CSV file into a dict keyed by the third column of each row.

    The first row is treated as a header and skipped. Every other row is
    echoed to stdout and stored as ``str(row[2]) -> row[0:2]``; dump_names
    later writes those two values into the first/last name columns.

    Args:
        path: path of the CSV file (comma-delimited, ``|`` as quote char).

    Returns:
        The populated dict.
    """
    name_map = {}
    with open(path, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        # Drop the header row; the default keeps an empty file from raising.
        next(reader, None)
        for row in reader:
            print(row)
            name_map[str(row[2])] = row[0:2]
    return name_map


missingReviewerMap = _load_name_map('./iclr_all_reviewers.csv')
missingAreachairMap = _load_name_map('./iclr_area_chairs.csv')
      

In [None]:
# Fetch every note under the ICLR 2017 submission invitation, in its to_json() form.
papers = [s.to_json() for s in client.get_notes(invitation='ICLR.cc/2017/conference/-/submission')]

        
# Write the reviewer and area-chair name CSVs; members without a resolved
# profile are filled in from the maps loaded from the local CSV files.
dump_names(reviewer_profiles,"reviewer-dump.csv",(missing_reviewers,missingReviewerMap))
dump_names(areachair_profiles,"areachair-dump.csv",(missing_areachairs,missingAreachairMap))
# Download the PDF of every submission.
write_pdfs(papers)
# NOTE(review): only reviewer_profiles is passed here, while the missing list
# combines reviewers and area chairs. Confirm that area-chair profiles are
# meant to be left out of the conflict dump.
dump_conflicts(reviewer_profiles,missing_reviewers+missing_areachairs,papers,"conflicts-dump.csv")

In [4]:
# Identical to the submission fetch in the full-dump cell above; running
# either one defines `papers`.
papers = [s.to_json() for s in client.get_notes(invitation='ICLR.cc/2017/conference/-/submission')]

In [5]:
len(papers)

490

In [8]:
write_pdfs(papers[:5])

number 610 response <Response [200]>
number 609 response <Response [200]>
number 608 response <Response [200]>
number 607 response <Response [200]>
number 606 response <Response [200]>


In [10]:
# Fetch the per-paper acceptance notes.
# NOTE(review): presumably the server treats `paper.*` in the invitation as a
# pattern covering every paper number. Confirm against the API.
acceptances = client.get_notes(invitation = 'ICLR.cc/2017/conference/-/paper.*/acceptance')

In [19]:
# Forum ids of the acceptance notes whose decision is exactly 'Accept (Oral)'.
oral_acceptances = [n.forum for n in acceptances if n.content['decision'] == 'Accept (Oral)']

In [20]:
len(oral_acceptances)

15

In [21]:
# Forum ids of the acceptance notes whose decision is exactly 'Accept (Poster)'.
poster_acceptances = [n.forum for n in acceptances if n.content['decision'] == 'Accept (Poster)']

In [22]:
len(poster_acceptances)

183

In [24]:
# Submissions whose id is the forum of an oral acceptance note. The ids are
# put in a set first so each membership test does not rescan the whole list.
oral_forum_ids = set(oral_acceptances)
oral_papers = [n for n in papers if n['id'] in oral_forum_ids]

In [25]:
len(oral_papers)

15

In [26]:
write_pdfs(oral_papers)

number 597 response <Response [200]>
number 573 response <Response [200]>
number 472 response <Response [200]>
number 428 response <Response [200]>
number 415 response <Response [200]>
number 386 response <Response [200]>
number 270 response <Response [200]>
number 259 response <Response [200]>
number 251 response <Response [200]>
number 170 response <Response [200]>
number 116 response <Response [200]>
number 76 response <Response [200]>
number 45 response <Response [200]>
number 29 response <Response [200]>
number 10 response <Response [200]>


In [27]:
# Submissions whose id is the forum of a poster acceptance note. The ids are
# put in a set first so each membership test does not rescan the whole list.
poster_forum_ids = set(poster_acceptances)
poster_papers = [n for n in papers if n['id'] in poster_forum_ids]

In [28]:
len(poster_papers)

183

In [29]:
write_pdfs(poster_papers)

number 610 response <Response [200]>
number 606 response <Response [200]>
number 601 response <Response [200]>
number 595 response <Response [200]>
number 593 response <Response [200]>
number 590 response <Response [200]>
number 589 response <Response [200]>
number 585 response <Response [200]>
number 584 response <Response [200]>
number 582 response <Response [200]>
number 580 response <Response [200]>
number 578 response <Response [200]>
number 571 response <Response [200]>
number 566 response <Response [200]>
number 555 response <Response [200]>
number 553 response <Response [200]>
number 549 response <Response [200]>
number 548 response <Response [200]>
number 541 response <Response [200]>
number 539 response <Response [200]>
number 534 response <Response [200]>
number 533 response <Response [200]>
number 531 response <Response [200]>
number 525 response <Response [200]>
number 520 response <Response [200]>
number 519 response <Response [200]>
number 516 response <Response [200]>
n