## Getting ICLR review data from [OpenReview](https://openreview.net/)

In [1]:
import pandas as pd
import csv
import time
import concurrent.futures
from openreview import openreview

In [2]:
# CSV listing the venues to download; the columns read later in this
# notebook are "conference" and "conference_url".
path_to_data = "iclr_urls.csv"
iclr_conf_data = pd.read_csv(path_to_data) 
# Show the first rows as the cell output.
iclr_conf_data.head()

Unnamed: 0,conference,conference_url,n_papers,comments
0,iclr2013_conference,ICLR.cc/2013/conference/-/submission,67,
1,iclr2014_conference,ICLR.cc/2014/conference/-/submission,69,without decisions data
2,iclr2014_workshop,ICLR.cc/2014/workshop/-/submission,19,without decision data
3,iclr2016_workshop,ICLR.cc/2016/workshop/-/submission,125,without decision data
4,iclr2017_conference,ICLR.cc/2017/conference/-/paper.*/acceptance,490,


In [3]:
def save_venue_to_csv(client, venue, csv_filename):
  """Fetch every note of a venue sequentially and write the review data to a csv file.

  Args:
    client (Client object from openreview): Specifies the base URL
      and the login information
    venue (string): Invitation id of the venue; it is passed as the
      ``invitation`` argument of ``openreview.tools.iterget_notes``
    csv_filename (string): Name or path for the resulting csv file,
      without extension (".csv" is appended here)

  Returns:
    None. As a side effect, writes ``csv_filename + ".csv"`` with the header
    ``title,authors,emails,decision,abstract,pdf,replies`` followed by one
    fully quoted row per note returned for the invitation.
  """
  # Materialise the notes before opening the output file so that a failure
  # while listing the venue does not leave a truncated csv behind.
  submitted_papers = list(openreview.tools.iterget_notes(client, invitation=venue))

  # newline='' is what the csv module expects for files handed to csv.writer
  # (otherwise extra blank lines appear on Windows); utf-8 keeps non-ASCII
  # titles and abstracts independent of the machine's locale.
  with open(csv_filename+".csv", 'w', newline='', encoding='utf-8') as csv_file:
    csv_file.write("title,authors,emails,decision,abstract,pdf,replies\n") # header
    # One writer for the whole file instead of a new one per paper.
    writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
    for paper in submitted_papers:
      tmp = paper.to_json()
      forum_id = tmp['forum']
      # Decision carried by the listed note itself; used only when the
      # note holding the abstract has no 'decision' field of its own.
      decision = ""
      content_keys = tmp["content"].keys()
      if 'decision' in content_keys:
        decision = tmp['content']['decision']
      elif 'recommendation' in content_keys:
        decision = tmp['content']['recommendation']
      forum_comments = client.get_notes(forum=str(forum_id))
      row = []
      replies = []
      for comment in forum_comments:
        content = comment.content
        if 'abstract' in content:
          # A note with an abstract is treated as the submission itself.
          row.append(content["title"])
          row.append(content['authors'])
          row.append(content['authorids'])  # written under the "emails" column
          if 'decision' in content:
            row.append(content['decision'])
          else:
            row.append(decision)
          row.append(content["abstract"])
          row.append(content["pdf"])
        else:
          # Every other note in the forum is kept as a list of
          # (field, value) pairs.
          replies.append(list(content.items()))
      # Replies always go last so they land in the final csv column.
      row.append(replies)
      writer.writerow(row)
  # The with-statement has already closed the file; no explicit close() needed.

In [4]:
def retrieve_data_from_paper(client, paper):
  """Build the csv row for one paper from the notes of its forum.

  Args:
    client (Client object from openreview): Used to call
      ``get_notes(forum=...)`` for the paper's forum
    paper (note object): Must provide ``to_json()`` returning a dict
      with the keys 'forum' and 'content'

  Returns:
    A list representing one row of the csv file, in the following order:

    title,authors,emails,decision,abstract,pdf,replies

    The first six entries come from the forum note that has an 'abstract'
    field. Its own 'decision' is used when present; otherwise the
    'decision' (or, failing that, the 'recommendation') of ``paper`` is
    used, or "" when neither exists. The last entry, replies, is a list
    with one element per remaining forum note, each element being the
    list of (field, value) pairs of that note, for example:

    [[('title', 'review of Deep Learning'),
      ('review', 'This paper ...')],
     [('title', 'review of Deep Learning')]]

    If no forum note has an 'abstract', the row contains only replies.
  """
  note_json = paper.to_json()
  forum_id = note_json['forum']
  paper_content = note_json["content"]

  # Decision to fall back on when the submission note carries none.
  if 'decision' in paper_content:
    fallback_decision = paper_content['decision']
  elif 'recommendation' in paper_content:
    fallback_decision = paper_content['recommendation']
  else:
    fallback_decision = ""

  submission_fields = []
  replies = []
  for note in client.get_notes(forum=str(forum_id)):
    content = note.content
    if 'abstract' not in content:
      # Not the submission: keep it as a list of (field, value) pairs.
      replies.append(list(content.items()))
      continue
    submission_fields.extend([
      content["title"],
      content['authors'],
      content['authorids'],
      content['decision'] if 'decision' in content else fallback_decision,
      content["abstract"],
      content["pdf"],
    ])

  # Replies are always the last column of the row.
  return submission_fields + [replies]

In [5]:
def save_venue_to_csv_parallel(client, venue, csv_filename, n_workers=8):
  '''Fetch every note of a venue with a thread pool and write the review data to a csv file.

  Args:
    client (Client object from openreview): Specifies the base URL
      and the login information
    venue (string): Invitation id of the venue; it is passed as the
      ``invitation`` argument of ``openreview.tools.iterget_notes``
    csv_filename (string): Name or path for the resulting csv file,
      without extension (".csv" is appended here)
    n_workers (optional int): It specifies the number of worker threads

  Returns:
    None. As a side effect, writes ``csv_filename + ".csv"`` with the header
    ``title,authors,emails,decision,abstract,pdf,replies`` followed by one
    fully quoted row per note, in the order the notes were listed.
  '''
  submitted_papers = list(openreview.tools.iterget_notes(client, invitation=venue))

  with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as executor:
    # executor.map returns results in the order of its input, so the rows
    # come out in the same order on every run (collecting futures with
    # as_completed made the row order depend on thread timing).
    results = list(executor.map(
        lambda paper: retrieve_data_from_paper(client, paper),
        submitted_papers))

  # All rows are fetched before the file is opened, so a failed download
  # does not leave a partial csv behind.
  # newline='' is what the csv module expects for files handed to csv.writer
  # (otherwise extra blank lines appear on Windows); utf-8 keeps non-ASCII
  # titles and abstracts independent of the machine's locale.
  with open(csv_filename+".csv", 'w', newline='', encoding='utf-8') as csv_file:
    csv_file.write("title,authors,emails,decision,abstract,pdf,replies\n") # header
    writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
    writer.writerows(results)
  # The with-statement has already closed the file; no explicit close() needed.

In [6]:
def save_all_venues_to_csv(client, venues, csv_filenames, n_workers=8, parallel=True):
  '''Given the list of invitation ids of the venues, retrieve all the review data into csv files

  Args:
    client (Client object from openreview): Specifies the base URL
      and the login information
    venues (list of strings): Each string is the invitation id of a venue
    csv_filenames (list of strings): Name or path for each resulting csv
      file, in the same order as ``venues``
    n_workers (optional int): It specifies the number of workers; only
      used when ``parallel`` is true
    parallel (bool): When true each venue is fetched with
      ``save_venue_to_csv_parallel`` using n_workers; when false the
      sequential ``save_venue_to_csv`` is used

  Returns:
    None. One csv file per venue is written and a progress line is
    printed after each venue.
  '''
  for i, venue in enumerate(venues):
    if parallel:
      save_venue_to_csv_parallel(client, venue, csv_filenames[i], n_workers)
    else:
      # The sequential path must use the sequential writer; previously
      # this branch also called the parallel one, so the flag was ignored.
      save_venue_to_csv(client, venue, csv_filenames[i])
    print("Venue "+str(i)+" done.")

In [7]:
# Wall-clock timestamp taken before the download starts.
start = time.time()

# Using guest mode
client = openreview.Client(baseurl='https://openreview.net')

# Invitation ids to fetch and the matching output file names
# (".csv" is appended by the save functions).
url_list = list(iclr_conf_data["conference_url"])
conference_list = list(iclr_conf_data["conference"])


# Fourth positional argument is n_workers; parallel keeps its default (True).
save_all_venues_to_csv(client, url_list, conference_list, 8)

end = time.time()
# Elapsed time in seconds.
print(end - start)

Venue 0 done.
Venue 1 done.
Venue 2 done.
Venue 3 done.
Venue 4 done.
Venue 5 done.
Venue 6 done.
Venue 7 done.
Venue 8 done.
Venue 9 done.
Venue 10 done.
Venue 11 done.
Venue 12 done.
480.1606638431549
