In [13]:
import collections
from datetime import datetime
import hashlib
import json
import openreview
import os
import pandas as pd
import tqdm

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 1000)

INVITATION = 'ICLR.cc/2019/Conference/-/Blind_Submission'

invitation_map = {"ICLR_2019": 'ICLR.cc/2019/Conference/-/Blind_Submission'}

LIMIT = 10  # Number of papers to build timelines for
pdfs_dir = "another_dir/"

# A client is required for any OpenReview API actions
guest_client = openreview.Client(baseurl='https://api.openreview.net')

# ==== HELPERS

def maybe_transform_date(timestamp):
  if timestamp is None:
    return None
  else:
    return datetime.fromtimestamp(int(timestamp/1000)).strftime("%m/%d/%Y, %H:%M:%S")


def get_initiator(note):
    return note.signatures[0].split("/")[-1]
    
class PDFStatus(object):
  AVAILABLE = "available"
  DUPLICATE = "duplicate"
  FORBIDDEN = "forbidden"
  NOT_FOUND = "not_found"
  NOT_APPLICABLE = "not_applicable"
  
  
class EventType(object):
  COMMENT = "comment"
  ARTIFACT = "artifact"

def make_metadata_dict(reference, file_path, reference_index, pdf_status, event_type):
  return {
    "forum": reference.forum,
    "initiator": get_initiator(reference),
    "identifier": reference.id,
    "tcdate": reference.tcdate,
    "tmdate": reference.tmdate,
    "reply_to": reference.replyto,
    "reference_index": reference_index,
    "pdf_status": pdf_status,
    "event_type": event_type,
    "filepath": file_path,
  }

ERROR_STATUS_LOOKUP = {
  "ForbiddenError": PDFStatus.FORBIDDEN,
  "NotFoundError": PDFStatus.NOT_FOUND,
}
      
def write_artifact(conference, note_id, reference, reference_index, directories, checksum_map):
  print(reference.original, note_id, reference.id)
  pdf_path = make_path(directories, f'artifact_{note_id}_{reference_index}.pdf')
  is_reference = not reference.id == reference.forum
  try: # try to get the PDF for this reference
    pdf_binary = guest_client.get_pdf(reference.id, is_reference=is_reference)
    this_checksum = hashlib.md5(pdf_binary).hexdigest()
    found = False
    for other_pdf_path, other_checksum in checksum_map.items():
      if other_checksum == this_checksum:
        pdf_path = other_pdf_path
        pdf_status = PDFStatus.DUPLICATE
        found = True
    if not found:
      checksum_map[pdf_path] = this_checksum
      with open(pdf_path, 'wb') as file_handle:
        file_handle.write(pdf_binary)
      pdf_status = PDFStatus.AVAILABLE
  except openreview.OpenReviewException as e:
    error_name = e.args[0]['name']
    pdf_status = ERROR_STATUS_LOOKUP[error_name]
      
  return checksum_map, make_metadata_dict(reference, pdf_path, reference_index, pdf_status, EventType.ARTIFACT)


def make_path(directories, filename=None):
  directory = os.path.join(*directories)
  os.makedirs(directory, exist_ok=True)
  if filename is not None:
    return os.path.join(*directories, filename)
    


def write_comment(conference, note_id, reference, reference_index, directories):
  json_path = make_path(directories, f'comment_{note_id}_{reference_index}.json')
  is_reference = not reference.id == reference.forum
  with open(json_path, 'w') as f:
    json.dump(reference.content, f)
  return make_metadata_dict(reference, json_path, reference_index, PDFStatus.NOT_APPLICABLE, EventType.COMMENT)
  
def get_references_in_order(references):
  return list(sorted(references, key=lambda x: x.tmdate))


events = []

forum_to_events_map = collections.defaultdict(list)

for conference, invitation in invitation_map.items():
  forum_notes = list(openreview.tools.iterget_notes(guest_client, invitation=invitation))[:LIMIT]
  for forum_note in tqdm.tqdm(forum_notes):
    dir_path = [pdfs_dir, conference, forum_note.id]
    make_path(dir_path)
    # Get PDFs from main submission/ BlindSubmission (?)
    checksum_map = {}
    references = get_references_in_order(guest_client.get_references(referent=forum_note.id, original=True))
    for ref_i, reference in enumerate(references): # We don't run this with original=False. This would give us the revisions to the Blind Submission itself, rather than the original submission note.
      checksum_map, row = write_artifact(conference, forum_note.id, reference, ref_i, dir_path, checksum_map)
      events.append(row)
    
    # Get PDFs from comments
    for note in guest_client.get_notes(forum=forum_note.id):
      if note.id == forum_note.id: # Already done above
        
        print(len(list(guest_client.get_references(referent=note.id, original=False))))
      references = get_references_in_order(guest_client.get_references(referent=note.id, original=False))
      if len(references) == 1:
        continue
      for ref_i, reference in enumerate(references):
        print(reference)
        
        events.append(write_comment(conference, note.id, reference, ref_i, dir_path))
      print("+" * 80)
df = pd.DataFrame.from_dict(events)


  0%|                                                      | 0/10 [00:00<?, ?it/s]

HkeaLA39Ym rJl0r3R9KX BJpIR25YX
HkeaLA39Ym rJl0r3R9KX Sk_lKX9Tm
HkeaLA39Ym rJl0r3R9KX ry79jm5pQ
HkeaLA39Ym rJl0r3R9KX ByCx70sam
HkeaLA39Ym rJl0r3R9KX BJVDsE367
HkeaLA39Ym rJl0r3R9KX rkXUYERSN
HkeaLA39Ym rJl0r3R9KX BkaTYNRS4
HkeaLA39Ym rJl0r3R9KX rtZtjY_fk9
{'cdate': 1541203171721,
 'content': {'confidence': '3: The reviewer is fairly confident that the '
                           'evaluation is correct',
             'rating': '7: Good paper, accept',
             'review': 'Authors proposes a new algorithm for improving the '
                       'stability of class importance weighting estimation '
                       'procedure (Lipton et al., 2018) with a two-step '
                       'procedure. The reparamaterization of w using the '
                       'weight shift theta and lambda allows authors develop a '
                       'generalization upperbound with terms rely on theta, '
                       'sigma and lambda. \n'
                       '\n'
       

 10%|████▌                                         | 1/10 [00:06<01:00,  6.69s/it]

Hyl5Mc3qtX SylCrnCcFX Hk9Mq3ctm
Hyl5Mc3qtX SylCrnCcFX Bk70YO_6X
Hyl5Mc3qtX SylCrnCcFX SJyIUFu67
Hyl5Mc3qtX SylCrnCcFX Hysw9K_pX
Hyl5Mc3qtX SylCrnCcFX BkokwMAHN
Hyl5Mc3qtX SylCrnCcFX H1IEk7RBN
Hyl5Mc3qtX SylCrnCcFX HyMkSERBE
Hyl5Mc3qtX SylCrnCcFX r1V_Zj184
Hyl5Mc3qtX SylCrnCcFX SJd008eUE
Hyl5Mc3qtX SylCrnCcFX rZ4W1cOzJ5
{'cdate': 1541095999208,
 'content': {'confidence': '3: The reviewer is fairly confident that the '
                           'evaluation is correct',
             'rating': '6: Marginally above acceptance threshold',
             'review': 'A key challenge that presents the deep learning '
                       'community is that state-of-the-art solutions are '
                       'oftentimes associated with unstable derivatives, '
                       'compromising the robustness of the network. In this '
                       'paper, the author(s) explore the problem of how to '
                       'train a neural network with stable derivatives by '
     

1


 20%|█████████▏                                    | 2/10 [00:19<01:20, 10.07s/it]

SkgjyZRcKm H1xAH2RqK7 SysJWR5tm
SkgjyZRcKm H1xAH2RqK7 B1UYZr9CX
SkgjyZRcKm H1xAH2RqK7 BrBNIO_M15
1


 30%|█████████████▊                                | 3/10 [00:23<00:53,  7.63s/it]

BJe1HWOtK7 HJeABnCqKQ ByJHWdFFQ
BJe1HWOtK7 HJeABnCqKQ SkitUh_RX
BJe1HWOtK7 HJeABnCqKQ rUqtZ_Ozkc
{'cdate': 1541173479791,
 'content': {'confidence': '4: The reviewer is confident but not absolutely '
                           'certain that the evaluation is correct',
             'rating': '4: Ok but not good enough - rejection',
             'review': 'This paper presents an incremental extension to the '
                       'Self-imitation paper by Oh, Junhyuk, et al. The '
                       'previous paper combined self-imitation learning with '
                       'actor-critic methods, and this paper directly '
                       'integrates the idea into the generative adversarial '
                       'imitation learning framework.\n'
                       '\n'
                       'I think the idea is interesting, but there remains '
                       'some issues very unclear to me. In the algorithms, '
                       'when updating the good 

 40%|██████████████████▍                           | 4/10 [00:28<00:38,  6.44s/it]

HyeS8-RqFX SyVpB2RqFX r1HU-A5Y7
HyeS8-RqFX SyVpB2RqFX Sy4xxlcRm
HyeS8-RqFX SyVpB2RqFX BkR_MeqRQ
HyeS8-RqFX SyVpB2RqFX ryp-CmqRQ
HyeS8-RqFX SyVpB2RqFX rJtEA75Rm
HyeS8-RqFX SyVpB2RqFX S1Xn5DcAX
HyeS8-RqFX SyVpB2RqFX HybKcu5CQ
HyeS8-RqFX SyVpB2RqFX HJdn3O5R7
HyeS8-RqFX SyVpB2RqFX SJ01A_5Cm
HyeS8-RqFX SyVpB2RqFX rMfttddzJ9
HyeS8-RqFX SyVpB2RqFX SVg38GBp3f


 50%|███████████████████████                       | 5/10 [00:38<00:39,  7.91s/it]

1
S1gVAfRqtQ SJf6BhAqK7 HkV0GRcF7
S1gVAfRqtQ SJf6BhAqK7 rJHKE6707
S1gVAfRqtQ SJf6BhAqK7 rygRC1q07
{'cdate': 1541201397788,
 'content': {'confidence': '4: The reviewer is confident but not absolutely '
                           'certain that the evaluation is correct',
             'rating': '5: Marginally below acceptance threshold',
             'review': 'Review Summary\n'
                       '--------------\n'
                       'While the focus on variadic learning is interesting, I '
                       'think the present version of the paper needs far more '
                       'presentational polish as well as algorithmic '
                       'improvements before it is ready for ICLR. I think '
                       'there is the potential for some neat ideas here and I '
                       'hope the authors prepare stronger versions in the '
                       'future. However, the current version is unfortunately '
                       'not compreh

1


 60%|███████████████████████████▌                  | 6/10 [00:44<00:28,  7.11s/it]

rklzyMA5FQ H1faSn0qY7 HJQZM7GAQ
rklzyMA5FQ H1faSn0qY7 BkGkGRqtQ
rklzyMA5FQ H1faSn0qY7 HJ_1aTN0m
rklzyMA5FQ H1faSn0qY7 r1eKl0EAQ
rklzyMA5FQ H1faSn0qY7 r89l_uuG15
{'cdate': 1541177275996,
 'content': {'confidence': '2: The reviewer is willing to defend the '
                           'evaluation, but it is quite likely that the '
                           'reviewer did not understand central parts of the '
                           'paper',
             'rating': '5: Marginally below acceptance threshold',
             'review': 'In this paper the authors propose DL2 a system for '
                       'training and querying neural networks with logical '
                       'constraints\n'
                       '\n'
                       'The proposed approach is intriguing but in my humble '
                       'opinion the presentation of the paper could be '
                       'improved. Indeed I think that the paper is bit too '
                       'hard to follo

 70%|████████████████████████████████▏             | 7/10 [00:49<00:19,  6.51s/it]

S1gEek2qKQ HJgTHnActQ SyEeyh9Fm
S1gEek2qKQ HJgTHnActQ ry7dCV5AX
S1gEek2qKQ HJgTHnActQ BWEBK__f1c
{'cdate': 1541023719954,
 'content': {'confidence': '4: The reviewer is confident but not absolutely '
                           'certain that the evaluation is correct',
             'rating': '4: Ok but not good enough - rejection',
             'review': 'Summary--\n'
                       'The paper tries to address an issue existing in '
                       'current image-to-image translation at the point that '
                       'different regions of the image should be treated '
                       'differently. In other word, background should not be '
                       'transferred while only foreground of interest should '
                       'be transferred. The paper propose to use '
                       'co-segmentation to find the common areas to for image '
                       'translation. It reports the proposed method works '
                     

1


 80%|████████████████████████████████████▊         | 8/10 [00:54<00:12,  6.06s/it]

ByxzSCs5K7 HylTBhA5tQ rJGBRocFm
ByxzSCs5K7 HylTBhA5tQ BJT6KpfTQ
ByxzSCs5K7 HylTBhA5tQ HJUM96M6Q
ByxzSCs5K7 HylTBhA5tQ B1ncGoyAQ
ByxzSCs5K7 HylTBhA5tQ Sk3cLIcAQ
ByxzSCs5K7 HylTBhA5tQ B1F2VV5xV
ByxzSCs5K7 HylTBhA5tQ BkE1GKcgN
ByxzSCs5K7 HylTBhA5tQ HyWTqT9gE
ByxzSCs5K7 HylTBhA5tQ SJuL7qV-V
ByxzSCs5K7 HylTBhA5tQ Byo3z7sbN
ByxzSCs5K7 HylTBhA5tQ BkpZGq3ZN
ByxzSCs5K7 HylTBhA5tQ BJ64z53ZE
ByxzSCs5K7 HylTBhA5tQ HJtIL9h-4
{'cdate': 1540864380131,
 'content': {'confidence': '4: The reviewer is confident but not absolutely '
                           'certain that the evaluation is correct',
             'rating': '5: Marginally below acceptance threshold',
             'review': 'In this paper, the authors associated with the '
                       'generalization gap of robust adversarial training with '
                       'the distance between the test point and the manifold '
                       "of training data. A so-called 'blind-spot attack' is "
                       'proposed 

 90%|█████████████████████████████████████████▍    | 9/10 [01:04<00:07,  7.05s/it]

HJlhT3T5K7 B1gTShAct7 H1hanTcKQ
HJlhT3T5K7 B1gTShAct7 H1D0419Am
HJlhT3T5K7 B1gTShAct7 SJ2DBlcCX
HJlhT3T5K7 B1gTShAct7 BJbHkMqR7
HJlhT3T5K7 B1gTShAct7 ByvlvH0B4
HJlhT3T5K7 B1gTShAct7 BJRVurRBV
HJlhT3T5K7 B1gTShAct7 Sk0JX8AHV
HJlhT3T5K7 B1gTShAct7 H1XY9BfFE
HJlhT3T5K7 B1gTShAct7 rkmbRChY4
HJlhT3T5K7 B1gTShAct7 BkmS7QtiE
HJlhT3T5K7 B1gTShAct7 H5zeuuGyc
{'cdate': 1541369820403,
 'content': {'confidence': '5: The reviewer is absolutely certain that the '
                           'evaluation is correct and very familiar with the '
                           'relevant literature',
             'rating': '5: Marginally below acceptance threshold',
             'review': 'The transfer/ interference perspective of lifelong '
                       'learning is well motivated, and combining the '
                       'meta-learning literature with the continual learning '
                       'literature (applying reptile twice), even if seems '
                       "obvious, wasn't explo

100%|█████████████████████████████████████████████| 10/10 [01:14<00:00,  7.43s/it]

1





In [None]:
df

* My current understanding is that using `client.get_references` should get me all revisions of a note.
  * If it's the top note of a forum (i.e. `note.id == note.forum`, then it contains revisions of the submission pdf.
  * If it's any other note, then it contains revisions of the comment
* I hoped to get some information about the meanings of these by looking at them on the OpenReview website using permalinks, but as I understand it the individual referent_ids are not visible on the website. However, I can go to https://openreview.net/revisions?id={forum_id} or https://openreview.net/revisions?id={note_id} and see them listed.

# Questions

* I can't figure out what the `original` flag does. It's supposed to "additionally return references to the original note" per [this file](https://github.com/openreview/openreview-py/blob/db643b015e0f46aec66c2b1227fe0cd2e21b621d/openreview/openreview.py#L1091).
  * However, sometimes it does not result in any change in the result (e.g. line 10 and 11 in the table, almost the same function call with `original` set to either true or false results in the same result both times)
  * In other cases, setting `original` to true or false results in completely disjoint sets of things returned, e.g. lines 22-29 and line 30)
* I'm also trying to understand how the different date fields pertain to the modifications.
  * It seems that `mdate` is always None, is that expected?
  * For something with revisions, e.g. r1ghyrDc2m in lines 14-17 ([link to revisions](https://openreview.net/revisions?id=r1ghyrDc2m)), I would expect the modification date to be available in tmdate, perhaps, so something like November 6th. However tcdate, tmdate, and cdate all have the same value for all revisions. Where would the November 6th date referred to on the revisions page be stored?

In [None]:
# import os
# import tqdm
# pdfs_dir = "another_dir/"
# for i, forum_note in enumerate(openreview.tools.iterget_notes(
#         guest_client, invitation=INVITATION)):
#     write_pdf_to_file(guest_client, forum_dir, forum_note)
#     for revision in tqdm.tqdm(guest_client.get_references(
#             referent=forum_note.id, original=True),
#             desc="Getting revisions for {0}".format(forum_note.id)):
#         try:
#             write_pdf_to_file(guest_client, forum_dir, revision)
#         except openreview.OpenReviewException as e:
#             print(e)
#             continue
#     if i == LIMIT + 1:
#         break