In [None]:
import collections
import glob
import json
import openreview
import pandas as pd
import random
import tqdm

# Seed Python's global RNG so the random.choice calls made later in this
# notebook draw the same sequence on every fresh run.
random.seed(37)

In [None]:
SUBSETS = "train dev test".split()
# Presumably the number of annotations each review should end up with in
# each subset -- TODO confirm against the annotation protocol.
TOTAL_REQUIRED_MAP = {
    "train":1,
    "dev":1,
    "test":2
}

# Continuing annotators: anno3, anno10

# Root directory that holds one sub-directory of annotation JSON files per
# subset. Kept in a single named constant so it can be repointed in one place.
ANNOTATION_ROOT = "/Users/nnayak/Downloads/0517_split_2/"

ExistingAnnotation = collections.namedtuple("ExistingAnnotation",
                                            "subset forum_id review_id anno".split())

# One ExistingAnnotation record per annotation file found on disk.
existing_annotation_dicts = []
for subset in SUBSETS:
    subset_dir = "".join([ANNOTATION_ROOT, subset, "/*"])
    # Sorted so records are collected in a stable order.
    for filename in sorted(glob.glob(subset_dir)):
        with open(filename, 'r') as f:
            obj = json.load(f)
        metadata = obj["metadata"]
        existing_annotation_dicts.append(
            ExistingAnnotation(subset, metadata["forum_id"],
                               metadata["review"], metadata["anno"]))

# Pass the column names explicitly: when no files are found the frame is
# still created with the expected columns instead of being column-less
# (which would make later lookups such as ["subset"] raise KeyError).
existing_annotations = pd.DataFrame(
    existing_annotation_dicts, columns=list(ExistingAnnotation._fields))

In [None]:
# OpenReview API client used for all note lookups below. Only the base URL is
# passed here; no credentials are supplied.
guest_client = openreview.Client(baseurl='https://api.openreview.net')

def get_new_annotator(existing_annotators):
    """Pick which continuing annotator (anno3 / anno10) should be assigned next.

    Args:
        existing_annotators: iterable of annotator ids already present.

    Returns:
        "ERROR" if both 'anno3' and 'anno10' are already present; the other
        one if exactly one of them is present; otherwise a random choice
        between the two.
    """
    if {'anno3', 'anno10'}.issubset(existing_annotators):
        return "ERROR"
    if 'anno3' in existing_annotators:
        return 'anno10'
    if 'anno10' in existing_annotators:
        return 'anno3'
    # Neither continuing annotator has been seen yet: pick one at random.
    return random.choice(['anno3', 'anno10'])
    
def get_top_level_reviews(guest_client, forum_id):
    """Collect the ids of notes that reply directly to the forum and whose
    first signature contains "AnonReviewer".

    Args:
        guest_client: object exposing get_notes(forum=...).
        forum_id: id of the forum whose notes are fetched.

    Returns:
        A set of note ids.
    """
    review_ids = set()
    for note in guest_client.get_notes(forum=forum_id):
        # Only direct replies to the forum count as top-level.
        if note.replyto != forum_id:
            continue
        if "AnonReviewer" in note.signatures[0]:
            review_ids.add(note.id)
    return review_ids
        
ToBeAnnotated = collections.namedtuple("ToBeAnnotated",
                                       "subset forum_id review_id already_annotated assigned_to".split())

# One ToBeAnnotated record per (review, assignee) pair that still needs work.
to_be_annotated_dicts = []
for subset in SUBSETS:
    total_required = TOTAL_REQUIRED_MAP[subset]
    this_subset_annotations = existing_annotations[existing_annotations["subset"] == subset]
    forum_ids = sorted(this_subset_annotations["forum_id"].unique())
    for forum_id in forum_ids:
        # Note: rows are matched on forum_id across the whole table, not only
        # within the current subset.
        this_forum_annotations = existing_annotations[
            existing_annotations["forum_id"] == forum_id]
        new_annotator = get_new_annotator(this_forum_annotations["anno"].unique())
        reviews_annotated = this_forum_annotations["review_id"].unique()
        # Sorted: iterating a bare set of string ids gives an order that
        # changes between interpreter runs (hash randomization), which would
        # make the row order -- and any seeded random draws made per row
        # downstream -- irreproducible.
        reviews_to_annotate = sorted(
            get_top_level_reviews(guest_client, forum_id) - set(reviews_annotated))
        if subset == 'test':
            # Reviews that already have some annotation but fewer than required.
            for review in reviews_annotated:
                n_existing = len(
                    this_forum_annotations[this_forum_annotations["review_id"] == review])
                if n_existing < total_required:
                    to_be_annotated_dicts.append(
                        ToBeAnnotated(subset, forum_id, review, "n/a", new_annotator))
                else:
                    print("yay")
            # Reviews with no annotation at all. By construction none of them
            # appears in this_forum_annotations, and get_new_annotator never
            # returns "ANY", so the only outcome is a single 'ANY' row.
            # NOTE(review): TOTAL_REQUIRED_MAP lists 2 for test, yet only one
            # row is emitted per unannotated review -- confirm this is intended.
            for review in reviews_to_annotate:
                to_be_annotated_dicts.append(
                    ToBeAnnotated(subset, forum_id, review, "n/a", 'ANY'))
        else:
            for review in reviews_to_annotate:
                to_be_annotated_dicts.append(
                    ToBeAnnotated(subset, forum_id, review, "n/a", new_annotator))

# Explicit columns keep the frame well-formed even when there is nothing to assign.
tba_df = pd.DataFrame(to_be_annotated_dicts, columns=list(ToBeAnnotated._fields))

In [None]:
# Display the table of pending annotation assignments.
tba_df

In [None]:
# Maps annotator ids to the labels printed in the assignment list.
REV_MAP = {
    'anno3': 'KG',
    'anno10': 'MAD'
}

# Print one "<review_id>\t<label>" line per assignable row.
# Fields are read by name: positional access such as row[-1] on a
# string-labelled Series relies on a deprecated pandas fallback.
for task in tba_df.itertuples(index=False):
    assignee = task.assigned_to
    if 'anno' in assignee:
        anno = REV_MAP[assignee]
    elif assignee == 'ANY':
        # Either annotator may take it; draw one at random.
        anno = random.choice(["KG", 'MAD'])
    else:
        # Any other value (e.g. the "ERROR" sentinel) is skipped without output.
        continue
    print(task.review_id + "\t" + anno)