In [1]:
import csv
import collections
import json
import openreview
import openreview_lib as orl
import os
import sys
import tqdm

In [3]:
# Load the training split of the review/rebuttal pair dataset.
# The encoding is passed explicitly so that decoding does not depend on the
# platform's locale default; it matches the 'utf8' used when this notebook
# writes its CSV templates. (Assumes the JSON file is UTF-8 encoded.)
with open('../../data/review_rebuttal_pair_dataset/unstructured_train.json', 'r', encoding='utf8') as f:
    pairs = json.load(f)

# Group the pairs by their "forum" value. A defaultdict is used, so looking up
# a forum that has no pairs yields an empty list instead of raising KeyError.
pairs_by_forum = collections.defaultdict(list)
for pair in pairs["review_rebuttal_pairs"]:
    pairs_by_forum[pair["forum"]].append(pair)

FileNotFoundError: [Errno 2] No such file or directory: '../../data/review_rebuttal_pair_dataset/unstructured_train.json'

In [None]:
# Bucket forums by the "confidence" value found in the area chair's note.
# Maps confidence value -> list of forum ids.
confidence_map = collections.defaultdict(list)
# The client is created with only a base URL (no credentials are passed).
guest_client = openreview.Client(baseurl='https://api.openreview.net')
for forum in tqdm.tqdm(pairs_by_forum):
    forum_comments = guest_client.get_notes(forum=forum)
    for comment in forum_comments:
        # A note counts as the area chair's decision when its flattened
        # signature contains "Area_Chair" and it carries a 'recommendation'.
        # NOTE(review): assumes such notes always have a "confidence" field;
        # a KeyError here means that assumption does not hold for some forum.
        if "Area_Chair" in orl.flatten_signature(comment) and 'recommendation' in comment.content:
            confidence = comment.content["confidence"]
            confidence_map[confidence].append(forum)
            # Only the first matching note per forum is recorded.
            break

In [None]:
# Report forums where the area chair was unsure and the ratings of the forum's
# pairs differ by at least 3. One CSV row is written per forum:
#   rating range, sorted ratings (space separated), paper title, forum URL.
# csv.writer is used instead of ",".join so that titles containing commas or
# quotes are quoted and still parse as a single field.
report_writer = csv.writer(sys.stdout, lineterminator="\n")
for unsure_forum in confidence_map["2: The area chair is not sure"]:
    unsure_pairs = pairs_by_forum[unsure_forum]
    ratings = [pair["labels"]["rating"] for pair in unsure_pairs]
    rating_range = max(ratings) - min(ratings)
    if rating_range >= 3:
        rating_list = " ".join(str(rating) for rating in sorted(ratings))
        report_writer.writerow([
            rating_range,
            rating_list,
            # The title is taken from the first pair of the forum.
            unsure_pairs[0]["title"],
            "https://openreview.net/forum?id=" + unsure_forum,
        ])
            

In [None]:
# Forum id (a key of pairs_by_forum) whose review/rebuttal pairs are exported
# as annotation template CSVs; it is also embedded in the output file names.
SELECTED_PRELIMINARY_ANNOTATION_FORUM = "HJg6e2CcK7"

In [None]:
def get_text_as_string(tokenized_text, review_or_rebuttal):
    """Flatten tokenized text into a list of (index label, sentence) tuples.

    Args:
        tokenized_text: iterable of paragraphs; each paragraph is an iterable
            of sentences; each sentence is an iterable of string tokens.
        review_or_rebuttal: "review" (labels start with "V") or "rebuttal"
            (labels start with "B").

    Returns:
        A list of (label, sentence) tuples. Tokens of a sentence are joined
        with single spaces, and an empty sentence is emitted after every
        paragraph (including the last one). Labels are the prefix followed by
        the running position, counting the empty separator rows as well.

    Raises:
        AssertionError: if review_or_rebuttal is neither "review" nor
            "rebuttal".
    """
    is_review = review_or_rebuttal == "review"
    if not is_review:
        assert review_or_rebuttal == "rebuttal"
    prefix = "V" if is_review else "B"

    rows = []
    for para in tokenized_text:
        rows.extend(" ".join(tokens) for tokens in para)
        # Blank row marking the end of the paragraph.
        rows.append("")

    return [("{}{}".format(prefix, index), row) for index, row in enumerate(rows)]

def pad(text, max_len):
    """Return a new list: `text` followed by ("", "") rows up to `max_len`.

    If `text` already has `max_len` or more entries, no filler is added and
    nothing is truncated.
    """
    missing = max_len - len(text)
    # A non-positive repeat count yields an empty filler list.
    filler = [("", "")] * missing
    return text + filler

# Header row of the annotation template CSV. It has 11 columns, matching the
# 11-tuples produced by build_csv_lines. "----" is the column between the
# review columns and the rebuttal columns; note that "Grounding" appears twice.
FIELD_NAMES = [
    "Review_index",
    "Review_sentence",
    "Grounding",
    "Review_affordance",
    "----",
    "Rebuttal_index",
    "Rebuttal_sentence",
    "Grounding",
    "Related_to",
    "Relation",
    "Notes",
]

def build_csv_lines(review_text, rebuttal_text):
    """Pair review rows with rebuttal rows to form 11-column CSV rows.

    Args:
        review_text: list of (index label, sentence) tuples for the review.
        rebuttal_text: list of (index label, sentence) tuples for the
            rebuttal; must have the same length as review_text.

    Returns:
        A list of 11-tuples: the review label and sentence, three empty
        cells, the rebuttal label and sentence, then four empty cells.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    assert len(review_text) == len(rebuttal_text)
    after_review = ("",) * 3
    after_rebuttal = ("",) * 4
    return [
        (review_i, review_sentence) + after_review
        + (rebuttal_i, rebuttal_sentence) + after_rebuttal
        for (review_i, review_sentence), (rebuttal_i, rebuttal_sentence)
        in zip(review_text, rebuttal_text)
    ]


def build_lines(pair):
    """Build the side-by-side CSV rows for one review/rebuttal pair.

    The pair's "review_text" and "rebuttal_text" are each flattened into
    labelled sentences, the shorter side is padded with empty rows so both
    have the same length, and the two sides are zipped into CSV rows.
    """
    texts = [
        get_text_as_string(pair["review_text"], "review"),
        get_text_as_string(pair["rebuttal_text"], "rebuttal"),
    ]
    target_len = max(len(text) for text in texts)
    review_rows, rebuttal_rows = (pad(text, target_len) for text in texts)
    return build_csv_lines(review_rows, rebuttal_rows)

In [None]:
# Write one annotation template CSV per pair of the selected forum, named
# after the forum id and the pair's "review_author".
# Create the output directory first so open() does not fail when it is missing.
os.makedirs("pre_pilot", exist_ok=True)
for pair in pairs_by_forum[SELECTED_PRELIMINARY_ANNOTATION_FORUM]:
    csv_lines = build_lines(pair)
    filename = "".join([
        "pre_pilot/template_", SELECTED_PRELIMINARY_ANNOTATION_FORUM, "_", pair["review_author"], ".csv"])
    # newline='' leaves line endings to the csv module; without it, platforms
    # that translate "\n" on write (e.g. Windows) produce blank rows.
    with open(filename, 'w', encoding='utf8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(FIELD_NAMES)
        writer.writerows(csv_lines)