In [None]:
import collections
import glob
import json
import openreview
import pandas as pd


import matplotlib.pyplot as plt
import seaborn as sns


# Root directory that holds one sub-directory of JSON files per subset.
dataset_dir = "data_prep/final_dataset/"

SUBSETS = ["train", "dev", "test"]

# subset name -> list of parsed JSON objects (one per file in that subset).
datasets = collections.defaultdict(list)

for subset in SUBSETS:
    # glob returns paths in arbitrary, filesystem-dependent order; sorting keeps
    # the order of loaded examples reproducible across runs and machines.
    for filename in sorted(glob.glob(dataset_dir + subset + "/*")):
        # Decode explicitly as UTF-8 instead of relying on the platform default.
        with open(filename, 'r', encoding="utf-8") as f:
            datasets[subset].append(json.load(f))

# Every loaded example across all subsets, flattened in a single linear pass
# (sum(list_of_lists, []) re-copies the accumulator on every step).
all_pairs = [pair for subset_pairs in datasets.values() for pair in subset_pairs]

In [None]:
def total_and_average_len(list_of_lists):
    """Return the total number of inner items and the mean inner-list length.

    Args:
        list_of_lists: a sequence of sized collections (e.g. a list of lists).

    Returns:
        A ``(total, average)`` tuple where ``total`` is the summed length of
        all inner collections and ``average`` is ``total`` divided by the
        number of inner collections. An empty outer sequence yields
        ``(0, 0.0)`` instead of raising ``ZeroDivisionError``.
    """
    if len(list_of_lists) == 0:
        return 0, 0.0
    # Sum the lengths directly; there is no need to materialise one big
    # concatenated list just to measure it.
    total = sum(len(inner) for inner in list_of_lists)
    return total, total / len(list_of_lists)

def count_dataset(pairs, subset):
    """Build one row of summary statistics for a subset.

    Args:
        pairs: list of pair dicts; each is read for its "review_sentences",
            "rebuttal_sentences" and its "metadata" ("forum_id", "annotator").
        subset: label stored under the "subset" key of the result.

    Returns:
        A dict with the subset label, the number of pairs, the number of
        distinct forum ids, the number of pairs whose annotator is "anno0"
        (reported as "adjudicated"), and the total / average sentence counts
        for reviews and rebuttals.
    """
    # TODO: Add double-annotated and adjudicated
    review_sentence_lists = [pair["review_sentences"] for pair in pairs]
    review_total, review_average = total_and_average_len(review_sentence_lists)

    rebuttal_sentence_lists = [pair["rebuttal_sentences"] for pair in pairs]
    rebuttal_total, rebuttal_average = total_and_average_len(rebuttal_sentence_lists)

    distinct_forums = {pair["metadata"]["forum_id"] for pair in pairs}
    anno0_pairs = sum(
        1 for pair in pairs if pair["metadata"]["annotator"] == "anno0")

    summary = {
        "subset": subset,
        "pairs": len(pairs),
        "forums": len(distinct_forums),
        "adjudicated": anno0_pairs,
        "review_sentences": review_total,
        "rebuttal_sentences": rebuttal_total,
        "review_avg_sentences": review_average,
        "rebuttal_avg_sentences": rebuttal_average,
    }
    return summary
# Summary statistics per subset: one record per subset, shown with the
# subsets as columns and values rounded to two decimals.
df_dicts = [
    count_dataset(subset_pairs, subset_name)
    for subset_name, subset_pairs in datasets.items()
]
dataframe = pd.DataFrame.from_dict(df_dicts)

dataframe.round(2).T

In [None]:
# Spearman rank correlation, per pair, between the position of a rebuttal
# sentence and the positions of the review sentences it is aligned to.

from scipy import stats


spearmen = []
for pair in all_pairs:
    rebuttal_positions = []
    review_positions = []
    for sentence in pair["rebuttal_sentences"]:
        align_type, align_indices = sentence["alignment"]
        if align_type != "context_sentences":
            continue
        for review_index in align_indices:
            rebuttal_positions.append(sentence["sentence_index"])
            review_positions.append(review_index)

    # Skip pairs with no alignments, or where either side has a single
    # distinct value (the rank correlation is undefined for a constant input).
    if len(set(rebuttal_positions)) < 2 or len(set(review_positions)) < 2:
        continue

    result = stats.spearmanr(rebuttal_positions, review_positions)
    spearmen.append(result.correlation)

sns.histplot(spearmen, bins=100, palette="crest_r")


In [None]:
# Polarity vs. aspect

In [None]:
# Type responded to



In [None]:
# Types of context

# Labels for the two coarse kinds of sentence-level context.
CONTIGUOUS = "contiguous"
NONCONTIGUOUS = "noncontiguous"

context_counter = collections.Counter()

# One independent Counter per context kind, for raw alignment lengths ...
length_counter = {
    kind: collections.Counter() for kind in (CONTIGUOUS, NONCONTIGUOUS)
}
# ... and another independent set for length-normalised values.
normalized_length_counter = {
    kind: collections.Counter() for kind in (CONTIGUOUS, NONCONTIGUOUS)
}



def is_contiguous(alignments):
    """Return True when the indices form one unbroken run of integers.

    The indices may be given in any order. A repeated index makes the
    result False, because the sorted indices then no longer equal the plain
    integer range between the smallest and largest index.
    """
    lowest, highest = min(alignments), max(alignments)
    unbroken_run = list(range(lowest, highest + 1))
    return sorted(alignments) == unbroken_run

# Fine-grained context type of every rebuttal sentence. Each sentence is
# counted here exactly once, so the values sum to the number of sentences.
context_type_counter = collections.Counter()

for pair in all_pairs:
    for sentence in pair["rebuttal_sentences"]:
        align_type, align_indices = sentence["alignment"]
        if align_type == "context_sentences":
            assert align_indices
            if is_contiguous(align_indices):
                context_subtype = CONTIGUOUS
                # Refine the fine-grained label: one aligned sentence vs. a
                # contiguous run of several.
                if len(align_indices) == 1:
                    align_type = "single"
                else:
                    align_type = CONTIGUOUS
            else:
                context_subtype = NONCONTIGUOUS
                align_type = NONCONTIGUOUS

            # The coarse contiguous / non-contiguous tally goes into its own
            # counter. Adding it to context_type_counter as well (as the code
            # used to) counted every sentence-level context twice and inflated
            # both the "contiguous" / "noncontiguous" rows and the percentage
            # denominator of the context-type table.
            context_counter[context_subtype] += 1
            length_counter[context_subtype][len(align_indices)] += 1
            # Number of aligned review sentences as a fraction of the review.
            normalized_length_counter[context_subtype][
                len(align_indices)/len(pair["review_sentences"])] += 1

        context_type_counter[align_type] += 1
        
        
# Human-readable table labels for each context-type key.
CONTEXT_TYPE_LABEL_MAP = {
    "context_global": "Global context",
    "contiguous": "Multiple contiguous",
    "single": "Single sentence",
    "context_in-rebuttal": "Context in rebuttal",
    "noncontiguous": "Multiple non-contiguous",
    "context_none": "No context",
    "context_error": "Context error",
    "context_unknown": "Cannot be determined",
}

# Denominator for the percentage column; the counter is not modified below.
counted_total = sum(context_type_counter.values())

# One table row per context type, most frequent first.
context_type_df_dicts = [
    {
        "Context type": CONTEXT_TYPE_LABEL_MAP[context_key],
        "Num. sents": num_sents,
        "% sents": "{:.2f}%".format(num_sents * 100/counted_total),
    }
    for context_key, num_sents in context_type_counter.most_common()
]

context_type_table = pd.DataFrame.from_dict(context_type_df_dicts)
print(context_type_table.to_latex(index=False))

# Aggregated records: one row per (context kind, normalised length) with the
# number of times it was observed.
normalized_dataframe_dicts = [
    {"mapped_to": mapped_to, "count": count, "cat": context_subtype}
    for context_subtype, maps in normalized_length_counter.items()
    for mapped_to, count in maps.items()
]
normalized_df = pd.DataFrame.from_dict(normalized_dataframe_dicts)

# Expanded records: each (context kind, length) is repeated once per
# observation, so the frame has one row per counted occurrence.
length_dataframe_dicts = []
for context_subtype, maps in length_counter.items():
    for mapped_to, count in maps.items():
        record = {"mapped_to": mapped_to, "cat": context_subtype}
        length_dataframe_dicts.extend(record for _ in range(count))
length_df = pd.DataFrame.from_dict(length_dataframe_dicts)
            
# Two stacked panels: raw number of aligned review sentences (top) and the
# same quantity normalised by review length (bottom).
fig, axes = plt.subplots(2, 1, figsize=(10,5))

# length_df holds one row per observation, so a plain histogram is correct.
sns.histplot(
    data=length_df, x="mapped_to", hue="cat", ax=axes[0], multiple="dodge",
    log_scale=[False,True], bins=(length_df["mapped_to"].max()+1), palette="crest")

# normalized_df is aggregated (one row per distinct value plus a "count"
# column), so the density has to be weighted by that count; without weights
# every distinct fraction contributes equally regardless of how often it occurs.
sns.kdeplot(
    data=normalized_df, x="mapped_to", hue="cat", weights="count",
    multiple="stack", ax=axes[1], palette="crest")

# Adjust the layout only after both panels are drawn, so that the axis labels
# and legends added by seaborn are taken into account.
fig.tight_layout()

fig.savefig("mappedfrac.png")


In [None]:
# Debug peek at the records behind the length histogram. The list holds one
# entry per counted occurrence, so show its size and a short preview rather
# than dumping every record into the cell output.
print(len(length_dataframe_dicts), length_dataframe_dicts[:5])

In [None]:
# Agreeability vs. variance

import numpy as np

def get_agreeability(pair_obj):
    """Compute the share of 'concur' among 'concur' + 'dispute' sentences.

    Args:
        pair_obj: dict whose "rebuttal_sentences" entries each carry a
            'coarse' label.

    Returns:
        ``{"agreeability": value}`` where ``value`` is
        ``concur / (concur + dispute)`` over the rebuttal sentences, or
        ``None`` when the rebuttal has neither label.
    """
    label_counts = collections.Counter(
        sentence['coarse'] for sentence in pair_obj["rebuttal_sentences"])
    num_concur = label_counts['concur']
    num_dispute = label_counts['dispute']

    if num_concur + num_dispute == 0:
        return {'agreeability': None}

    return {"agreeability": num_concur / (num_concur + num_dispute)}

# forum id -> list of (agreeability, rating) tuples, one per pair in the forum.
forum_info_map = collections.defaultdict(list)

for pair in all_pairs:
    forum_info_map[pair["metadata"]["forum_id"]].append(
        (get_agreeability(pair)["agreeability"], pair["metadata"]["rating"]))

# Column names of the per-forum table (also used as axis labels when plotting).
agree_var = "Variance in agreeability"
rating_var = "Variance in rating"
avg_rating = "Average rating"

agreeability_df_dicts = []
for forum, info in forum_info_map.items():
    # A variance needs more than one pair in the forum.
    if len(info) == 1:
        continue
    agreeabilities, ratings = zip(*info)
    # Forums containing a pair without a defined agreeability are left out.
    if None in agreeabilities:
        continue
    forum_row = {
        agree_var: np.var(agreeabilities),
        rating_var: np.var(ratings),
        avg_rating: np.mean(ratings),
    }
    agreeability_df_dicts.append(forum_row)

agreeability_df = pd.DataFrame.from_dict(agreeability_df_dicts)

# Both guide-line thresholds are read from the 0.9 quantile of the table.
# NOTE(review): the second one is named "low_..." yet also uses the 0.9
# quantile - confirm this is intended.
upper_quantiles = agreeability_df.quantile(0.9)
high_agreeability_variance_threshold = upper_quantiles[agree_var]
low_score_variance_threshold = upper_quantiles[rating_var]

print(agreeability_df.quantile(0.75)[rating_var])

# One point per forum, coloured by its average rating.
ax = sns.scatterplot(
    data=agreeability_df, x=agree_var, y=rating_var, hue=avg_rating, palette="crest")

# Dashed vertical / horizontal guide lines at the two thresholds.
ax.plot(
    [high_agreeability_variance_threshold, high_agreeability_variance_threshold],
    [0, 8.5],
    linestyle="dashed", color="green")
ax.plot(
    [0, 0.25],
    [low_score_variance_threshold, low_score_variance_threshold],
    linestyle="dashed", color="green")

ax.figure.savefig("agreeability_vs_rating.pdf")

In [None]:


# fine-grained rebuttal label -> Counter over the coarse labels of the review
# sentences that rebuttal sentences with that label are aligned to.
j = collections.defaultdict(collections.Counter)

for example in all_pairs:
    review_coarse_labels = [
        review_sentence["coarse"] for review_sentence in example["review_sentences"]]
    for rebuttal_sentence in example["rebuttal_sentences"]:
        fine = rebuttal_sentence["fine"]
        align_type, aligned_idxs = rebuttal_sentence["alignment"]
        if align_type != "context_sentences":
            continue
        for aligned_idx in aligned_idxs:
            # Fetch the per-label Counter first, so the label is registered
            # even when the index lookup below fails.
            transitions_for_label = j[fine]
            try:
                review_label = review_coarse_labels[aligned_idx]
            except IndexError:
                # Best effort: an index past the end of the review is skipped.
                continue
            transitions_for_label[review_label] += 1

import matplotlib.pyplot as plt


# Coarse review-sentence labels, in the order used for the bar-chart x axis.
review_types = [
    "arg_request",
    "arg_evaluative",
    "arg_fact",
    "arg_structuring",
    "arg_other",
]

# Complete label sets for the two response families.
full_eval_responses = [
    "rebuttal_accept-praise",
    "rebuttal_concede-criticism",
    "rebuttal_mitigate-criticism",
    "rebuttal_reject-criticism",
]

full_request_responses = [
    "rebuttal_answer",
    "rebuttal_by-cr",
    "rebuttal_done",
    "rebuttal_future",
    "rebuttal_refute-question",
    "rebuttal_reject-request",
]

# Shorter lists of labels from each family.
eval_responses = ["rebuttal_concede-criticism"]

request_responses = ["rebuttal_answer", "rebuttal_done"]



# Two colours: the first for evaluative responses, the second for request
# responses.
family_colors = sns.color_palette("crest", 2)
print(family_colors)

# One stacked panel per plotted response label.
fig, axes = plt.subplots(
    nrows=len(eval_responses + request_responses), ncols=1, figsize=(10,8))

print("\n".join(j.keys()))

plotted_responses = sorted(eval_responses) + sorted(request_responses)
for panel, key in zip(axes, plotted_responses):
    vals = j[key]
    # Every plotted key comes from one of the two lists; the evaluative list
    # takes precedence.
    color = family_colors[0] if key in eval_responses else family_colors[1]
    bar_heights = [vals[review_type] for review_type in review_types]
    sns.barplot(x=review_types, y=bar_heights, ax=panel, color=color)
    # Label the panel with the response name minus its "rebuttal_" prefix.
    panel.set_ylabel(key[len("rebuttal_"):])

fig.savefig("transitions.pdf")
    

# Appendices

In [None]:
# Review and rebuttal length

# Every example across all subsets, flattened in one linear pass.
big_list = [example for examples in datasets.values() for example in examples]

# (review length, rebuttal length) -> number of examples with those lengths.
# NOTE: this rebinds `length_counter`, which the "Types of context" cell uses
# for a dict of Counters; that cell recreates its own value when it is run.
length_counter = collections.Counter()

for example in big_list:
    length_counter[(len(example["review_sentences"]),
                    len(example["rebuttal_sentences"]))] += 1

# Aggregated view: one row per distinct length combination.
df = pd.DataFrame.from_dict([
    {
        "review_length": a,
        "rebuttal_length": b,
        "count": count
    } for (a, b), count in length_counter.items()
])

# The joint histogram must see one row per example. Plotting the aggregated
# frame would give every distinct length combination the same weight and
# ignore how many examples share it.
example_lengths = pd.DataFrame(
    [(len(example["review_sentences"]), len(example["rebuttal_sentences"]))
     for example in big_list],
    columns=["review_length", "rebuttal_length"],
)

sns.jointplot(x=example_lengths.review_length, y=example_lengths.rebuttal_length,
              cmap="Blues", kind='hist')
# Save before showing: once plt.show() has run, the current figure is no
# longer the joint plot and savefig would write a blank image.
plt.savefig('reb-rev-len.png')
plt.show()

In [None]:
import tqdm
# OpenReview API client pointed at https://api.openreview.net; no credentials
# are passed here (presumably an anonymous guest session - confirm).
guest_client = openreview.Client(baseurl='https://api.openreview.net')

def get_num_reviews(guest_client, forum_id):
    """Count the notes in a forum that look like reviews.

    Args:
        guest_client: object exposing ``get_notes(forum=...)``.
        forum_id: id of the forum whose notes are fetched.

    Returns:
        The number of fetched notes that reply directly to ``forum_id`` and
        whose first signature contains the text 'Reviewer'.
    """
    return sum(
        1
        for note in guest_client.get_notes(forum=forum_id)
        if note.replyto == forum_id and 'Reviewer' in note.signatures[0]
    )

# for subset, examples in datasets.items():
#     forum_counter = collections.defaultdict(list)
#     for example in tqdm.tqdm(examples[:100]):
#         forum_counter[example["metadata"]["forum_id"]].append(example["metadata"]["review_id"])
    
    
#     percentage_annotated_list = []

#     for forum, annotated_reviews in tqdm.tqdm(forum_counter.items()):
#         percentage_annotated_list.append(len(annotated_reviews)/get_num_reviews(guest_client, forum))
        
#     sns.histplot(data=percentage_annotated_list)
#     break


In [None]:
# Annotator confusion
