In [3]:
import pandas as pd
import random
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import warnings
from matplotlib.ticker import PercentFormatter
import matplotlib.ticker as mtick
warnings.filterwarnings('ignore')
import seaborn as sns
import math
from textwrap import wrap
sns.set_style({'font.family':'serif', 'font.serif':'Times New Roman'})

In [4]:
# Main submissions table; later cells read its `year`, `decisions` and
# `ratings` columns.
df = pd.read_csv(
    "../dataset_final.csv",
    quotechar='"',
)

In [5]:
# OpenReview export; only its `categories` column is used in this notebook.
sub = pd.read_csv(
    "../openreview/openreview.csv",
    quotechar='"',
)

In [6]:
# Attach the topic categories from the OpenReview export to the main table.
# Assigning a Series to a column aligns on the index, and both frames were
# read with the default positional index, so rows are paired by position.
# NOTE(review): this assumes both CSVs list the same submissions in the same
# order - TODO confirm. Rows of `df` with no matching index in `sub` get NaN.
df['categories'] = sub['categories']

In [7]:
def clean_decision(row):
    """Map the textual decision of a submission to an integer code.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a string
            ``'decisions'`` entry.

    Returns:
        0 when the decision is exactly "Reject", "Invite to Workshop Track"
        or "Withdrawn"; 1 when it contains "Poster"; 2 when it contains
        "Talk" or "Oral"; 3 for any other text.

    Note:
        Code that tests ``decisions > 0`` treats code 3 (any unrecognised
        decision text) as accepted.
    """
    decision = row['decisions']
    # Exact matches only: these three labels are the non-accepted outcomes.
    if decision in ("Reject", "Invite to Workshop Track", "Withdrawn"):
        return 0
    if "Poster" in decision:
        return 1
    if "Talk" in decision or "Oral" in decision:
        return 2
    return 3

def calculate_average_score(row):
    """Return the mean reviewer rating of a submission, rounded to 1 decimal.

    Args:
        row: Mapping-like record whose ``'ratings'`` entry is a
            ``;``-separated string of numeric ratings (e.g. ``"6;7;8"``).

    Returns:
        The arithmetic mean of the ratings as a float rounded to one decimal.
    """
    scores = list(map(float, row['ratings'].split(";")))
    mean_score = sum(scores) / len(scores)
    return round(mean_score, 1)

def clean_categories(row):
    """Return the row's category string, or the placeholder ``'12'`` if absent.

    Args:
        row: Mapping-like record with a ``'categories'`` entry.

    Returns:
        ``row['categories']`` unchanged when it is a ``str``; otherwise
        (e.g. NaN or None) the string ``'12'``.
    """
    categories = row['categories']
    return categories if isinstance(categories, str) else '12'

# Per-year working copies of the submissions table, keyed by year (2017-2020).
data = {}

for year in range(2017, 2021):
    # Copy so the derived columns never write back into `df`.
    yearly = df[df.year == year].copy()
    # Replace the textual decision with its integer code.
    yearly['decisions'] = yearly.apply(clean_decision, axis=1)
    # Mean reviewer rating, rounded to one decimal.
    yearly['average_score'] = yearly.apply(calculate_average_score, axis=1)
    # Missing categories become the placeholder '12'.
    yearly['categories'] = yearly.apply(clean_categories, axis=1)
    data[year] = yearly

In [8]:
def get_accept(df, category):
    """Count accepted submissions in ``df`` that are tagged with ``category``.

    Args:
        df: DataFrame with a ``'categories'`` column (``;``-separated
            category ids as a string) and a numeric ``'decisions'`` column.
        category: Category id to look for; it is compared as ``str(category)``
            against each ``;``-separated token, so ``1`` does not match ``'11'``.

    Returns:
        Number of rows whose categories include ``category`` and whose
        ``decisions`` value is greater than 0. Rows whose ``categories``
        value is not a string are skipped.
    """
    wanted = str(category)  # loop-invariant, so computed once
    # Fallback kept for callers passing a category equal to 12 whose str()
    # is not '12' (e.g. 12.0); for the int 12 the token test already matches.
    is_placeholder_id = category == 12

    count = 0
    # Iterate the two needed columns directly; iterrows() would build a
    # full Series for every row only to read these two values.
    for categories, decision in zip(df['categories'], df['decisions']):
        if not isinstance(categories, str):
            continue
        tagged = wanted in categories.split(";") or (
            is_placeholder_id and categories == '12'
        )
        if tagged and decision > 0:
            count += 1
    return count

def get_total(df, category):
    """Count submissions in ``df`` that are tagged with ``category``.

    Args:
        df: DataFrame with a ``'categories'`` column holding ``;``-separated
            category ids as a string.
        category: Category id to look for; it is compared as ``str(category)``
            against each ``;``-separated token, so ``1`` does not match ``'11'``.

    Returns:
        Number of rows whose categories include ``category``. Rows whose
        ``categories`` value is not a string are skipped.
    """
    wanted = str(category)  # loop-invariant, so computed once
    # Fallback kept for callers passing a category equal to 12 whose str()
    # is not '12' (e.g. 12.0); for the int 12 the token test already matches.
    is_placeholder_id = category == 12

    count = 0
    # Only one column is needed, so iterate it directly instead of building
    # a Series per row with iterrows().
    for categories in df['categories']:
        if not isinstance(categories, str):
            continue
        if wanted in categories.split(";") or (
            is_placeholder_id and categories == '12'
        ):
            count += 1
    return count

In [None]:
# accept_rate[year] is a 12-element list: the acceptance rate of category ids
# 1-11 (in that order) followed by the overall acceptance rate for the year.
accept_rate = dict()

for year in range(2017, 2021):
    ar = []
    for c in range(1, 12):
        accepted, total = get_accept(data[year], c), get_total(data[year], c)
        # A category with no submissions that year is reported as rate 0.
        ar.append(accepted / total if total != 0 else 0)

    # Overall rate: any decision code > 0 counts as accepted. Guard the
    # denominator the same way as the per-category rates so a year with no
    # rows yields 0 instead of raising ZeroDivisionError.
    n_papers = len(data[year])
    n_accepted = len(data[year][data[year]['decisions'] > 0])
    ar.append(n_accepted / n_papers if n_papers != 0 else 0)

    accept_rate[year] = ar

In [None]:
# submit[year] is an 11-element list: for category ids 1-11, the share of that
# year's submissions tagged with the category. A submission can carry several
# ';'-separated categories, so the shares need not sum to 1.
submit = dict()

for year in [2020]:
    n_papers = len(data[year])
    ar = []
    for c in range(1, 12):
        total = get_total(data[year], c)
        print(total)
        # Report 0 rather than raising ZeroDivisionError when the year has
        # no submissions at all.
        ar.append(total / n_papers if n_papers != 0 else 0)
    submit[year] = ar

In [None]:
# Bar labels, paired by position with accept_rate[2020]: one label per
# category id 1-11 (in that order), then "All" for the overall rate.
# NOTE(review): the id -> topic-name mapping is not visible in this notebook;
# confirm it against the source of the `categories` column.
# The names `reviewer_x` / `reproducibility_y` are historical; here they hold
# topic labels and acceptance rates.
reviewer_x = ['Theory', 'Vision', 'NLP','Adversarial', 'Generative', 'Meta-learning', 'Fairness', 'Generalization', 'Optimization', 'Graph', 'Bayesian', "All"]
# 2020 acceptance rate per topic, rounded to two decimals for display.
reproducibility_y = [round(rate, 2) for rate in accept_rate[2020]]

# Seaborn plot
sns.set_context('talk')

# Create dataframe for plotting.
plot_df = pd.DataFrame({'x': reviewer_x, 'y': reproducibility_y})

# Initialize figure and axis.
fig, ax = plt.subplots(figsize=(8, 6))

barplot = sns.barplot(x='x', y='y', data=plot_df, ax=ax, color="royalblue")

# Axis styling.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)
ax.set(ylim=(0, 0.5))

# Tick-label size and rotation go through tick_params: the `Tick.label`
# attribute used previously is deprecated and missing from recent Matplotlib
# releases, where it raises AttributeError.
ax.tick_params(axis='x', labelsize=18, labelrotation=75)
ax.tick_params(axis='y', labelsize=12)

# Print each bar's value just above its top edge.
for p in ax.patches:
    ax.annotate(
        "%.2f" % p.get_height(),
        (p.get_x() + p.get_width() / 2., p.get_height()),
        ha='center', va='center', fontsize=11, color='black',
        xytext=(0, 7), textcoords='offset points',
    )

# Axis labeling: topic names on the ticks are self-explanatory, so no x label.
barplot.set(xlabel=None)
ax.set_ylabel('Acceptance Rate', labelpad=15, fontsize=23)

fig.tight_layout()

fig.savefig("topic_breakdown_1.pdf")

In [None]:
# Share of 2020 submissions per topic, rounded to two decimals for display.
# The names `reviewer_x` / `reproducibility_y` are historical; here they hold
# topic labels and submission shares.
reproducibility_y = [round(rate, 2) for rate in submit[2020]]

# Bar labels, paired by position with submit[2020]: one label per category
# id 1-11 (in that order).
# NOTE(review): the id -> topic-name mapping is not visible in this notebook;
# confirm it against the source of the `categories` column.
reviewer_x = ['Theory', 'Vision', 'NLP','Adversarial', 'Generative', 'Meta-learning', 'Fairness', 'Generalization', 'Optimization', 'Graph', 'Bayesian']

# Seaborn plot
sns.set_context('talk')

# Create dataframe for plotting.
plot_df = pd.DataFrame({'x': reviewer_x, 'y': reproducibility_y})

# Initialize figure and axis.
fig, ax = plt.subplots(figsize=(8, 6))

barplot = sns.barplot(x='x', y='y', data=plot_df, ax=ax, color="royalblue")

# Axis styling.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)
ax.set(ylim=(0, 0.175))

# Tick-label size and rotation go through tick_params: the `Tick.label`
# attribute used previously is deprecated and missing from recent Matplotlib
# releases, where it raises AttributeError.
ax.tick_params(axis='x', labelsize=18, labelrotation=75)
ax.tick_params(axis='y', labelsize=12)

# Print each bar's value just above its top edge.
for p in ax.patches:
    ax.annotate(
        "%.2f" % p.get_height(),
        (p.get_x() + p.get_width() / 2., p.get_height()),
        ha='center', va='center', fontsize=11, color='black',
        xytext=(0, 7), textcoords='offset points',
    )

# Axis labeling: topic names on the ticks are self-explanatory, so no x label.
barplot.set(xlabel=None)
ax.set_ylabel('Distribution', labelpad=15, fontsize=23)

fig.tight_layout()

fig.savefig("topic_breakdown_2.pdf")