In [3]:
import pandas as pd
import statistics
import matplotlib.pyplot as plt
from sklearn.metrics import cohen_kappa_score
import numpy as np
from scipy.stats import bootstrap
from math import sqrt

In [4]:
def agreement_proportion(matrix: pd.DataFrame):
    """Sum the products of matching entries of the last column and last row.

    With ``k = len(matrix) - 1``, returns
    ``sum(matrix.loc[i, k] * matrix.loc[k, i] for i in range(k))``.
    The table is addressed by label through ``.loc``, so its row and column
    labels must be the integers ``0..k``.

    NOTE(review): presumably the last column / last row hold the marginal
    proportions of the two raters, which would make this the chance-expected
    agreement p_e -- confirm against the code that builds ``matrix``.
    """
    last = len(matrix) - 1
    return sum(
        matrix.loc[idx, last] * matrix.loc[last, idx] for idx in range(last)
    )

def observed_proportion(matrix: pd.DataFrame):
    """Sum the diagonal of ``matrix``, leaving out its last row/column.

    With ``k = len(matrix) - 1``, returns
    ``sum(matrix.loc[i, i] for i in range(k))``. Label-based access is used,
    so row and column labels must be the integers ``0..k``.

    NOTE(review): presumably the diagonal holds the proportions of items on
    which both raters agree, making this the observed agreement p_o -- confirm.
    """
    last = len(matrix) - 1
    return sum(matrix.loc[idx, idx] for idx in range(last))


def sd_cohen(po, pe, matrix):
    """Return ``sqrt(po * (1 - po) / (1 - pe)**2)``.

    ``matrix`` is accepted but never read; it keeps the signature identical to
    ``sd_fleiss`` so either function can be passed where a
    ``func(po, pe, matrix)`` callable is expected.
    """
    numerator = po * (1 - po)
    complement = 1 - pe
    return sqrt(numerator / (complement * complement))

def ci(x1, x2):
    """Return the product of the complements, ``(1 - x1) * (1 - x2)``."""
    first_complement = 1 - x1
    second_complement = 1 - x2
    return first_complement * second_complement

def sd_fleiss(po, pe, matrix):
    """Large-sample standard deviation of kappa (Fleiss, Cohen & Everitt, 1969).

    Returns ``sqrt(V) / (1 - pe)**2`` where, with ``k = len(matrix) - 1``,
    ``p_ij = matrix.loc[i, j]``, ``c_i = matrix.loc[k, i]`` (last row) and
    ``r_i = matrix.loc[i, k]`` (last column)::

        V =   sum_i      p_ii * [(1 - pe) - (c_i + r_i) * (1 - po)]**2
            + (1 - po)**2 * sum_{i != j} p_ij * (c_i + r_j)**2
            - (po * pe - 2 * pe + po)**2

    The value is NOT divided by the square root of the sample size; the caller
    is expected to do that to obtain a standard error.

    Parameters
    ----------
    po, pe : float
        Observed and chance-expected agreement proportions.
    matrix : pd.DataFrame
        Square table addressed with integer labels ``0..k``.
        NOTE(review): assumes cells are proportions and that the last row /
        last column hold the column / row marginal totals -- confirm against
        the code that builds the table.

    Raises
    ------
    ValueError
        If ``V`` is meaningfully negative (only possible for an inconsistent
        table); raised by ``sqrt``.
    """
    k = len(matrix) - 1
    one_minus_po = 1 - po
    one_minus_pe = 1 - pe

    diagonal_term = 0
    off_diagonal_term = 0
    for i in range(k):
        col_total = matrix.loc[k, i]
        # Agreement cells: the bracket is squared as a whole. The cross term
        # of that square needs the *unsquared* marginal sum, so expanding it
        # term by term is easy to get wrong.
        bracket = one_minus_pe - (col_total + matrix.loc[i, k]) * one_minus_po
        diagonal_term += matrix.loc[i, i] * bracket * bracket
        # Disagreement cells (i != j).
        for j in range(k):
            if j != i:
                weight = col_total + matrix.loc[j, k]
                off_diagonal_term += matrix.loc[i, j] * weight * weight

    correction = po * pe - 2 * pe + po
    radicand = (
        diagonal_term
        + one_minus_po * one_minus_po * off_diagonal_term
        - correction * correction
    )
    # Round-off can push an exactly-zero variance (e.g. perfect agreement)
    # marginally below zero; anything more negative is left for sqrt to reject.
    if -1e-12 < radicand < 0:
        radicand = 0.0

    return sqrt(radicand) / (one_minus_pe * one_minus_pe)


def kappa(po, pe):
    """Return the kappa coefficient ``(po - pe) / (1 - pe)``.

    ``po`` is the observed agreement and ``pe`` the chance-expected agreement.
    No guard is applied for ``pe == 1``.
    """
    agreement_beyond_chance = po - pe
    max_beyond_chance = 1 - pe
    return agreement_beyond_chance / max_beyond_chance


def confidence_interval(matrix: pd.DataFrame, func: callable, n_samples=None):
    """Return the 95% normal-approximation confidence interval of kappa.

    Parameters
    ----------
    matrix : pd.DataFrame
        Agreement table passed on to ``observed_proportion``,
        ``agreement_proportion`` and ``func``.
    func : callable
        Standard-deviation estimator called as ``func(po, pe, matrix)``
        (``sd_cohen`` and ``sd_fleiss`` have this signature).
    n_samples : int, optional
        Number by whose square root the standard deviation is divided to get
        the standard error. Defaults to ``len(matrix)``.
        NOTE(review): ``len(matrix)`` is the number of rows of the table; the
        other helpers treat the table as (categories + 1) rows, so this is
        probably not the number of rated items. Confirm, and pass the real
        sample size explicitly.

    Returns
    -------
    (low, high) : tuple of float
        ``kappa - 1.96 * se`` and ``kappa + 1.96 * se``.
    """
    po = observed_proportion(matrix)
    pe = agreement_proportion(matrix)

    if n_samples is None:
        n_samples = len(matrix)
    # `se` is a number; it must be used as a value, not called.
    se = func(po, pe, matrix) / sqrt(n_samples)

    point_estimate = kappa(po, pe)
    margin = 1.96 * se  # two-sided 95% quantile of the standard normal

    return point_estimate - margin, point_estimate + margin


In [1]:
# Category names used by the notebook. A "Statement" entry is deliberately
# left out of the list (it was commented out in the original definition).
list_categories = list(
    (
        "Models and algorithms",
        "Datasets",
        "Code",
        "Experimental results",
        "Error bars or statistical significance",
        "Comments",
        "Meta-categories",
    )
)

# Column labels: one per review (1 to 3), framed by 'category' and 'all reviews'.
columns = ['category'] + [f'review {number}' for number in range(1, 4)] + ['all reviews']



In [5]:

# Enter the path to the TSV file with the ratings from the first reviewer.
path_tsv = "/Users/camille.brianceau/aramis/reproducibility-reviews/annotations/annotations_elina2.tsv"
# The file is read without a header row; rows containing any missing value
# are discarded right after loading.
df_rating_1 = pd.read_csv(
    path_tsv,
    sep="\t",
    index_col=False,
    header=None,
).dropna()



In [6]:
# Enter the path to the TSV file with the ratings from the second reviewer.
path_tsv = "/Users/camille.brianceau/aramis/reproducibility-reviews/annotations/annotations_olivier2.tsv"
# The file is read without a header row; rows containing any missing value
# are discarded right after loading.
df_rating_2 = pd.read_csv(
    path_tsv,
    sep="\t",
    index_col=False,
    header=None,
).dropna()