In [142]:
import numpy as np
import scipy.stats as st

def calculate_p_value_from_z_score(score):
    p_value = st.t.sf((abs(score)), 100000) * 2
    return min(float(p_value), 0.99999)

#Cohens Kappa fpr two raters
#contingency_table = np.array([[106,10,4],[22,28,10],[2,12,6]])
contingency_table = np.array([[4,4,2],[2,6,0],[0,2,0]])
#contingency_table = np.array([[10,4,1],[6,16,2],[0,3,8]])
#contingency_table2 = np.array([[20,5],[10,15]])
values = [1, 0, 0.4412, 0, 1, 0.67, 0.4412, 0.67, 1]

# Create a 3x3 matrix
matrix = np.array(values).reshape((3, 3))

confidence_level = 0.95

def Cohens_Kappa_2_raters(contingency_table, confidence_level, weights_type='equal-spacing'):
    number_of_levels = contingency_table.shape[1]
    Sample_Size = np.sum(contingency_table)
    Sum_of_coloumns = np.sum(contingency_table, axis=0) / Sample_Size
    Sum_of_rows = np.sum(contingency_table, axis=1) / Sample_Size
    if weights_type == 'equal-spacing':
        Weights_Matrix = 1 - abs(np.subtract.outer(np.arange(1, number_of_levels+1), np.arange(1, number_of_levels+1))) / (number_of_levels-1)
    elif weights_type == 'fleiss':
        Weights_Matrix = 1 - (abs(np.subtract.outer(np.arange(1, number_of_levels+1), np.arange(1, number_of_levels+1))) / (number_of_levels-1))**2


    Percentages_of_Agreement = np.sum(Weights_Matrix * contingency_table)/ Sample_Size
    Percentages_Of_Disagreement = np.sum(Weights_Matrix * np.outer(Sum_of_rows, Sum_of_coloumns))
    Cohens_Kappa = (Percentages_of_Agreement - Percentages_Of_Disagreement) / (1 - Percentages_Of_Disagreement)

    # Calculate the Variance (Fleiss, Cohen & Everrit, 1969)
    Agreement_Matrix = np.eye(contingency_table.shape[1])
    Probabilty_Matrix = (contingency_table)/Sample_Size
    Variance_Matrix = np.subtract((np.subtract(Weights_Matrix.T, (np.dot(Weights_Matrix, np.sum(Probabilty_Matrix, axis=1)) * (1 - Cohens_Kappa))).T), np.dot(Weights_Matrix, np.sum(Probabilty_Matrix, axis=0)) * (1 - Cohens_Kappa)).T
    Standart_Error_Kappa_Fleisse = np.sqrt((np.sum(Probabilty_Matrix * Variance_Matrix**2) - (Cohens_Kappa - Percentages_Of_Disagreement * (1 - Cohens_Kappa))**2) / np.inner(1 - Percentages_Of_Disagreement, 1 - Percentages_Of_Disagreement) / Sample_Size)
    
    # Calculate the Variance of H0
    Outer_Probability_Matrix = np.multiply.outer(Sum_of_coloumns, Sum_of_rows)
    Sum_of_rows = np.sum(np.sum(Probabilty_Matrix,axis = 0) * Weights_Matrix,axis = 1)
    Sum_of_coloumns = np.sum(np.sum(Probabilty_Matrix,axis = 1) * Weights_Matrix,axis = 1)
    Weighted_Variance_Matrix = (Weights_Matrix - np.add.outer(Sum_of_rows,Sum_of_coloumns))**2
    term = np.sum(Outer_Probability_Matrix * Weighted_Variance_Matrix)
    Standard_Error_H0 = np.sqrt((term - Percentages_Of_Disagreement**2) / (Sample_Size* (1-Percentages_Of_Disagreement)**2))

    # Signficance
    StatisticH0 = Cohens_Kappa / Standart_Error_Kappa_Fleisse
    p_valueH0 = calculate_p_value_from_z_score(StatisticH0)
    Statistic_Fleiss = Cohens_Kappa / Standard_Error_H0
    p_value_Fleiss = calculate_p_value_from_z_score(Statistic_Fleiss)

    # Confidence Interval
    zcrit = st.t.ppf(1 - (1 - confidence_level) / 2, 100000)
    Lower_Confidence_Interval_Kappa = Cohens_Kappa - Standart_Error_Kappa_Fleisse*zcrit
    Upper_Confidence_Interval_Kappa = Cohens_Kappa + Standart_Error_Kappa_Fleisse*zcrit
    Lower_Confidence_Interval_Kappa_H0 = Cohens_Kappa - Standard_Error_H0*zcrit
    Upper_Confidence_Interval_Kappa_H0 = Cohens_Kappa + Standard_Error_H0*zcrit

    results = {}

    results["Cohens Kappa"]= Cohens_Kappa
    results["p_value"] = p_valueH0
    results["Statistic"]= StatisticH0
    results["Standard_Error_H0"]= Standard_Error_H0
    results["Standart_Error_Fleisse, Cohen & Everett"] = Standart_Error_Kappa_Fleisse
    results["p_value (Fleisse, Cohen & Everett)"] = p_value_Fleiss
    results["Statistic (Fleisse, Cohen & Everett)"]= Statistic_Fleiss
    results["Confidence Intervals Kappa"] = f"({round(Lower_Confidence_Interval_Kappa, 4)}, {round(Upper_Confidence_Interval_Kappa, 4)})"
    results["Confidence Intervals Kappa_H0"] = f"({round(Lower_Confidence_Interval_Kappa_H0, 4)}, {round(Upper_Confidence_Interval_Kappa_H0, 4)})"

    result_str = "\n".join([f"{key}: {value}" for key, value in results.items()])
    return result_str

a = Cohens_Kappa_2_raters(contingency_table, confidence_level)
#b = Cohens_Kappa_2_raters(contingency_table2, confidence_level)

print(a)


Cohens Kappa: 0.1176470588235292
p_value: 0.4518653329790838
Statistic: 0.7523118184738425
Standard_Error_H0: 0.24289203320073627
Standart_Error_Fleisse, Cohen & Everett: 0.15638071333531725
p_value (Fleisse, Cohen & Everett): 0.6281318310112067
Statistic (Fleisse, Cohen & Everett): 0.4843594796964818
Confidence Intervals Kappa: (-0.1889, 0.4242)
Confidence Intervals Kappa_H0: (-0.3584, 0.5937)
