In [47]:
import pandas as pd
import statsmodels.api as sm
import numpy as np
from pymer4.models import Lmer
from scipy.stats import f

# Example ratings: one row per subject (9 rows), one column per rater (3 columns).
data = np.array(
    [
        [9.30, 9.70, 8.90],
        [8.90, 8.80, 8.10],
        [8.00, 8.10, 7.30],
        [9.10, 9.00, 8.20],
        [9.10, 9.20, 8.30],
        [8.90, 9.00, 7.70],
        [8.30, 8.70, 8.10],
        [9.30, 9.40, 8.20],
        [9.40, 9.80, 9.40],
    ]
)

# Two-sided confidence level for the ICC interval estimates.
confidence_level = 0.95
# ICC() below converts the ratings matrix to a long-format data frame before fitting.

def _variance_component(variance_table, group):
    """Return the variance estimate for *group* from a pymer4 ``ranef_var`` table.

    The lookup is by label rather than by row position: lme4 orders grouping
    factors by decreasing number of levels, so the row order of subjects and
    raters flips whenever there are more raters than subjects.
    """
    # np.float64 (not float) so that a zero variance yields inf/nan further
    # down instead of raising ZeroDivisionError.
    return np.float64(variance_table.loc[group, "Var"])


def _f_bounds(f_stat, df_num, df_den, alpha):
    """Return ``(F / F_crit(df_num, df_den), F * F_crit(df_den, df_num))``.

    Both critical values are taken at the ``1 - alpha / 2`` quantile; the two
    scaled F statistics are the building blocks of the one-way and two-way
    fixed confidence limits.
    """
    lower = f_stat / f.ppf(1 - alpha / 2, df_num, df_den)
    upper = f_stat * f.ppf(1 - alpha / 2, df_den, df_num)
    return lower, upper


def _icc_table(var_subjects, var_raters, var_error, n_subjects, n_raters, alpha):
    """Build the six-row ICC table from variance components.

    Mean squares are reconstructed from the variance components, then the
    single-rater and average-rater coefficients (ICC1/2/3 and ICC1k/2k/3k),
    their F tests and two-sided confidence limits are computed.
    """
    n, k = n_subjects, n_raters

    # Mean squares implied by the variance components.
    ms_error = var_error
    ms_subjects = k * var_subjects + ms_error
    ms_raters = n * var_raters + ms_error
    ms_within = ms_error + var_raters

    f_one_way = ms_subjects / ms_within
    f_two_way = ms_subjects / ms_error
    f_raters = ms_raters / ms_error

    df1_one_way, df2_one_way = n - 1, n * (k - 1)
    df1_two_way, df2_two_way = n - 1, (n - 1) * (k - 1)
    p_one_way = f.sf(f_one_way, df1_one_way, df2_one_way)
    p_two_way = f.sf(f_two_way, df1_two_way, df2_two_way)

    # Model 1 - agreement, one-way random (each subject judged by a different
    # random set of raters).
    icc1 = (ms_subjects - ms_within) / (ms_subjects + (k - 1) * ms_within)
    icc1k = (ms_subjects - ms_within) / ms_subjects
    f1_low, f1_high = _f_bounds(f_one_way, df1_one_way, df2_one_way, alpha)
    icc1_lower = (f1_low - 1) / (f1_low + (k - 1))
    icc1_upper = (f1_high - 1) / (f1_high + k - 1)
    icc1k_lower = 1 - 1 / f1_low
    icc1k_upper = 1 - 1 / f1_high

    # Model 2 - agreement, two-way random (same random set of raters for all
    # subjects).
    icc2 = (ms_subjects - ms_error) / (
        ms_subjects + (k - 1) * ms_error + k * (ms_raters - ms_error) / n
    )
    icc2k = (ms_subjects - ms_error) / (ms_subjects + (ms_raters - ms_error) / n)
    # Approximate denominator degrees of freedom (vn / vd) for the ICC2 limits.
    vn = (k - 1) * (n - 1) * (
        k * icc2 * f_raters + n * (1 + (k - 1) * icc2) - k * icc2
    ) ** 2
    vd = (n - 1) * k**2 * icc2**2 * f_raters**2 + (
        n * (1 + (k - 1) * icc2) - k * icc2
    ) ** 2
    v = vn / vd
    f_crit_lower = f.ppf(1 - alpha / 2, n - 1, v)
    f_crit_upper = f.ppf(1 - alpha / 2, v, n - 1)
    rater_term = k * ms_raters + (k * n - k - n) * ms_error
    icc2_lower = n * (ms_subjects - f_crit_lower * ms_error) / (
        f_crit_lower * rater_term + n * ms_subjects
    )
    icc2_upper = n * (f_crit_upper * ms_subjects - ms_error) / (
        rater_term + n * f_crit_upper * ms_subjects
    )
    icc2k_lower = icc2_lower * k / (1 + icc2_lower * (k - 1))
    icc2k_upper = icc2_upper * k / (1 + icc2_upper * (k - 1))

    # Model 3 - consistency, two-way mixed (same specific raters).
    icc3 = (ms_subjects - ms_error) / (ms_subjects + (k - 1) * ms_error)
    icc3k = (ms_subjects - ms_error) / ms_subjects
    f3_low, f3_high = _f_bounds(f_two_way, df1_two_way, df2_two_way, alpha)
    icc3_lower = (f3_low - 1) / (f3_low + k - 1)
    icc3_upper = (f3_high - 1) / (f3_high + k - 1)
    icc3k_lower = 1 - 1 / f3_low
    icc3k_upper = 1 - 1 / f3_high

    # Build the frame in one go so every column gets its natural dtype
    # (writing strings into a NaN-filled float frame is deprecated in pandas).
    return pd.DataFrame(
        {
            "type": ["ICC1", "ICC2", "ICC3", "ICC1k", "ICC2k", "ICC3k"],
            "ICC": [icc1, icc2, icc3, icc1k, icc2k, icc3k],
            "F": [f_one_way, f_two_way, f_two_way, f_one_way, f_two_way, f_two_way],
            "df1": [
                df1_one_way, df1_two_way, df1_two_way,
                df1_one_way, df1_two_way, df1_two_way,
            ],
            "df2": [
                df2_one_way, df2_two_way, df2_two_way,
                df2_one_way, df2_two_way, df2_two_way,
            ],
            "p": [p_one_way, p_two_way, p_two_way, p_one_way, p_two_way, p_two_way],
            "lower": [
                icc1_lower, icc2_lower, icc3_lower,
                icc1k_lower, icc2k_lower, icc3k_lower,
            ],
            "upper": [
                icc1_upper, icc2_upper, icc3_upper,
                icc1k_upper, icc2k_upper, icc3k_upper,
            ],
        }
    )


def ICC(data, confidence_level=0.95):
    """Compute intraclass correlation coefficients from a ratings matrix.

    A crossed random-effects model ``Values ~ 1 + (1 | ID) + (1 | Raters)`` is
    fitted with pymer4's ``Lmer``; its variance components are turned into
    mean squares from which ICC1, ICC2, ICC3 and their average-rater variants
    (ICC1k, ICC2k, ICC3k) are derived, together with F tests and confidence
    limits.

    Parameters
    ----------
    data : array-like, shape (n_subjects, n_raters)
        One row per rated subject, one column per rater. Any number of raters
        (at least two) is accepted. NaN entries are dropped before fitting.
    confidence_level : float, default 0.95
        Two-sided confidence level, strictly between 0 and 1.

    Returns
    -------
    dict
        ``{"results": DataFrame}`` with columns ``type``, ``ICC``, ``F``,
        ``df1``, ``df2``, ``p``, ``lower`` and ``upper``; one row per
        coefficient.

    Raises
    ------
    ValueError
        If ``confidence_level`` is not in (0, 1), or ``data`` is not
        two-dimensional with at least two subjects and two raters.
    """
    if not 0 < confidence_level < 1:
        raise ValueError(
            f"confidence_level must be strictly between 0 and 1, got {confidence_level!r}"
        )

    ratings = np.asarray(data, dtype=float)
    if ratings.ndim != 2:
        raise ValueError(
            f"data must be 2-D (subjects x raters), got {ratings.ndim} dimension(s)"
        )
    n_subjects, n_raters = ratings.shape
    if n_subjects < 2 or n_raters < 2:
        raise ValueError(
            "data must contain at least two subjects and two raters, "
            f"got shape {ratings.shape}"
        )

    alpha = 1 - confidence_level

    # Long format, row-major: every rater's value for subject 0, then subject 1, ...
    long_format = (
        pd.DataFrame(
            {
                "ID": np.repeat(np.arange(n_subjects), n_raters),
                "Raters": np.tile(np.arange(1, n_raters + 1), n_subjects),
                "Values": ratings.ravel(),
            }
        )
        .dropna()
        .reset_index(drop=True)
    )

    model = Lmer('Values ~ 1 + (1 | ID) + (1 | Raters)', data=long_format)
    model.fit(summary=False)
    variance_table = model.ranef_var

    results = _icc_table(
        var_subjects=_variance_component(variance_table, "ID"),
        var_raters=_variance_component(variance_table, "Raters"),
        var_error=_variance_component(variance_table, "Residual"),
        n_subjects=n_subjects,
        n_raters=n_raters,
        alpha=alpha,
    )
    return {"results": results}

# Run the analysis on the example ratings, using the confidence level defined
# above instead of repeating the literal.
ICC(data, confidence_level)



{'results':     type       ICC          F  df1  df2         p     lower     upper
 0   ICC1  0.434527   3.305295    8   18  0.016728  0.032206  0.804341
 1   ICC2  0.508940  16.857998    8   16  0.000002  0.023566  0.852259
 2   ICC3  0.840916  16.857998    8   16  0.000002  0.594313  0.957576
 3  ICC1k  0.697455   3.305295    8   18  0.016728  0.090771  0.924997
 4  ICC2k  0.756645  16.857998    8   16  0.000002  0.067516  0.945373
 5  ICC3k  0.940681  16.857998    8   16  0.000002  0.814639  0.985447}