Intraclass correlation and cronbachs alpha are very related... 

In [1]:
import numpy as np
import pandas as pd
import pingouin as pg


In [2]:
path = "../data/appraisal/export_appraisal.csv"
df = pd.read_csv(path)
df.head()

Unnamed: 0,user_id,survey_id,survey_item_index,filename,has_reply,emotion_1,video_id,mix,emotion_1_id,emotion_2_id,intensity_level,reply_dim_Novelty,reply_dim_Pleasantness,reply_dim_Goal conduciveness,reply_dim_Urgency,reply_dim_Power,reply_dim_Norm compatibility
0,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,0,A200_int_v_3.mp4,1,interest_curiosity,A200,0,29,100,3.0,1,4,4,1,3,3
1,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,1,A102_exc_v_3.mp4,1,excitement_expectation,A102,0,36,100,3.0,1,5,5,1,3,5
2,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,2,A102_fea_p_2.mp4,1,fear,A102,0,10,100,2.0,3,1,1,2,2,1
3,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,3,A102_amu_v_3.mp4,1,amusement,A102,0,13,100,3.0,2,4,4,2,3,4
4,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,4,A207_pea_v_2.mp4,1,peacefulness_serenity,A207,0,25,100,2.0,1,2,2,2,2,2


In [3]:
df.columns

Index(['user_id', 'survey_id', 'survey_item_index', 'filename', 'has_reply',
       'emotion_1', 'video_id', 'mix', 'emotion_1_id', 'emotion_2_id',
       'intensity_level', 'reply_dim_Novelty', 'reply_dim_Pleasantness',
       'reply_dim_Goal conduciveness', 'reply_dim_Urgency', 'reply_dim_Power',
       'reply_dim_Norm compatibility'],
      dtype='object')

In [4]:
scales = [
    'reply_dim_Novelty',
    'reply_dim_Pleasantness',
    'reply_dim_Goal conduciveness',
    'reply_dim_Urgency',
    'reply_dim_Power',
    'reply_dim_Norm compatibility'
]

scales_df = df[scales]
scales_df

Unnamed: 0,reply_dim_Novelty,reply_dim_Pleasantness,reply_dim_Goal conduciveness,reply_dim_Urgency,reply_dim_Power,reply_dim_Norm compatibility
0,1,4,4,1,3,3
1,1,5,5,1,3,5
2,3,1,1,2,2,1
3,2,4,4,2,3,4
4,1,2,2,2,2,2
...,...,...,...,...,...,...
787,3,5,5,3,4,5
788,5,1,1,4,4,2
789,4,5,5,4,5,5
790,1,3,2,2,4,3


In [5]:
# Number of items
n_items = len(scales_df.columns)
n_items

6

In [6]:
# Item variances
item_variances = scales_df.var(ddof=1)
item_variances

reply_dim_Novelty               1.737016
reply_dim_Pleasantness          2.185941
reply_dim_Goal conduciveness    2.425385
reply_dim_Urgency               1.774392
reply_dim_Power                 1.635285
reply_dim_Norm compatibility    2.108698
dtype: float64

In [7]:
# Total variances
respondent_totals = scales_df.sum(axis=1)
respondent_totals

0      16
1      20
2      10
3      19
4      11
       ..
787    25
788    17
789    28
790    15
791    20
Length: 792, dtype: int64

In [8]:
total_variance = respondent_totals.var(ddof=1)
total_variance

26.284711846658745

In [9]:
# Cronbach's Alpha
cronbach_alpha = (n_items / (n_items - 1)) * (1 - item_variances.sum() / total_variance)

print(f"Cronbach's Alpha: {cronbach_alpha}")

Cronbach's Alpha: 0.6582379060301083


## Intraclass Correlation 

In [10]:
video_means = df.groupby('filename')[scales[0]].mean()
video_means

filename
A102_amu_v_3.mp4        2.888889
A102_ang_p_3.mp4        2.777778
A102_ang_v_2.mp4        3.333333
A102_anx_p_2.mp4        2.111111
A102_anx_p_3.mp4        2.555556
                          ...   
A327_ele_p_3.mp4        3.000000
A334_reg_p_2.mp4        2.111111
A407_ins_p_2.mp4        3.222222
A424_ele_v_3.mp4        2.666667
A55_gui_v_2_ver1.mp4    1.333333
Name: reply_dim_Novelty, Length: 88, dtype: float64

In [11]:
grand_mean = df[scales[0]].mean()
grand_mean

2.494949494949495

In [12]:
# Calculate the grand mean across all ratings for this scale
#TODO: THIS grand mean should not capture different items that portray the same emotions, that skews the squared diffs calculation

grand_mean = df[scales[0]].mean()

# Compute the squared differences from the grand mean for each video's mean rating
squared_diffs = (video_means - grand_mean) ** 2

# Compute MSB: mean of these squared differences, adjust for degrees of freedom if necessary
MSB = squared_diffs.mean()
MSB

0.6983471074380164

In [13]:
video_means

filename
A102_amu_v_3.mp4        2.888889
A102_ang_p_3.mp4        2.777778
A102_ang_v_2.mp4        3.333333
A102_anx_p_2.mp4        2.111111
A102_anx_p_3.mp4        2.555556
                          ...   
A327_ele_p_3.mp4        3.000000
A334_reg_p_2.mp4        2.111111
A407_ins_p_2.mp4        3.222222
A424_ele_v_3.mp4        2.666667
A55_gui_v_2_ver1.mp4    1.333333
Name: reply_dim_Novelty, Length: 88, dtype: float64

In [14]:
intermediary_df = df[['filename', scales[0]]].copy()
video_means = intermediary_df.groupby('filename')[scales[0]].mean().rename('video_mean')
intermediary_df = intermediary_df.merge(video_means, on='filename')

intermediary_df['squared_diffs'] = (intermediary_df[scales[0]] - intermediary_df['video_mean']) ** 2

MSE = intermediary_df['squared_diffs'].mean()
MSE

1.0364758698092031

In [15]:
MSB - MSE / (MSB +  (792 -1) * MSE)

0.6970839608781992

In [16]:
df.shape

(792, 17)

In [17]:
F = MSB / MSE
F

0.6737707338681891

In [18]:
from scipy.stats import f

df_between = 87  # 88 videos - 1
df_within = 792  # 880 observations - 88 videos
F_score = MSB / MSE

# Get the p-value
p_value = f.sf(F_score, df_between, df_within)

print("F-Score:", F_score)
print("P-Value:", p_value)

F-Score: 0.6737707338681891
P-Value: 0.9892974665455863


In [27]:
results = pg.intraclass_corr(data=df, targets='emotion_1', raters='survey_id', ratings='reply_dim_Novelty')
results

Unnamed: 0,Type,Description,ICC,F,df1,df2,pval,CI95%
0,ICC1,Single raters absolute,0.394606,6.866357,43,352,1.071705e-26,"[0.28, 0.53]"
1,ICC2,Single random raters,0.408699,10.634199,43,344,1.596623e-41,"[0.27, 0.56]"
2,ICC3,Single fixed raters,0.517017,10.634199,43,344,1.596623e-41,"[0.4, 0.65]"
3,ICC1k,Average raters absolute,0.854362,6.866357,43,352,1.071705e-26,"[0.78, 0.91]"
4,ICC2k,Average random raters,0.861509,10.634199,43,344,1.596623e-41,"[0.77, 0.92]"
5,ICC3k,Average fixed raters,0.905964,10.634199,43,344,1.596623e-41,"[0.86, 0.94]"


In [21]:
results = pg.intraclass_corr(data=df, targets='filename', raters='survey_id', ratings='reply_dim_Novelty')

In [22]:
print(results)

    Type              Description       ICC         F  df1  df2          pval  \
0   ICC1   Single raters absolute  0.330961  5.452122   87  704  8.133354e-39   
1   ICC2     Single random raters  0.342983  7.232364   87  696  1.410710e-54   
2   ICC3      Single fixed raters  0.409153  7.232364   87  696  1.410710e-54   
3  ICC1k  Average raters absolute  0.816585  5.452122   87  704  8.133354e-39   
4  ICC2k    Average random raters  0.824508  7.232364   87  696  1.410710e-54   
5  ICC3k     Average fixed raters  0.861733  7.232364   87  696  1.410710e-54   

          CI95%  
0  [0.25, 0.42]  
1  [0.25, 0.45]  
2   [0.33, 0.5]  
3  [0.75, 0.87]  
4  [0.75, 0.88]  
5   [0.81, 0.9]  


In [25]:
df

Unnamed: 0,user_id,survey_id,survey_item_index,filename,has_reply,emotion_1,video_id,mix,emotion_1_id,emotion_2_id,intensity_level,reply_dim_Novelty,reply_dim_Pleasantness,reply_dim_Goal conduciveness,reply_dim_Urgency,reply_dim_Power,reply_dim_Norm compatibility
0,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,0,A200_int_v_3.mp4,1,interest_curiosity,A200,0,29,100,3.0,1,4,4,1,3,3
1,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,1,A102_exc_v_3.mp4,1,excitement_expectation,A102,0,36,100,3.0,1,5,5,1,3,5
2,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,2,A102_fea_p_2.mp4,1,fear,A102,0,10,100,2.0,3,1,1,2,2,1
3,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,3,A102_amu_v_3.mp4,1,amusement,A102,0,13,100,3.0,2,4,4,2,3,4
4,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,4,A207_pea_v_2.mp4,1,peacefulness_serenity,A207,0,25,100,2.0,1,2,2,2,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
787,default,ec2e6b609b727ed8df127dad7dae6bae61a680f51ee1a0...,83,A102_ple_v_2.mp4,1,sensory_pleasure,A102,0,23,100,2.0,3,5,5,3,4,5
788,default,ec2e6b609b727ed8df127dad7dae6bae61a680f51ee1a0...,84,A102_dist_p_3.mp4,1,distress_pain,A102,0,17,100,3.0,5,1,1,4,4,2
789,default,ec2e6b609b727ed8df127dad7dae6bae61a680f51ee1a0...,85,A227_pri_p_2.mp4,1,pride,A227,0,41,100,2.0,4,5,5,4,5,5
790,default,ec2e6b609b727ed8df127dad7dae6bae61a680f51ee1a0...,86,A102_sex_p_2.mp4,1,sexual_lust,A102,0,24,100,2.0,1,3,2,2,4,3


In [30]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

model = ols('reply_dim_Novelty ~ C(filename) + C(survey_id)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(filename),553.090909,87.0,7.232364,1.41071e-54
C(survey_id),209.093434,8.0,29.734004,4.479147e-40
Residual,611.795455,696.0,,


In [26]:
df["survey_id"].unique()

array(['1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677631723cd56788b2d4a',
       '4512b9c28e499053d7517c9c05291332846f82c3a969fb54978fc8966861824f',
       '67921eb65c423aab2abf37e16661e6338b2cb4f500b1bad84bf9052b065d4b80',
       '7ef007a5f86ee9eb7037e0c4f80ad9610f7a26b4ea7118b781ecfdc00596ef3a',
       '86395a140e86308a1eeddb80a97aa009b3fe89f1099adb70a7aa24ec85610342',
       '8d874a3f1a6a3cd5b31669581a06e6b6978e7ed7a9653e857d649ba33e051971',
       '938d071cd06babc93eab6d597625c295c9d362cfabbf9fc2901ca49cad26c66a',
       'ce2ad4e108a5e6956a51691f70d37bb55e1ebb0883b96bcafaf27082a4bd3e09',
       'ec2e6b609b727ed8df127dad7dae6bae61a680f51ee1a0014cac58c72a2e663d'],
      dtype=object)

ICC2k ska motsvara inverterad cronbachs alpha, petri rekommenderar denna. 