In [2]:
import numpy as np
import pandas as pd

In [3]:
path = "../data/appraisal/export_appraisal.csv"
df = pd.read_csv(path)
df.head()

Unnamed: 0,user_id,survey_id,survey_item_index,filename,has_reply,emotion_1,video_id,mix,emotion_1_id,emotion_2_id,intensity_level,reply_dim_Novelty,reply_dim_Pleasantness,reply_dim_Goal conduciveness,reply_dim_Urgency,reply_dim_Power,reply_dim_Norm compatibility
0,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,0,A200_int_v_3.mp4,1,interest_curiosity,A200,0,29,100,3.0,1,4,4,1,3,3
1,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,1,A102_exc_v_3.mp4,1,excitement_expectation,A102,0,36,100,3.0,1,5,5,1,3,5
2,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,2,A102_fea_p_2.mp4,1,fear,A102,0,10,100,2.0,3,1,1,2,2,1
3,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,3,A102_amu_v_3.mp4,1,amusement,A102,0,13,100,3.0,2,4,4,2,3,4
4,default,1d1439eb990b9fbdc99bf899dc7a7f9cb61a9a404e5677...,4,A207_pea_v_2.mp4,1,peacefulness_serenity,A207,0,25,100,2.0,1,2,2,2,2,2


In [5]:
df.columns

Index(['user_id', 'survey_id', 'survey_item_index', 'filename', 'has_reply',
       'emotion_1', 'video_id', 'mix', 'emotion_1_id', 'emotion_2_id',
       'intensity_level', 'reply_dim_Novelty', 'reply_dim_Pleasantness',
       'reply_dim_Goal conduciveness', 'reply_dim_Urgency', 'reply_dim_Power',
       'reply_dim_Norm compatibility'],
      dtype='object')

In [15]:
scales = [
    'reply_dim_Novelty',
    'reply_dim_Pleasantness',
    'reply_dim_Goal conduciveness',
    'reply_dim_Urgency',
    'reply_dim_Power',
    'reply_dim_Norm compatibility'
]

scales_df = df[scales]
scales_df

Unnamed: 0,reply_dim_Novelty,reply_dim_Pleasantness,reply_dim_Goal conduciveness,reply_dim_Urgency,reply_dim_Power,reply_dim_Norm compatibility
0,1,4,4,1,3,3
1,1,5,5,1,3,5
2,3,1,1,2,2,1
3,2,4,4,2,3,4
4,1,2,2,2,2,2
...,...,...,...,...,...,...
787,3,5,5,3,4,5
788,5,1,1,4,4,2
789,4,5,5,4,5,5
790,1,3,2,2,4,3


In [8]:
# Number of items
n_items = len(scales_df.columns)
n_items

6

In [9]:
# Item variances
item_variances = scales_df.var(ddof=1)
item_variances

reply_dim_Novelty               1.737016
reply_dim_Pleasantness          2.185941
reply_dim_Goal conduciveness    2.425385
reply_dim_Urgency               1.774392
reply_dim_Power                 1.635285
reply_dim_Norm compatibility    2.108698
dtype: float64

In [11]:
# Total variances
respondent_totals = scales_df.sum(axis=1)
respondent_totals

0      16
1      20
2      10
3      19
4      11
       ..
787    25
788    17
789    28
790    15
791    20
Length: 792, dtype: int64

In [12]:
total_variance = respondent_totals.var(ddof=1)
total_variance

26.284711846658745

In [13]:
# Cronbach's Alpha
cronbach_alpha = (n_items / (n_items - 1)) * (1 - item_variances.sum() / total_variance)

print(f"Cronbach's Alpha: {cronbach_alpha}")

Cronbach's Alpha: 0.6582379060301083


## Intraclass Correlation 

In [17]:
video_means = df.groupby('filename')[scales[0]].mean()
video_means

filename
A102_amu_v_3.mp4        2.888889
A102_ang_p_3.mp4        2.777778
A102_ang_v_2.mp4        3.333333
A102_anx_p_2.mp4        2.111111
A102_anx_p_3.mp4        2.555556
                          ...   
A327_ele_p_3.mp4        3.000000
A334_reg_p_2.mp4        2.111111
A407_ins_p_2.mp4        3.222222
A424_ele_v_3.mp4        2.666667
A55_gui_v_2_ver1.mp4    1.333333
Name: reply_dim_Novelty, Length: 88, dtype: float64

In [28]:
# Calculate the grand mean across all ratings for this scale
grand_mean = df[scales[0]].mean()

# Compute the squared differences from the grand mean for each video's mean rating
squared_diffs = (video_means - grand_mean) ** 2

# Compute MSB: mean of these squared differences, adjust for degrees of freedom if necessary
MSB = squared_diffs.mean()
MSB

0.6905005107252298

In [29]:
video_means

filename
A102_amu_v_3.mp4        2.888889
A102_ang_p_3.mp4        2.777778
A102_ang_v_2.mp4        3.333333
A102_anx_p_2.mp4        2.111111
A102_anx_p_3.mp4        2.555556
                          ...   
A334_reg_p_2.mp4        2.111111
A407_ins_p_2.mp4        3.222222
A424_ele_v_3.mp4        2.666667
A55_gui_v_2_ver1.mp4    1.333333
grand_mean              2.494949
Name: video_mean, Length: 89, dtype: float64

In [21]:
intermediary_df = df[['filename', scales[0]]].copy()
video_means = intermediary_df.groupby('filename')[scales[0]].mean().rename('video_mean')
intermediary_df = intermediary_df.merge(video_means, on='filename')

intermediary_df['squared_diffs'] = (intermediary_df[scales[0]] - intermediary_df['video_mean']) ** 2

MSE = intermediary_df['squared_diffs'].mean()
MSE

1.0364758698092031

In [25]:
MSB - MSE / (MSB +  (792 -1) * MSE)

0.6970839608781992

In [24]:
df.shape

(792, 17)