In [None]:
import pandas as pd
import numpy as np
from statsmodels.stats.inter_rater import fleiss_kappa
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import cohen_kappa_score
from itertools import combinations

In [None]:
# Google Sheet with the annotators' labels; its "Summary" tab is fetched as CSV.
sheet_id = "1sQEGYhR2XTuOIeDERvie_BJxmxRxsQMA88bDS6uPxlM"
sheet_name = "Summary"
csv_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"

# Keep only the clip identifiers and the four annotator columns.
df = pd.read_csv(csv_url).loc[
    :, ['meeting', 'participant', 'video_clip', 'DucManh', 'Charlotte', 'Ducanh', 'Money']
]

    meeting participant        video_clip                  DucManh  \
0  20210323       SP01F  clip_732_737.mp4        Active engagement   
1  20210323       SP01F  clip_747_752.mp4        Active engagement   
2  20210323       SP01F  clip_837_842.mp4  Intermittent engagement   
3  20210323       SP01F  clip_846_851.mp4        Active engagement   
4  20210323       SP01F  clip_849_854.mp4        Active engagement   

           Charlotte                   Ducanh                    Money  
0  Active engagement        Active engagement  Intermittent engagement  
1  Active engagement        Active engagement        Active engagement  
2  Active engagement        Active engagement  Intermittent engagement  
3  Active engagement  Intermittent engagement            Disengagement  
4  Active engagement  Intermittent engagement                  unclear  


In [None]:
def compute_fleiss_kappa(df, cols):
    """Compute Fleiss' kappa for the rater columns ``cols`` of ``df``.

    Rows with a missing value in any of ``cols`` are dropped. The remaining
    labels are tallied into a count matrix with one row per item and one
    column per distinct label, which is then passed to ``fleiss_kappa``.

    Args:
        df: DataFrame with one row per rated item.
        cols: List of column names, one per rater.

    Returns:
        The score returned by ``fleiss_kappa`` for the count matrix.

    Raises:
        ValueError: If no row has a label in every column of ``cols``.
    """
    labels_df = df[cols].dropna()
    if labels_df.empty:
        raise ValueError("no rows with a label from every rater in cols")

    # Number the distinct labels 0..k-1 in order of first appearance, so every
    # index is a valid column of the matrix however many labels occur.
    all_labels = pd.unique(labels_df.values.ravel())
    label_to_id = {label: i for i, label in enumerate(all_labels)}

    video_num = labels_df.shape[0]
    label_num = len(label_to_id)

    # count_matrix[i, j] = number of raters who gave item i the label j.
    count_matrix = np.zeros((video_num, label_num), dtype=int)
    for i, row in enumerate(labels_df.itertuples(index=False)):
        for label in row:
            count_matrix[i, label_to_id[label]] += 1

    return fleiss_kappa(count_matrix)

In [None]:
def compute_agreement_dist(df, cols):
    """Tabulate how strongly the raters agree on each item.

    Rows with a missing ``video_clip`` or a missing value in any of ``cols``
    are dropped. For each remaining row the agreement is the share of raters
    who chose that row's most common label.

    Args:
        df: DataFrame containing a ``video_clip`` column and the rater columns.
        cols: List of column names, one per rater.

    Returns:
        Series indexed by agreement level (rounded to 2 decimals, ascending)
        whose values are the number of rows at that level.
    """
    label_df = df[['video_clip'] + cols].dropna()

    # Share of raters backing the top label, one value per remaining row.
    top_shares = [
        pd.Series(labels).value_counts(normalize=True).max()
        for labels in label_df[cols].to_numpy()
    ]

    # Reuse label_df's own index: after dropna it may have gaps, and a freshly
    # numbered 0..n-1 index would not line up with the rows it describes.
    agreement = pd.Series(top_shares, index=label_df.index, name='agreement', dtype=float)

    return agreement.round(2).value_counts().sort_index()

Fleiss' kappa score (for multiple raters) \
https://en.wikipedia.org/wiki/Fleiss%27_kappa \
< 0 Poor agreement \
0.01 – 0.20 Slight agreement \
0.21 – 0.40 Fair agreement \
0.41 – 0.60 Moderate agreement \
0.61 – 0.80 Substantial agreement \
0.81 – 1.00 Almost perfect agreement

In [None]:
label_cols = ['DucManh', 'Charlotte', 'Ducanh', 'Money']

# Agreement across all four raters: Fleiss' kappa plus the per-clip
# distribution of how many raters back the most common label.
score = compute_fleiss_kappa(df, label_cols)
agreement_dist = compute_agreement_dist(df, label_cols)

print(f"Fleiss' Kappa score: {score:.3f}\n")
print(f"The agreement distribution between{label_cols}is:\n{agreement_dist}")

Fleiss' Kappa score: 0.277

The agreement distribution between['DucManh', 'Charlotte', 'Ducanh', 'Money']is:
agreement
0.50    46
0.75    32
1.00    29
Name: count, dtype: int64


In [None]:
# Agreement between pairs: Cohen's kappa for every pair of raters,
# computed on the rows where all raters in label_cols gave a label.
pair_df = df[label_cols].dropna()
pairs = list(combinations(label_cols, 2))

for user1, user2 in pairs:
    labels1, labels2 = pair_df[user1], pair_df[user2]
    kappa = cohen_kappa_score(labels1, labels2)
    print(f"Cohen's Kappa between {user1} and {user2}: {kappa:.3f}")

Cohen's Kappa between DucManh and Charlotte: 0.454
Cohen's Kappa between DucManh and Ducanh: 0.385
Cohen's Kappa between DucManh and Money: 0.103
Cohen's Kappa between Charlotte and Ducanh: 0.360
Cohen's Kappa between Charlotte and Money: 0.163
Cohen's Kappa between Ducanh and Money: 0.231


Cohen's kappa interpretation \
\> 0.8 Almost perfect \
\> 0.6 Substantial \
\> 0.4 Moderate \
\> 0.2 Fair \
0 – 0.2 Slight \
< 0 Poor

In [None]:
# Share of each label per rater, taken from pair_df.
all_dist = {user: pair_df[user].value_counts(normalize=True) for user in label_cols}

# Fill missing labels with 0, then order the rows by label name.
label_dist_df = pd.DataFrame(all_dist).fillna(0).sort_index()

print("label distribution:")
print(label_dist_df)

label distribution:
                          DucManh  Charlotte    Ducanh     Money
Active engagement        0.495327   0.532710  0.392523  0.355140
Disengagement            0.177570   0.121495  0.271028  0.224299
Intermittent engagement  0.317757   0.308411  0.327103  0.401869
unclear                  0.009346   0.037383  0.009346  0.018692


In [None]:
# Same agreement summary, excluding 'Money'.
label_cols_3 = ['DucManh', 'Charlotte', 'Ducanh']

score_3 = compute_fleiss_kappa(df, label_cols_3)
agreement_dist = compute_agreement_dist(df, label_cols_3)

print(f"Fleiss' Kappa score(excluding Money): {score_3:.3f}")
print(f"The agreement distribution between{label_cols_3} is:\n{agreement_dist}")

Fleiss' Kappa score(excluding Money): 0.395
The agreement distribution between['DucManh', 'Charlotte', 'Ducanh'] is:
agreement
0.33     4
0.67    56
1.00    47
Name: count, dtype: int64


In [None]:
# Clip identifiers plus the three raters' labels, keeping only fully labelled rows.
label_df_3 = df[['meeting', 'participant', 'video_clip'] + label_cols_3].dropna()

# Share of raters who chose each row's most common label.
label_df_3['agreement'] = label_df_3[label_cols_3].apply(
    lambda row: row.value_counts(normalize=True).max(), axis=1
)

# Total disagreement: every rater chose a different label, so the top share is
# 1 / number of raters. Compare with a tolerance rather than exact float
# equality, and derive the threshold from the rater list instead of a literal.
lowest_share = 1 / len(label_cols_3)
low_agreement_df = label_df_3[np.isclose(label_df_3['agreement'], lowest_share)]

print("total disagreement clip")
print(low_agreement_df[['meeting', 'participant', 'video_clip'] + label_cols_3 + ['agreement']])

total disagreement clip
     meeting participant          video_clip                  DucManh  \
33  20210504       SP09M    clip_282_287.mp4        Active engagement   
36  20210504       SP09M    clip_522_527.mp4        Active engagement   
68  20210616       SP02F    clip_849_854.mp4        Active engagement   
95  20220204       SP03M  clip_2751_2756.mp4  Intermittent engagement   

                  Charlotte         Ducanh  agreement  
33  Intermittent engagement  Disengagement   0.333333  
36  Intermittent engagement  Disengagement   0.333333  
68  Intermittent engagement  Disengagement   0.333333  
95                  unclear  Disengagement   0.333333  


In [None]:
def majority_vote(row):
    """Return the label that occurs most often in ``row``.

    When several labels share the top count, the one that ``value_counts()``
    lists first is returned.
    """
    tally = row.value_counts()
    return tally.idxmax()

def vote_agreement_level(row):
    """Classify how unanimous the labels in ``row`` are.

    Returns:
        'full' when every label is the same, 'majority' when the most common
        label accounts for at least two thirds of the labels, and 'disagree'
        otherwise.
    """
    top_share = row.value_counts(normalize=True).max()

    if top_share == 1.0:
        return 'full'
    if top_share >= 2/3:
        return 'majority'
    return 'disagree'

# Label columns only, taken before the derived columns are added below.
votes_3 = label_df_3[label_cols_3]

label_df_3['majority_label_3'] = votes_3.apply(majority_vote, axis=1)
label_df_3['vote_agreement'] = votes_3.apply(vote_agreement_level, axis=1)

summary_cols = ['meeting', 'participant', 'video_clip', 'majority_label_3', 'vote_agreement']
print(label_df_3[summary_cols].head(20))

     meeting participant          video_clip         majority_label_3  \
0   20210323       SP01F    clip_732_737.mp4        Active engagement   
1   20210323       SP01F    clip_747_752.mp4        Active engagement   
2   20210323       SP01F    clip_837_842.mp4        Active engagement   
3   20210323       SP01F    clip_846_851.mp4        Active engagement   
4   20210323       SP01F    clip_849_854.mp4        Active engagement   
5   20210323       SP01F    clip_852_857.mp4        Active engagement   
6   20210323       SP01F    clip_948_953.mp4            Disengagement   
7   20210323       SP01F    clip_957_962.mp4  Intermittent engagement   
8   20210323       SP01F    clip_966_971.mp4            Disengagement   
9   20210323       SP01F  clip_1248_1253.mp4        Active engagement   
10  20210323       SP06M    clip_417_422.mp4        Active engagement   
11  20210323       SP06M    clip_426_431.mp4        Active engagement   
12  20210323       SP06M    clip_438_443.mp4  Inter