In [1]:
import os
import glob
import random
import pandas as pd
import krippendorff
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, classification_report

In [13]:
# Relative path string for the ../data/episodes directory.
# NOTE(review): no cell visible in this part of the notebook reads `directory` — confirm it is still needed.
directory = "../data/episodes"

### Is this discussing race?

In [22]:
# Load the base race sample and the three per-annotator files.
race_csv_paths = (
    '../data/annotation/race_sample.csv',
    '../data/annotation/race_sample_1.csv',
    '../data/annotation/race_sample_2.csv',
    '../data/annotation/race_sample_3.csv',
)
race_sample, race_sample_1, race_sample_2, race_sample_3 = (
    pd.read_csv(csv_path) for csv_path in race_csv_paths
)

In [23]:
# Put the `race` label from race_sample (presumably the classifier's output —
# confirm) next to the three annotators' labels and their majority vote.
race_annotation_cols = ['annotation_1', 'annotation_2', 'annotation_3']
df_race = race_sample[['sentence', 'race']].copy()
df_race['race'] = df_race['race'].astype(int)
for col, annotator_df in zip(race_annotation_cols, (race_sample_1, race_sample_2, race_sample_3)):
    # `.values` discards the index, so rows are matched purely by position.
    df_race[col] = annotator_df['annotation'].values
# Row-wise mode; column 0 holds the first most-frequent label of each row.
df_race['majority'] = df_race[race_annotation_cols].mode(axis=1)[0]
df_race.head()

Unnamed: 0,sentence,race,annotation_1,annotation_2,annotation_3,majority
0,America is the only country.,0,0,0,0,0
1,"Now remember, when you're done today, please sign up for part five and also remember to check out all of our audio guides on the Through the Word app and website.",0,0,0,0,0
2,"And I heard a loud voice saying in heaven, now is come salvation and strength, and the kingdom of our God and the power of his Christ.",0,0,0,0,0
3,"It's actually the white supremacist that we're running it to get all these young people to fall, but basically fall in direction and to go loot and mass. - Yeah, you know, there was a cop shot here Saturday night like if I circus circus and apparently the guy that shot him didn't have anything to do with the situation, there was a couple of the cop was, I think, struggling with somebody and the guy walked by and decided to get an opportunity and just shot the cop.",1,1,1,1,1
4,"And that's what I feel like collectively at a high level, that's what that's a big thing that the black community is missing.",1,1,0,1,1


In [24]:
# Inter-annotator agreement on the race labels (nominal Krippendorff's alpha).
# The annotation columns are transposed so that each row of the matrix is one annotator.
df_race_krip = df_race[['annotation_1', 'annotation_2', 'annotation_3']].to_numpy().T
alpha = krippendorff.alpha(
    reliability_data=df_race_krip,
    level_of_measurement='nominal',
)
print(f"Krippendorff's alpha: {alpha:.4f}")

Krippendorff's alpha: 0.7737


In [25]:
# Score the `race` column against the annotators' majority vote.
# The sklearn metrics take (y_true, y_pred): the majority vote is the reference.
race_true = df_race['majority']
race_pred = df_race['race']
accuracy = accuracy_score(race_true, race_pred)
f1 = f1_score(race_true, race_pred)
# Pin the label order so the matrix is always 2x2. Without `labels`, a sample in
# which only one class occurs yields a 1x1 matrix and the 4-way unpacking fails.
tn, fp, fn, tp = confusion_matrix(race_true, race_pred, labels=[0, 1]).ravel()
print(f'Accuracy: {accuracy:.4f}')
print(f'F1 Score: {f1:.4f}')
print(f'True Positives (TP): {tp}')
print(f'True Negatives (TN): {tn}')
print(f'False Positives (FP): {fp}')
print(f'False Negatives (FN): {fn}')

Accuracy: 0.8100
F1 Score: 0.7654
True Positives (TP): 62
True Negatives (TN): 100
False Positives (FP): 38
False Negatives (FN): 0


### What level of collective action is it?

In [26]:
# Load the base collective-action sample and the three per-annotator files.
multi_csv_paths = (
    '../data/annotation/multi_sample.csv',
    '../data/annotation/multi_sample_1.csv',
    '../data/annotation/multi_sample_2.csv',
    '../data/annotation/multi_sample_3.csv',
)
multi_sample, multi_sample_1, multi_sample_2, multi_sample_3 = (
    pd.read_csv(csv_path) for csv_path in multi_csv_paths
)

In [None]:
# Put `collectiveActionLevel` from multi_sample next to the three annotators'
# labels, then derive a majority label per sentence.
multi_annotation_cols = ['annotation_1', 'annotation_2', 'annotation_3']
df_multi = multi_sample[['sentence', 'collectiveActionLevel']].copy()
for col, annotator_df in zip(multi_annotation_cols, (multi_sample_1, multi_sample_2, multi_sample_3)):
    # `.values` discards the index, so rows are matched purely by position.
    df_multi[col] = annotator_df['annotation'].values
# A missing annotation becomes an explicit label so that it takes part in the vote.
df_multi[multi_annotation_cols] = df_multi[multi_annotation_cols].fillna('no_annotation')

# Row-wise mode: a row with a single non-null entry has a unique most-frequent
# label; more than one entry means the annotators tied.
modes = df_multi[multi_annotation_cols].mode(axis=1)
has_unique_mode = modes.notna().sum(axis=1) == 1
df_multi['majority'] = modes[0].where(has_unique_mode, 'no majority')
df_multi.head()

Unnamed: 0,sentence,collectiveActionLevel,annotation_1,annotation_2,annotation_3,majority
0,"I also wanted to take our anger, our frustration, our sadness.",Call-to-action,Execution,no_annotation,no_annotation,no_annotation
1,"A ban on police choke holds, for example, and the talks are still going on for the bigger asks, namely slashing the Seattle police department's budget and redirecting funds to health and social services.",Intention,Intention,Problem-solution,no_annotation,no majority
2,"I mean, it's so ingrained into, you know, just society and, you know, it's been represented historically, like, throughout, you know, throughout time, like with us and, you know, with American Indians and Alaska Natives as well, like in policies and systemic things that have, you know, really hurt them and hurt their development, like systematically, like things that were put into law that really hurt them as well.",Problem-solution,Problem-solution,Problem-solution,Problem-solution,Problem-solution
3,"They do themselves, they do wear clothes, they do not do this, so we are going there to civilise them, which is not true.",Problem-solution,Problem-solution,Problem-solution,no_annotation,Problem-solution
4,Justice has to be for everyone.,Intention,Problem-solution,no_annotation,no_annotation,no_annotation


In [39]:
# Numeric codes for the annotation labels. Agreement is computed at the nominal
# level below, so only the identity of each code matters, not its order.
label_mapping = {
    'Execution': 0,
    'Intention': 1,
    'Problem-solution': 2,
    'Call-to-action': 3,
    'no_annotation': np.nan  # treat as missing
}
annotation_cols = ['annotation_1', 'annotation_2', 'annotation_3']

# Series.map turns labels that are absent from the mapping into NaN, which would
# then silently count as missing ratings; reject them explicitly instead.
observed_labels = pd.unique(df_multi[annotation_cols].to_numpy().ravel())
unmapped_labels = sorted(
    str(label) for label in observed_labels
    if pd.notna(label) and label not in label_mapping
)
if unmapped_labels:
    raise ValueError(f'Annotation labels missing from label_mapping: {unmapped_labels}')

# Map column by column and cast to float explicitly. DataFrame.replace on object
# columns only produced a numeric frame through implicit downcasting, which
# pandas 2.2 deprecates; krippendorff.alpha needs a numeric array with NaN for
# missing ratings.
df_krip = df_multi[annotation_cols].apply(lambda col: col.map(label_mapping)).astype(float)

# Transpose so that each row of the matrix is one annotator.
df_multi_krip = df_krip.to_numpy().T
alpha = krippendorff.alpha(reliability_data=df_multi_krip, level_of_measurement='nominal')
print(f"Krippendorff's alpha: {alpha:.4f}")

Krippendorff's alpha: 0.4033


In [46]:
# NOTE(review): disabled cell. It mirrors the binary metrics used for the race task.
# Presumably it is commented out because `collectiveActionLevel` has more than two
# classes, so f1_score's default binary averaging and the 4-way tn/fp/fn/tp
# unpacking do not apply — confirm, and delete the cell if it is obsolete.
# accuracy = accuracy_score(df_multi['majority'], df_multi['collectiveActionLevel'])
# f1 = f1_score(df_multi['majority'], df_multi['collectiveActionLevel'])
# tn, fp, fn, tp = confusion_matrix(df_multi['majority'], df_multi['collectiveActionLevel']).ravel()
# print(f'Accuracy: {accuracy:.4f}')
# print(f'F1 Score: {f1:.4f}')
# print(f'True Positives (TP): {tp}')
# print(f'True Negatives (TN): {tn}')
# print(f'False Positives (FP): {fp}')
# print(f'False Negatives (FN): {fn}')

Filtering out rows with no majority vote or marked as not collective action

In [None]:
# Evaluate only the rows where both the annotators' majority vote and
# `collectiveActionLevel` are one of the four collective-action levels.
# This cell must stay active: the classification_report cell that follows reads
# `df_eval` and `valid_labels`, which are defined nowhere else before it.
valid_labels = ['Problem-solution', 'Call-to-action', 'Intention', 'Execution']
df_eval = df_multi[
    df_multi['majority'].isin(valid_labels) &
    df_multi['collectiveActionLevel'].isin(valid_labels)
]
# The sklearn metrics take (y_true, y_pred): the majority vote is the reference.
accuracy = accuracy_score(df_eval['majority'], df_eval['collectiveActionLevel'])
f1 = f1_score(df_eval['majority'], df_eval['collectiveActionLevel'], average='macro')
labels = valid_labels
# Fix the row/column order of the matrix to match the header printed below.
cm = confusion_matrix(df_eval['majority'], df_eval['collectiveActionLevel'], labels=labels)
print(f'Accuracy: {accuracy:.4f}')
print(f'F1 Score (macro): {f1:.4f}')
print('Confusion Matrix (labels: Problem-solution, Call-to-action, Intention, Execution):')
print(cm)

Accuracy: 0.8095
F1 Score (macro): 0.6689
Confusion Matrix (labels: Problem-solution, Call-to-action, Intention, Execution):
[[54  5  2  0]
 [ 1  6  3  0]
 [ 0  3  4  0]
 [ 0  0  2  4]]


In [49]:
# Per-class precision/recall/F1 for the filtered evaluation frame; classes
# without predictions score 0 rather than raising a warning.
filtered_report = classification_report(
    y_true=df_eval['majority'],
    y_pred=df_eval['collectiveActionLevel'],
    labels=valid_labels,
    zero_division=0,
)
print(filtered_report)

                  precision    recall  f1-score   support

Problem-solution       0.98      0.89      0.93        61
  Call-to-action       0.43      0.60      0.50        10
       Intention       0.36      0.57      0.44         7
       Execution       1.00      0.67      0.80         6

        accuracy                           0.81        84
       macro avg       0.69      0.68      0.67        84
    weighted avg       0.87      0.81      0.83        84



Merging rows with no majority vote, or marked as not collective action, into the class None

In [52]:
# Fold both "unresolved" majority outcomes into one 'None' class, and give
# sentences with a missing `collectiveActionLevel` the same class.
unresolved_majority_values = ['no_annotation', 'no majority']
df_multi['majority_normalized'] = df_multi['majority'].replace(
    to_replace=unresolved_majority_values,
    value='None',
)
df_multi['collectiveActionLevel_normalized'] = df_multi['collectiveActionLevel'].fillna('None')

In [None]:
# Five-class evaluation: the four collective-action levels plus 'None'.
all_labels = ['Problem-solution', 'Call-to-action', 'Intention', 'Execution', 'None']
df_eval = df_multi[['majority_normalized', 'collectiveActionLevel_normalized']].dropna()

# The sklearn metrics take (y_true, y_pred): the normalized majority vote is the reference.
reference_labels = df_eval['majority_normalized']
compared_labels = df_eval['collectiveActionLevel_normalized']

accuracy = accuracy_score(reference_labels, compared_labels)
f1 = f1_score(reference_labels, compared_labels, labels=all_labels, average='macro')
cm = confusion_matrix(reference_labels, compared_labels, labels=all_labels)
# Label the matrix so that rows (reference) and columns (compared) are readable.
cm_table = pd.DataFrame(cm, index=all_labels, columns=all_labels)

print(f'Accuracy: {accuracy:.4f}')
print(f'Macro F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(cm_table)

Accuracy: 0.2267
Macro F1 Score: 0.2169
Confusion Matrix:
                  Problem-solution  Call-to-action  Intention  Execution  None
Problem-solution                54               5          2          0     0
Call-to-action                   1               6          3          0     0
Intention                        0               3          4          0     0
Execution                        0               0          2          4     0
None                           133              24         38         21     0


In [55]:
# Per-class precision/recall/F1 for the five-class evaluation; classes without
# predictions score 0 rather than raising a warning.
all_labels_report = classification_report(
    y_true=df_eval['majority_normalized'],
    y_pred=df_eval['collectiveActionLevel_normalized'],
    labels=all_labels,
    zero_division=0,
)
print(all_labels_report)

                  precision    recall  f1-score   support

Problem-solution       0.29      0.89      0.43        61
  Call-to-action       0.16      0.60      0.25        10
       Intention       0.08      0.57      0.14         7
       Execution       0.16      0.67      0.26         6
            None       0.00      0.00      0.00       216

        accuracy                           0.23       300
       macro avg       0.14      0.54      0.22       300
    weighted avg       0.07      0.23      0.11       300

