In [1]:
# imports
import numpy as np
import pandas as pd

# lift pandas' display limits so frames are never truncated when shown
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# the twelve emotion columns: six "_bf" ratings followed by six "_af" ratings
# (presumably "before"/"after" taking the drug -- confirm with the worksheet authors)
labels = [
    'anger_bf', 'fear_bf', 'joy_bf', 'sadness_bf', 'disgust_bf', 'surprise_bf',
    'anger_af', 'fear_af', 'joy_af', 'sadness_af', 'disgust_af', 'surprise_af',
]

# full column layout of the imported worksheets: the id, the "_bf" ratings,
# the drug name, the "_af" ratings, then two trailing free-form columns
col_names = ['id', *labels[:6], 'drug_name', *labels[6:], 'comments', 'data']

In [2]:
# Load Isabelle's worksheet (tab-separated). The file's first row is skipped
# and the columns are named from `col_names` instead.
df_isa = (
    pd.read_csv(
        "../worksheets/isabelle_worksheet.tsv",
        sep='\t',
        names=col_names,
        skiprows=1,
    )
    # keep only the id and the emotion columns
    .drop(columns=['drug_name', 'comments', 'data'])
    # missing values (NaN) become the string '0'
    .replace(np.nan, '0')
    # rows are addressed by the item id from here on
    .set_index('id')
)

# manual override of one cell: works around a specific problem hit while
# converting Isabelle's worksheet
df_isa.at[26323, 'surprise_af'] = '0'

# preview the trimmed dataset
df_isa.head()

Unnamed: 0_level_0,anger_bf,fear_bf,joy_bf,sadness_bf,disgust_bf,surprise_bf,anger_af,fear_af,joy_af,sadness_af,disgust_af,surprise_af
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
32104,0,0,0,0,0,0,0,0,2 Normal,0,0,0
224119,0,0,0,0,0,0,0,0,0,0,2 Normal,0
28132,0,3 High,0,3 High,0,0,0,1 Low,0,1 Low,0,0
26323,0,0,0,0,0,0,3 High,0,0,0,0,0
165354,0,0,0,0,0,0,3 High,0,1 Low,0,0,0


In [3]:
# Load Nishan's worksheet (tab-separated). The file's first row is skipped
# and the columns are named from `col_names` instead.
df_n = (
    pd.read_csv(
        "../worksheets/nishan_worksheet_with_older.tsv",
        sep='\t',
        names=col_names,
        skiprows=1,
    )
    # keep only the id and the emotion columns
    .drop(columns=['drug_name', 'comments', 'data'])
    # missing values (NaN) become the string '0'
    .replace(np.nan, '0')
    # rows are addressed by the item id from here on
    .set_index('id')
)

# preview the trimmed dataset
df_n.head()

Unnamed: 0_level_0,anger_bf,fear_bf,joy_bf,sadness_bf,disgust_bf,surprise_bf,anger_af,fear_af,joy_af,sadness_af,disgust_af,surprise_af
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
32104,0,0,0,3 High,0,0,0,0,2 Normal,0,0,2 Normal
224119,0,0,0,0,0,0,0,1 Low,0,0,3 High,0
28132,0,3 High,0,3 High,0,0,0,2 Normal,2 Normal,0,0,0
26323,0,0,0,0,0,0,1 Low,2 Normal,0,0,0,0
165354,0,0,0,0,0,0,3 High,0,2 Normal,0,0,0


In [4]:
# Turn each row of Isabelle's worksheet into a list of integer scores, one per
# emotion column. Only the first character of every cell is kept and parsed
# (e.g. '3 High' -> 3, '0' -> 0), so the degree of each emotion is preserved.
isa_scores = {
    item_id: [int(first_char) for first_char in ratings.str[:1]]
    for item_id, ratings in df_isa.iterrows()
}

# show the entries for the five smallest ids
print({item_id: isa_scores[item_id] for item_id in sorted(isa_scores)[:5]})

{4308: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 6528: [1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0], 6764: [0, 0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 0], 11600: [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], 13633: [0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 0, 0]}


In [5]:
# Binarise Isabelle's scores: any positive degree collapses to 1, zero stays 0,
# so each emotion is recorded only as present or absent.
isa_scores_no_degree = {
    item_id: [int(degree > 0) for degree in degrees]
    for item_id, degrees in isa_scores.items()
}

# show the entries for the five smallest ids
print({item_id: isa_scores_no_degree[item_id] for item_id in sorted(isa_scores_no_degree)[:5]})

{4308: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 6528: [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], 6764: [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0], 11600: [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], 13633: [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0]}


In [6]:
# Turn each row of Nishan's worksheet into a list of integer scores, one per
# emotion column. Only the first character of every cell is kept and parsed
# (e.g. '2 Normal' -> 2, '0' -> 0), so the degree of each emotion is preserved.
n_scores = {
    item_id: [int(first_char) for first_char in ratings.str[:1]]
    for item_id, ratings in df_n.iterrows()
}

# show the entries for the five smallest ids
print({item_id: n_scores[item_id] for item_id in sorted(n_scores)[:5]})

{4308: [0, 2, 0, 0, 3, 0, 0, 0, 1, 0, 0, 1], 6528: [2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0], 6764: [0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 3, 0], 11600: [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0], 13633: [0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0]}


In [7]:
# Binarise Nishan's scores: any positive degree collapses to 1, zero stays 0,
# so each emotion is recorded only as present or absent.
n_scores_no_degree = {
    item_id: [int(degree > 0) for degree in degrees]
    for item_id, degrees in n_scores.items()
}

# show the entries for the five smallest ids
print({item_id: n_scores_no_degree[item_id] for item_id in sorted(n_scores_no_degree)[:5]})

{4308: [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], 6528: [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], 6764: [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0], 11600: [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0], 13633: [0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0]}


In [8]:
import sklearn
from sklearn.metrics import cohen_kappa_score

# Inter-annotator agreement between Isabelle and Nishan over every annotated item.
# For each item id, Cohen's kappa is computed across that item's emotion scores
# (Isabelle's list vs. Nishan's list); the per-item kappas are then averaged.

# Cohen Kappa scores for annotations with degree of emotions mentioned (0-3)
ck = []
ck_linear = []
ck_quadratic = []

# Cohen Kappa scores for annotations with degree of emotions absent (0/1)
ck_no_degree = []
ck_linear_no_degree = []
ck_quadratic_no_degree = []

# Isabelle's ids drive the loop; an id missing from Nishan's scores raises KeyError
for key in isa_scores:
    isa_graded, n_graded = isa_scores[key], n_scores[key]
    isa_binary, n_binary = isa_scores_no_degree[key], n_scores_no_degree[key]

    ck.append(cohen_kappa_score(isa_graded, n_graded))
    ck_linear.append(cohen_kappa_score(isa_graded, n_graded, weights='linear'))
    ck_quadratic.append(cohen_kappa_score(isa_graded, n_graded, weights='quadratic'))

    # with only two possible labels every disagreement has distance 1, so the
    # weighted variants coincide with the unweighted score; kept for symmetry
    ck_no_degree.append(cohen_kappa_score(isa_binary, n_binary))
    ck_linear_no_degree.append(cohen_kappa_score(isa_binary, n_binary, weights='linear'))
    ck_quadratic_no_degree.append(cohen_kappa_score(isa_binary, n_binary, weights='quadratic'))

print("Not weighted Cohen Kappa with degrees in emotions: ", sum(ck)/len(ck))
print("Linear weighted Cohen Kappa with degrees in emotions: ", sum(ck_linear)/len(ck_linear))
print("Quadratic weighted Cohen Kappa with degrees in emotions: ", sum(ck_quadratic)/len(ck_quadratic))

print("\n")

# these three report the binarised (no-degree) scores, so they are labelled as such
print("Not weighted Cohen Kappa without degrees in emotions: ", sum(ck_no_degree)/len(ck_no_degree))
print("Linear weighted Cohen Kappa without degrees in emotions: ", sum(ck_linear_no_degree)/len(ck_linear_no_degree))
print("Quadratic weighted Cohen Kappa without degrees in emotions: ", sum(ck_quadratic_no_degree)/len(ck_quadratic_no_degree))

Not weighted Cohen Kappa with degrees in emotions:  0.2947380236782072
Linear weighted Cohen Kappa with degrees in emotions:  0.3586086601347752
Quadratic weighted Cohen Kappa with degrees in emotions:  0.407269849104948


Not weighted Cohen Kappa with degrees in emotions:  0.41961342076726676
Linear weighted Cohen Kappa with degrees in emotions:  0.41961342076726676
Quadratic weighted Cohen Kappa with degrees in emotions:  0.41961342076726676


In [9]:
# Disabled experiment: the whole cell is one bare string literal, so none of the
# code inside it runs; the notebook merely echoes the string as the cell output.
# The quoted code would pool every (Isabelle, Nishan) score pair across all items
# and pass the resulting list to statsmodels' fleiss_kappa, once for the graded
# scores and once for the binarised ones.
# NOTE(review): fleiss_kappa appears to expect a subjects-by-categories table of
# counts rather than raw per-rater labels (cf. statsmodels' aggregate_raters) --
# confirm before re-enabling this cell.
'''import statsmodels
from statsmodels.stats.inter_rater import fleiss_kappa

with_degree = []

for key, value in isa_scores.items():
    for isa, n in zip(isa_scores[key], n_scores[key]):
        with_degree.append([isa, n])

print(fleiss_kappa(with_degree))

without_degree = []        
for key, value in isa_scores_no_degree.items():
    for isa_with_degree, n_with_degree in zip(isa_scores_no_degree[key], n_scores_no_degree[key]):
        without_degree.append([isa_with_degree, n_with_degree])
        
print(fleiss_kappa(without_degree))'''

'import statsmodels\nfrom statsmodels.stats.inter_rater import fleiss_kappa\n\nwith_degree = []\n\nfor key, value in isa_scores.items():\n    for isa, n in zip(isa_scores[key], n_scores[key]):\n        with_degree.append([isa, n])\n\nprint(fleiss_kappa(with_degree))\n\nwithout_degree = []        \nfor key, value in isa_scores_no_degree.items():\n    for isa_with_degree, n_with_degree in zip(isa_scores_no_degree[key], n_scores_no_degree[key]):\n        without_degree.append([isa_with_degree, n_with_degree])\n        \nprint(fleiss_kappa(without_degree))'

In [10]:
# For first 15
# Same per-item Cohen's kappa averaging as for the full set, restricted to the
# first 15 items of isa_scores.

# Cohen Kappa scores for annotations with degree of emotions mentioned (0-3)
ck = []
ck_linear = []
ck_quadratic = []

# Cohen Kappa scores for annotations with degree of emotions absent (0/1)
ck_no_degree = []
ck_linear_no_degree = []
ck_quadratic_no_degree = []

# dicts keep insertion order, so this slice is the first 15 ids in the order
# isa_scores was filled (fewer if the dict holds fewer than 15 items)
for key in list(isa_scores)[:15]:
    isa_graded, n_graded = isa_scores[key], n_scores[key]
    isa_binary, n_binary = isa_scores_no_degree[key], n_scores_no_degree[key]

    ck.append(cohen_kappa_score(isa_graded, n_graded))
    ck_linear.append(cohen_kappa_score(isa_graded, n_graded, weights='linear'))
    ck_quadratic.append(cohen_kappa_score(isa_graded, n_graded, weights='quadratic'))

    ck_no_degree.append(cohen_kappa_score(isa_binary, n_binary))
    ck_linear_no_degree.append(cohen_kappa_score(isa_binary, n_binary, weights='linear'))
    ck_quadratic_no_degree.append(cohen_kappa_score(isa_binary, n_binary, weights='quadratic'))

print("For first 15")

print("Not weighted Cohen Kappa with degrees in emotions: ", sum(ck)/len(ck))
print("Linear weighted Cohen Kappa with degrees in emotions: ", sum(ck_linear)/len(ck_linear))
print("Quadratic weighted Cohen Kappa with degrees in emotions: ", sum(ck_quadratic)/len(ck_quadratic))

print("\n")

# these three report the binarised (no-degree) scores, so they are labelled as such
print("Not weighted Cohen Kappa without degrees in emotions: ", sum(ck_no_degree)/len(ck_no_degree))
print("Linear weighted Cohen Kappa without degrees in emotions: ", sum(ck_linear_no_degree)/len(ck_linear_no_degree))
print("Quadratic weighted Cohen Kappa without degrees in emotions: ", sum(ck_quadratic_no_degree)/len(ck_quadratic_no_degree))

For first 15
Not weighted Cohen Kappa with degrees in emotions:  0.3175011163568026
Linear weighted Cohen Kappa with degrees in emotions:  0.40511746430641343
Quadratic weighted Cohen Kappa with degrees in emotions:  0.46769428416855086


Not weighted Cohen Kappa with degrees in emotions:  0.48432234432234433
Linear weighted Cohen Kappa with degrees in emotions:  0.48432234432234433
Quadratic weighted Cohen Kappa with degrees in emotions:  0.48432234432234433


In [11]:
# For next 50
# Same per-item Cohen's kappa averaging as for the full set, restricted to the
# items that come after the first 15 in isa_scores.
# NOTE(review): the selection has no upper bound -- it takes every item after
# the first 15, so the "50" in the heading only holds if isa_scores has exactly
# 65 entries; verify against the worksheets.

# Cohen Kappa scores for annotations with degree of emotions mentioned (0-3)
ck = []
ck_linear = []
ck_quadratic = []

# Cohen Kappa scores for annotations with degree of emotions absent (0/1)
ck_no_degree = []
ck_linear_no_degree = []
ck_quadratic_no_degree = []

# dicts keep insertion order, so this slice skips the first 15 ids in the order
# isa_scores was filled and keeps all the rest
for key in list(isa_scores)[15:]:
    isa_graded, n_graded = isa_scores[key], n_scores[key]
    isa_binary, n_binary = isa_scores_no_degree[key], n_scores_no_degree[key]

    ck.append(cohen_kappa_score(isa_graded, n_graded))
    ck_linear.append(cohen_kappa_score(isa_graded, n_graded, weights='linear'))
    ck_quadratic.append(cohen_kappa_score(isa_graded, n_graded, weights='quadratic'))

    ck_no_degree.append(cohen_kappa_score(isa_binary, n_binary))
    ck_linear_no_degree.append(cohen_kappa_score(isa_binary, n_binary, weights='linear'))
    ck_quadratic_no_degree.append(cohen_kappa_score(isa_binary, n_binary, weights='quadratic'))

print("For the next 50")

print("Not weighted Cohen Kappa with degrees in emotions: ", sum(ck)/len(ck))
print("Linear weighted Cohen Kappa with degrees in emotions: ", sum(ck_linear)/len(ck_linear))
print("Quadratic weighted Cohen Kappa with degrees in emotions: ", sum(ck_quadratic)/len(ck_quadratic))

print("\n")

# these three report the binarised (no-degree) scores, so they are labelled as such
print("Not weighted Cohen Kappa without degrees in emotions: ", sum(ck_no_degree)/len(ck_no_degree))
print("Linear weighted Cohen Kappa without degrees in emotions: ", sum(ck_linear_no_degree)/len(ck_linear_no_degree))
print("Quadratic weighted Cohen Kappa without degrees in emotions: ", sum(ck_quadratic_no_degree)/len(ck_quadratic_no_degree))

For the next 50
Not weighted Cohen Kappa with degrees in emotions:  0.2879090958746287
Linear weighted Cohen Kappa with degrees in emotions:  0.3446560188832838
Quadratic weighted Cohen Kappa with degrees in emotions:  0.3891425185858672


Not weighted Cohen Kappa with degrees in emotions:  0.40020074370074377
Linear weighted Cohen Kappa with degrees in emotions:  0.40020074370074377
Quadratic weighted Cohen Kappa with degrees in emotions:  0.40020074370074377
