In [8]:
# Read the annotated classification data exported from Zooniverse
# and build the kappa (inter-annotator agreement) matrices.
import json
import os

import numpy as np
import pandas as pd

# Location of the classification export, relative to the notebook.
dat = "../data/zooni/sud-project-classifications.csv"
# Raw classifications; later cells read its `annotations`, `user_name`
# and `subject_ids` columns.
df = pd.read_csv(filepath_or_buffer=dat)

In [10]:
# Parse the JSON-encoded `annotations` column once per row.
# json.loads replaces eval here: the text comes from an external CSV export,
# so eval would execute whatever expression the file contains, and it also
# raises NameError on the JSON literals null / true / false.
parsed_annotations = df['annotations'].apply(json.loads)

# Each parsed annotation is a list of task answers addressed by position:
# 0 -> connection, 1 -> subject, 2 -> objective. Only the answer's `value`
# field is kept.
df['connection'] = parsed_annotations.apply(lambda tasks: tasks[0]['value'])
df['subject'] = parsed_annotations.apply(lambda tasks: tasks[1]['value'])
df['objective'] = parsed_annotations.apply(lambda tasks: tasks[2]['value'])


In [16]:
# Split the classifications by annotator (lauren and donald) so their answers
# can be compared subject by subject; agreement is coded 1 = agree, 0 = disagree.
annotation_cols = ['connection', 'subject_ids', 'subject', 'objective']

# lauren's classifications
lauren = df.loc[df['user_name'] == 'laurenmiller324', annotation_cols]
# donald's classifications
donald = df.loc[df['user_name'] == 'dhattier', annotation_cols]

In [29]:
# Pair the two annotators' `connection` answers on the shared subject id
# (merge defaults to an inner join, so only subjects present in both remain).
connection = pd.merge(
    lauren[['subject_ids', 'connection']],
    donald[['connection', 'subject_ids']],
    on='subject_ids',
    suffixes=('_lauren', '_donald'),
)
# 1 where both annotators gave the same answer, 0 otherwise
connection['agree'] = (connection['connection_lauren'] == connection['connection_donald']).astype(int)

In [62]:
# Write the paired connection answers to disk, split by outcome.
agreement_flag = connection['agree']

# rows where the annotators differ
disagree = connection.loc[agreement_flag == 0]
disagree.to_csv("../data/zooni/disagree_connection.csv")
# rows where the annotators match
agree = connection.loc[agreement_flag == 1]
agree.to_csv("../data/zooni/agree_connection.csv")

In [31]:
# Confusion matrix of the paired `connection` answers:
# rows = lauren's answer, columns = donald's answer, cells = number of pairs.
kappa = pd.crosstab(connection['connection_lauren'], connection['connection_donald'])
# Put both axes on the same label set (every answer either annotator used).
# Rows and columns are otherwise derived independently, so an answer used by
# only one annotator would make the table non-square or shift entries off the
# diagonal, and the diagonal would no longer count agreements.
connection_labels = kappa.index.union(kappa.columns)
kappa = (
    kappa
    .reindex(index=connection_labels, columns=connection_labels, fill_value=0)
    .rename_axis(index='connection_lauren', columns='connection_donald')
    .astype(int)
)
# calculate kappa
# https://en.wikipedia.org/wiki/Cohen%27s_kappa
kappa_matrix = kappa.values

In [32]:

# Cohen's kappa for `connection`, computed from the confusion matrix.
# Note: this rebinds `kappa` from the confusion table to the scalar statistic.
n = kappa_matrix.sum()                        # total number of paired ratings
p0 = np.trace(kappa_matrix) / n               # observed agreement (diagonal share)
col_totals = kappa_matrix.sum(axis=0)
row_totals = kappa_matrix.sum(axis=1)
pe = (col_totals * row_totals).sum() / n**2   # agreement expected by chance
kappa = (p0 - pe) / (1 - pe)
print(kappa)

0.25742574257425754


---

subject

In [46]:
# Pair the two annotators' `subject` answers on the shared subject id
# (merge defaults to an inner join, so only subjects present in both remain).
subject = pd.merge(
    lauren[['subject_ids', 'subject']],
    donald[['subject', 'subject_ids']],
    on='subject_ids',
    suffixes=('_lauren', '_donald'),
)
# 1 where both annotators gave the same answer, 0 otherwise
subject['agree'] = (subject['subject_lauren'] == subject['subject_donald']).astype(int)

In [49]:
# Confusion matrix of the paired `subject` answers:
# rows = lauren's answer, columns = donald's answer, cells = number of pairs.
subject_table = pd.crosstab(subject['subject_lauren'], subject['subject_donald'])
# Align both axes on the union of answers used by either annotator. This
# replaces padding with a zero column on the right, which only lines up when
# exactly one answer is missing from the columns and it happens to sort last.
subject_labels = subject_table.index.union(subject_table.columns)
subject_table = subject_table.reindex(index=subject_labels, columns=subject_labels, fill_value=0)
# calculate kappa
# https://en.wikipedia.org/wiki/Cohen%27s_kappa
kappa_matrix_subject = subject_table.values
n_s = np.sum(kappa_matrix_subject)
# Normalise by this table's own total (n_s), not the `n` left in the kernel
# by the connection cell.
p0_s = np.sum(np.diag(kappa_matrix_subject)) / n_s
pe_s = np.sum(np.sum(kappa_matrix_subject, axis=0) * np.sum(kappa_matrix_subject, axis=1)) / n_s**2
kappa_subject = (p0_s - pe_s) / (1 - pe_s)
print(kappa_subject)

0.45205479452054803


In [61]:
# Write the subject pairs on which the annotators differ to disk.
subject_mismatch = subject['agree'] == 0
disagree_subject = subject.loc[subject_mismatch]
disagree_subject.to_csv("../data/zooni/disagree_subject.csv")

---
Objectives

In [51]:
# Pair the two annotators' `objective` answers on the shared subject id.
objective = pd.merge(
    lauren[['subject_ids', 'objective']],
    donald[['objective', 'subject_ids']],
    on='subject_ids',
    suffixes=('_lauren', '_donald'),
)
# Normalise every answer to a sorted tuple: sorting makes the comparison
# independent of selection order, and tuples (unlike lists) are hashable.
for rater_col in ('objective_lauren', 'objective_donald'):
    objective[rater_col] = objective[rater_col].apply(lambda picks: tuple(sorted(picks)))
# 1 where both annotators selected the same set of values, 0 otherwise
objective['agree'] = (objective['objective_lauren'] == objective['objective_donald']).astype(int)

# Build the objective confusion matrix over ONE shared label list so rows and
# columns line up. This replaces pivoting and appending a single zero column,
# which is only correct when exactly one label is missing from the columns
# and it happens to sort last.
# The labels are tuples, so the matrix is filled by hand rather than via
# pandas index operations (pandas can turn tuple labels into a MultiIndex).
lauren_objectives = objective['objective_lauren'].tolist()
donald_objectives = objective['objective_donald'].tolist()
# every distinct label used by either annotator, in first-seen order
objective_labels = list(dict.fromkeys(lauren_objectives + donald_objectives))
label_position = {label: pos for pos, label in enumerate(objective_labels)}

# rows = lauren's label, columns = donald's label, cells = number of pairs
kappa_matrix_objective = np.zeros((len(objective_labels), len(objective_labels)), dtype=int)
for lauren_label, donald_label in zip(lauren_objectives, donald_objectives):
    kappa_matrix_objective[label_position[lauren_label], label_position[donald_label]] += 1

# calculate kappa
n_o = np.sum(kappa_matrix_objective)
# Normalise by this table's own total (n_o), not the `n` left in the kernel
# by the connection cell.
p0_o = np.sum(np.diag(kappa_matrix_objective)) / n_o
pe_o = np.sum(np.sum(kappa_matrix_objective, axis=0) * np.sum(kappa_matrix_objective, axis=1)) / n_o**2
kappa_objective = (p0_o - pe_o) / (1 - pe_o)
print(kappa_objective)

0.11971830985915494


In [None]:
# calculate intra-class correlation
# https://en.wikipedia.org/wiki/Intraclass_correlation

