IRR Analysis for Journey Phases
===

In [1]:
import os
import re
import pandas as pd
import numpy as np
import nltk
import sklearn
from collections import Counter
import itertools

In [2]:
import sys
sys.path.append("../../annotation_data")

In [22]:
from phase import *

In [4]:
annotation_data_dir = "/home/srivbane/shared/caringbridge/data/projects/qual-health-journeys/instance/annotation_data/export"
assert os.path.exists(annotation_data_dir)

In [5]:
annotation_files = [fname for fname in os.listdir(annotation_data_dir) if re.match("annotation_data_.*csv$", fname)]
annotation_files

['annotation_data_levon003.csv',
 'annotation_data_luoxx498.csv',
 'annotation_data_complete.csv']

In [5]:
user_dfs = []
for annotation_file in annotation_files:
    fname = os.path.splitext(annotation_file)[0]
    username = fname.split("_")[-1]
    if username in ("complete",):  # exclude some files from the above directory
        continue
    df = pd.read_csv(os.path.join(annotation_data_dir, annotation_file))
    user_dfs.append(df)
user_count = len(user_dfs)
user_count

2

In [6]:
df = pd.concat(user_dfs)
len(df)

1949

In [7]:
phase_df = df[df["annotation_type"] == "journal_journey_phase"]
len(phase_df)

1040

In [8]:
phase_df.head()

Unnamed: 0,annotation_type,site_id,journal_oid,annotator_username,data
74,journal_journey_phase,829111,522bf7224db9218c78a6a78d,levon003,screening
75,journal_journey_phase,839971,526f96afa689b47d6479a94c,levon003,treatment
76,journal_journey_phase,839971,52cfe564ca16b4735b5fc0a5,levon003,treatment
77,journal_journey_phase,839971,52da710d8b5cd3ee65abe51a,levon003,treatment
78,journal_journey_phase,839971,52e2d66dca16b47c40517008,levon003,treatment


In [9]:
phase_df.groupby(by="annotator_username").size()

annotator_username
levon003    233
luoxx498    807
dtype: int64

### Build the task data in the NLTK format

In [10]:
# Data format is: annotator label, item label, tag
data = []
grouped = phase_df.groupby(by=["site_id", "journal_oid"])
for name, group in grouped:
    if len(group) == user_count:
        for i in range(len(group)):
            annotation = group.iloc[i]
            username = annotation['annotator_username']
            item_label = str(annotation['site_id']) + "|" + annotation['journal_oid']
            tag = annotation['data']
            data_tup = (username, item_label, tag)
            data.append(data_tup)
len(data)

464

In [11]:
data[:10]

[('levon003', '829111|522bf7224db9218c78a6a78d', 'screening'),
 ('luoxx498', '829111|522bf7224db9218c78a6a78d', 'info_seeking'),
 ('levon003', '829111|522f77d7af3d79db0e6bcc7f', 'screening'),
 ('luoxx498', '829111|522f77d7af3d79db0e6bcc7f', 'screening|info_seeking'),
 ('levon003', '829111|523139414db9211d5f52cfd0', 'screening'),
 ('luoxx498', '829111|523139414db9211d5f52cfd0', 'info_seeking'),
 ('levon003', '829111|523212c8a589b44d58a7e9bc', 'info_seeking'),
 ('luoxx498', '829111|523212c8a589b44d58a7e9bc', 'info_seeking'),
 ('levon003', '829111|52332c9f0dad105611e55116', 'info_seeking'),
 ('luoxx498', '829111|52332c9f0dad105611e55116', 'info_seeking')]

In [12]:
def phase_distance_binary(phase1, phase2):
    return 0 if phase1 == phase2 else 1

def phase_distance(item1, item2):
    phases1 = item1.split("|")
    phases2 = item2.split("|")
    if len(phases1) == 1 and len(phases2) == 1:
        return phase_distance_binary(item1, item2)
    for phase_list in (phases1, phases2):
        if 'unknown' in phase_list:
            phase_list.remove('unknown')
    # the distance is the percentage of overlap of the labels
    total_labels = len(set(phases1 + phases2))
    agreeing_labels = len(set(phases1).intersection(set(phases2)))
    percent_agreement = agreeing_labels / total_labels
    distance = 1 - percent_agreement
    
    #if percent_agreement == 0 and total_labels > 2:  # these are particularly strong disagreements!
    #    print(phases1, phases2)
    
    return distance
    

agreement = nltk.agreement.AnnotationTask(data=data, distance=phase_distance)
print("kappa " + str(agreement.kappa()))
print("fleiss " + str(agreement.multi_kappa()))
print("alpha " + str(agreement.alpha()))
print("scotts " + str(agreement.pi()))

kappa 0.5276203966005666
fleiss 0.5276203966005666
alpha 0.42197427739999704
scotts 0.5209480488388796


In [13]:
#Visualize the greatest disagreements
assert user_count == 2
grouped = phase_df.groupby(by=["site_id", "journal_oid"])
for name, group in grouped:
    if len(group) == user_count:
        a1 = group.iloc[0]
        a2 = group.iloc[1]
        d1 = a1['data']
        d2 = a2['data']
        phases1 = d1.split("|")
        phases2 = d2.split("|")
        for phase_list in (phases1, phases2):
            if 'unknown' in phase_list:
                phase_list.remove('unknown')
        
        dist = phase_distance(d1, d2)
        if dist >= 0.8 and len(set(phases1 + phases2)) > 1:
            author1 = a1['annotator_username']
            author2 = a2['annotator_username']
            print("Site %d" % a1['site_id'])
            print(author1, phases1)
            print(author2, phases2)
            print()


Site 829111
levon003 ['screening']
luoxx498 ['info_seeking']

Site 829111
levon003 ['screening']
luoxx498 ['info_seeking']

Site 829111
levon003 ['info_seeking', 'treatment']
luoxx498 ['screening']

Site 829111
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 829111
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 839971
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 839971
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 839971
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 839971
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 839971
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 839971
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 855625
levon003 ['screening']
luoxx498 ['info_seeking']

Site 855625
levon003 ['screening']
luoxx498 ['info_seeking']

Site 857627
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 863706
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 863706
levon003 ['treatment', 'cured']
luoxx498 []



### Compute the Cohen's Kappa using sklearn

In [14]:
assert user_count == 2
y1 = []
y2 = []
disagreement_count = 0

grouped = phase_df.groupby(by=["site_id", "journal_oid"])
for name, group in grouped:
    if len(group) == user_count:
        a1 = group.iloc[0]
        a2 = group.iloc[1]
        d1 = a1['data']
        d2 = a2['data']
        phases1 = d1.split("|")
        phases2 = d2.split("|")
        for phase_list in (phases1, phases2):
            if 'unknown' in phase_list:
                phase_list.remove('unknown')
        if len(phases1) == 0 or len(phases2) == 0:
            continue  # skip journals that were labeled unknown by one of the raters
        d1 = "|".join(phases1)
        d2 = "|".join(phases2)
        y1.append(d1)
        y2.append(d2)
        if d1 != d2:  # print all instances of disagreement
            author1 = a1['annotator_username']
            author2 = a2['annotator_username']
            print("Site %d" % a1['site_id'])
            print(author1, phases1)
            print(author2, phases2)
            print()
            disagreement_count += 1
        
len(y1), disagreement_count

Site 829111
levon003 ['screening']
luoxx498 ['info_seeking']

Site 829111
levon003 ['screening']
luoxx498 ['screening', 'info_seeking']

Site 829111
levon003 ['screening']
luoxx498 ['info_seeking']

Site 829111
levon003 ['info_seeking']
luoxx498 ['info_seeking', 'treatment']

Site 829111
levon003 ['info_seeking', 'treatment']
luoxx498 ['screening']

Site 829111
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 829111
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 829111
levon003 ['treatment']
luoxx498 ['info_seeking', 'treatment']

Site 829111
levon003 ['treatment', 'cured']
luoxx498 ['cured']

Site 839971
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 839971
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 839971
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 839971
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 839971
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 839971
levon003 ['treatment']
luoxx498 ['info_seeking']

Site 839971
l

(213, 76)

In [15]:
sklearn.metrics.cohen_kappa_score(y1, y2)

0.353333599648464

## IRR part 2; directly from loaded annotations

In [6]:
phase_annotations = get_phase_annotations_by_username()
len(phase_annotations)

9127

In [7]:
df = pd.DataFrame(phase_annotations)
len(df)

9127

In [8]:
df.head(n=3)

Unnamed: 0,is_corrected,journal_oid,phases,site_id,username
0,False,51be29d46ca004e47900f6a7,"[treatment, unknown]",190464,naray114
1,False,526076fbe9cb6a8b045d6ce5,[treatment],623581,naray114
2,False,5262b655e9cb6a737a718a97,"[treatment, unknown]",623581,naray114


In [9]:
Counter(df['is_corrected']).most_common()

[(False, 8905), (True, 222)]

In [10]:
df = df[df['is_corrected'] == False]
len(df)

8905

In [11]:
irr_sites = []
irr_set_dir = "/home/srivbane/shared/caringbridge/data/projects/qual-health-journeys/instance/annotation_data/assignments/eriks074"
for filename in os.listdir(irr_set_dir):
    irr_filepath = os.path.join(irr_set_dir, filename)
    with open(irr_filepath, 'r') as infile:
        lines = infile.readlines()[1:]  # take all the lines, minus the header
        site_ids = [int(line) for line in lines if line.strip() != ""]
        print(f"Added {len(site_ids)} sites from IRR file '{filename}'.")
        irr_sites.extend(site_ids)
len(irr_sites)

Added 10 sites from IRR file 'irr_set_3_20180830.txt'.
Added 20 sites from IRR file 'multiset_4_20181206.txt'.
Added 20 sites from IRR file 'phases_set_4_20180904.txt'.
Added 20 sites from IRR file 'multiset_1_20181009.txt'.
Added 20 sites from IRR file 'multiset_3_20181019.txt'.


90

In [12]:
irr_df = df[df['site_id'].isin(irr_sites)]
len(irr_df)

2945

In [13]:
def get_agreement_df(user1, user2):
    agreements_list = []
    for key, group in df.groupby(by=("site_id", "journal_oid")):
        if len(group) <= 1:
            continue
        annotating_users = set(group['username'])
        if user1 not in annotating_users or user2 not in annotating_users:
            continue
        site_id, journal_oid = key

        user1_phases = group[group['username'] == user1].iloc[0]['phases']
        user2_phases = group[group['username'] == user2].iloc[0]['phases']

        if 'unknown' in user1_phases:
            user1_phases.remove('unknown')
        if 'unknown' in user2_phases:
            user2_phases.remove('unknown')
            
        user1_phase_str = "|".join(user1_phases)
        user2_phase_str = "|".join(user2_phases)

        agreement_score = 0
        is_strict_agreement = len(set(user1_phases) & set(user2_phases)) == max(len(user1_phases), len(user2_phases))
        is_partial_agreement = len(set(user1_phases) & set(user2_phases)) > 0 or len(user1_phases) == len(user2_phases)

        agreement = {
            "site_id": site_id,
            "journal_oid": journal_oid,
            "user1": user1,
            "user2": user2,
            "user1_phases": user1_phases,
            "user2_phases": user2_phases,
            "user1_phase_str": user1_phase_str,
            "user2_phase_str": user2_phase_str,
            "is_strict_agreement": is_strict_agreement,
            "is_partial_agreement": is_partial_agreement
        }
        agreements_list.append(agreement)
    agreement_df = pd.DataFrame(agreements_list)
    return agreement_df

In [20]:
user1 = "luoxx498"
user2 = "eriks074"
agreement_df = get_agreement_df(user1, user2)
len(agreement_df)

619

In [15]:
def get_strict_agreement_accuracy(df):
    acc = np.sum(df['is_strict_agreement']) / len(df)
    return acc

def get_partial_agreement_accuracy(df):
    acc = np.sum(df['is_partial_agreement']) / len(df)
    return acc

In [30]:
1- get_strict_agreement_accuracy(agreement_df)

0.04523424878836835

In [17]:
get_partial_agreement_accuracy(agreement_df)

0.9951534733441034

In [18]:
sklearn.metrics.cohen_kappa_score(agreement_df["user1_phase_str"], agreement_df["user2_phase_str"])

0.9250251977973015

In [29]:
for phase_label in phase_labels:
    user1_annotations = agreement_df.user1_phases.apply(lambda phase_list: phase_label in phase_list)
    user2_annotations = agreement_df.user2_phases.apply(lambda phase_list: phase_label in phase_list)
    disagreement = 1 - (np.sum(user1_annotations == user2_annotations) / len(agreement_df))
    cohen_kappa = sklearn.metrics.cohen_kappa_score(user1_annotations, user2_annotations)
    print(f"{phase_label:20} {disagreement*100:5.2f}%  {cohen_kappa:5.2f}  {np.sum(user1_annotations | user2_annotations):10}")

pretreatment          2.91%   0.91         141
treatment             2.58%   0.94         409
end_of_life           0.00%    nan           0
cured                 1.29%   0.95          91


  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)


In [19]:
usernames = ["levon003", "eriks074", "rubya001", "vachh007", "luoxx498"]
for user1, user2 in itertools.combinations(usernames, 2):
    agreement_df = get_agreement_df(user1, user2)
    num_sites = len(set(agreement_df["site_id"]))
    strict_acc = get_strict_agreement_accuracy(agreement_df)
    partial_acc = get_partial_agreement_accuracy(agreement_df)
    
    cohen_kappa = sklearn.metrics.cohen_kappa_score(agreement_df["user1_phase_str"], agreement_df["user2_phase_str"])
    
    print(f"{user1}/{user2}\ts={num_sites:3},j={len(agreement_df):3}\t{strict_acc:.2f}\t{partial_acc:.2f}\t{cohen_kappa:.2f}")

levon003/eriks074	s= 16,j=122	0.76	0.95	0.61
levon003/rubya001	s= 14,j=145	0.83	0.99	0.68
levon003/vachh007	s=  6,j= 73	0.77	0.97	0.54
levon003/luoxx498	s= 26,j=287	0.87	0.97	0.74
eriks074/rubya001	s= 10,j=139	0.80	0.94	0.65
eriks074/vachh007	s=  6,j= 73	0.85	0.93	0.75
eriks074/luoxx498	s= 31,j=619	0.95	1.00	0.93
rubya001/vachh007	s=  6,j= 73	0.78	1.00	0.60
rubya001/luoxx498	s= 10,j=138	0.79	0.93	0.64
vachh007/luoxx498	s=  6,j= 73	0.85	0.93	0.75


In [32]:
resp_counts = np.zeros(len(phase_labels))
resp_disagreements = np.zeros(len(phase_labels))
multiple_annotator_count = 0  # tracks number of journals with multiple annotators
total_disagreements = 0
none_count = 0
for key, group in tqdm(df.groupby(by=('site_id', 'journal_oid'), sort=False)):
    all_resps = set()
    for resp_list in group.phases:
        all_resps.update(resp_list)
    if len(all_resps) == 0:
        none_count += 1
        continue  # if all annotators indicated none, there's nothing else to do here
    for i, resp_label in enumerate(phase_labels):
        if resp_label in all_resps:
            resp_counts[i] += 1
    if len(group) >= 2:
        multiple_annotator_count += 1
        disagreement = False
        for i, resp_label in enumerate(phase_labels):
            if resp_label in all_resps:
                for resp_list in group.phases:
                    if resp_label not in resp_list:
                        resp_disagreements[i] += 1
                        disagreement = True
                        break
        if disagreement:
            total_disagreements += 1

resp_proportions = resp_counts / len(df)
resp_disagreement_proportions = resp_disagreements / multiple_annotator_count
for i, resp_label in enumerate(phase_labels):
    code = "  "
    occurrence = resp_proportions[i]
    disagreement = resp_disagreement_proportions[i]
    print(f"{resp_label:20}  {code:3}  {occurrence*100:.1f}  {disagreement * 100:.1f}")
print()
print(f"Overall disagreement: {total_disagreements / multiple_annotator_count}")
print(f"Non-none proportion: {(1 - (none_count / len(df))) * 100:.2f}")

100%|██████████| 7553/7553 [00:01<00:00, 4397.21it/s]

pretreatment               7.4  5.5
treatment                  69.7  7.4
end_of_life                1.9  0.2
cured                      6.4  3.6

Overall disagreement: 0.10167464114832536
Non-none proportion: 99.62



