In [1]:
import pandas as pd
import numpy as np
import json
import os
import re

In [2]:
def get_character_index(story_lines):
  indexes = []
  cur_index = 0
  for i in range(len(story_lines)):
    indexes.append(cur_index)
    cur_index += len(story_lines[i])
  indexes.append(cur_index)
  return indexes

In [3]:
schoolmistress_story_paragraphs = open('schoolmistress_paragraphs.txt', 'r').readlines()
schoolmistress_story_paragraphs_indexes = get_character_index(schoolmistress_story_paragraphs)
schoolmistress_story_sentences = open('schoolmistress.txt', 'r').readlines()
schoolmistress_story_sentences_indexes = get_character_index(schoolmistress_story_sentences)

el_story_paragraphs = open('expensivelessons_paragraphs.txt', 'r').readlines()
el_story_paragraphs_indexes = get_character_index(el_story_paragraphs)
el_story_sentences = open('expensivelessons.txt', 'r').readlines()
el_story_sentences_indexes = get_character_index(el_story_sentences)

In [4]:
def update_indexes(highlight, indexes):
  for i in range(len(highlight) - 1):
    highlight[i]['start'] = highlight[i]['start'] + indexes[highlight[-1]['paragraph']]
    highlight[i]['end'] = highlight[i]['end'] + indexes[highlight[-1]['paragraph']]
  return highlight

In [5]:
def parse_highlights(filename, story_sentences, story_paragraphs, story_sentences_indexes, story_paragraphs_indexes):
  highlight_file = open(f'./highlighting_logs/{filename}.log', 'r')
  CATEGORY_MAP = {'None': 0, 'Present': 1, 'Confused': 2, 'Curious': 3, 'Connected': 4, 'Other': 5}
  lines = highlight_file.readlines()
  highlights = [None] * len(story_paragraphs)
  survey_results = []
  for line in lines:
    start = line.find('Highlight:')
    if start != -1:
      highlight = json.loads(str(line[start + len('Highlight: '):].replace("'", '"')), )
      highlight_absolute_indexes = update_indexes(highlight, story_paragraphs_indexes)
      highlights[highlight[-1]['paragraph']] = highlight_absolute_indexes
    survey = re.search("Survey\d:\s", line)
    if survey != None:
      answers_str = """{0}"""
      answers_str = answers_str.format(str(line[survey.end():]))

      # oh lord.
      # print(re.sub("'}", '"}', re.sub("{'", '{"', re.sub("', ", '", ', re.sub(r"(\: |\, )'", r'\1"', re.sub("(\d)'", r'\1"', re.sub('\\\\', "", re.sub('"', "'", answers_str))))))))
      survey_dict = json.loads(re.sub("'}", '"}', re.sub("{'", '{"', re.sub("', ", '", ', re.sub(r"(\: |\, )'", r'\1"', re.sub("(\d)'", r'\1"', re.sub('\\\\', "", re.sub('"', "'", answers_str))))))), )
      survey_dict['participant_id'] = filename.split('_')[0].lower()
      survey_dict['story'] = filename.split('_')[1].replace('.log', '')
      survey_results.append(survey_dict)
  sentence_highlights = [{'category': 0, 'proportion': 0.0, 'percent_highlighted': 0.0}] * len(story_sentences)
  for i in range(len(highlights)):
    if highlights[i] != None:
      for k in range(len(highlights[i]) - 1):
        start_sent = None
        end_sent = None
        for p in range(len(story_sentences_indexes) - 1):
          if highlights[i][k]['start'] >= story_sentences_indexes[p] and highlights[i][k]['start'] < story_sentences_indexes[p+1]:
            start_sent = p
          if highlights[i][k]['end'] >= story_sentences_indexes[p] and highlights[i][k]['end'] < story_sentences_indexes[p+1]:
            end_sent = p
          if start_sent != None:
            end_index = story_sentences_indexes[p+1] if end_sent == None else highlights[i][k]['end']
            start_index = story_sentences_indexes[p] if story_sentences_indexes[p] > highlights[i][k]['start'] else highlights[i][k]['start']
            proportion = (end_index - start_index) / (story_sentences_indexes[p+1] - story_sentences_indexes[p])
            if proportion > sentence_highlights[p]['proportion']:
              sentence_highlights[p] = {'category': CATEGORY_MAP[highlights[i][k]['tag']], 'proportion': proportion, 'percent_highlighted': sentence_highlights[p]['percent_highlighted'] + proportion}
            else:
              sentence_highlights[p]['percent_highlighted'] = sentence_highlights[p]['percent_highlighted'] + proportion
          if end_sent != None:
            break
  for sentence in sentence_highlights:
    if sentence['proportion'] == 0.0:
      sentence['proportion'] = 1.0
  highlight_df = pd.DataFrame.from_dict(sentence_highlights)
  highlight_df.to_csv(f'./highlights/{filename}.csv')
  first_survey = pd.DataFrame.from_dict([survey_results[0]])
  last_survey = pd.DataFrame.from_dict([survey_results[1]])
  survey_df = pd.concat([first_survey, last_survey], axis=1)
  return survey_df.loc[:,~survey_df.columns.duplicated()].copy()
  

In [6]:
HIGHLIGHT_DIR = "./highlighting_logs/"
survey_df = pd.DataFrame()
for filename in os.listdir(HIGHLIGHT_DIR):
    f = os.path.join(HIGHLIGHT_DIR,filename)
    if os.path.isfile(f):
        filename = f.replace(HIGHLIGHT_DIR, '').replace('.log', '')
        if 'schoolmistress' in filename:
          survey = parse_highlights(filename, schoolmistress_story_sentences, schoolmistress_story_paragraphs, schoolmistress_story_sentences_indexes, schoolmistress_story_paragraphs_indexes)
        elif 'el' in filename:
          survey = parse_highlights(filename, el_story_sentences, el_story_paragraphs, el_story_sentences_indexes, el_story_paragraphs_indexes)
        if len(survey_df) == 0:
          survey_df = survey
        else:
          survey_df = pd.concat([survey_df, survey], ignore_index=True)
survey_df.to_csv('./survey_results.csv')



In [7]:
def normalize_ids(x):
    return str(x).lower() if "id" in str(x).lower() else "id" + str(x)

In [8]:
highlight_percent=pd.read_csv("./results/id10_el.csv", usecols=['RECORDING_SESSION_LABEL', 'Unnamed: 0.1', 'percent_highlighted', 'valence_avg', 'valence_max', 'valence_min', 'arousal_avg', 'arousal_max', 'arousal_min'])
highlight_percent['story'] = ['EL']*len(highlight_percent)
highlight_percent['RECORDING_SESSION_LABEL'] = highlight_percent['RECORDING_SESSION_LABEL'].apply(normalize_ids)
for filename in os.listdir('./results/'):
    f = os.path.join('./results/',filename)
    if os.path.isfile(f) and f != "./results/id10_el.csv":
        df = pd.read_csv(f, usecols=['RECORDING_SESSION_LABEL', 'Unnamed: 0.1', 'percent_highlighted', 'valence_avg', 'valence_max', 'valence_min', 'arousal_avg', 'arousal_max', 'arousal_min'])
        if "schoolmistress" in f:
            df['story'] = ['SM']*len(df)
        else:
            df['story'] = ['EL']*len(df)
        highlight_percent['RECORDING_SESSION_LABEL'] = highlight_percent['RECORDING_SESSION_LABEL'].apply(normalize_ids)
        highlight_percent = pd.concat([highlight_percent, df])


highlight_percent = highlight_percent.drop_duplicates()

In [9]:
combined_df = pd.read_csv("results_combined.csv")
combined_df['RECORDING_SESSION_LABEL'] = combined_df['RECORDING_SESSION_LABEL'].apply(normalize_ids)

In [10]:
merged = pd.merge(combined_df, highlight_percent, how='inner', left_on=['RECORDING_SESSION_LABEL', 'story', 'Unnamed..0.1'], right_on=['RECORDING_SESSION_LABEL', 'story', 'Unnamed: 0.1'], copy=False)

In [11]:
merged.to_csv("combined_with_percent_highlighted.csv")

In [12]:

def fleiss_kappa(M):
    ## Code from: https://towardsdatascience.com/inter-annotator-agreement-2f46c6d37bf3
    """Computes Fleiss' kappa for group of annotators.
    :param M: a matrix of shape (:attr:'N', :attr:'k') with 'N' = number of subjects and 'k' = the number of categories.
        'M[i, j]' represent the number of raters who assigned the 'i'th subject to the 'j'th category.
    :type: numpy matrix
    :rtype: float
    :return: Fleiss' kappa score
    """
    N, k = M.shape  # N is # of items, k is # of categories
    n_annotators = float(np.sum(M[0, :]))  # # of annotators
    tot_annotations = N * n_annotators  # the total # of annotations
    category_sum = np.sum(M, axis=0)  # the sum of each category over all items

    # chance agreement
    p = category_sum / tot_annotations  # the distribution of each category over all annotations
    PbarE = np.sum(p * p)  # average chance agreement over all categories

    # observed agreement
    P = (np.sum(M * M, axis=1) - n_annotators) / (n_annotators * (n_annotators - 1))
    Pbar = np.sum(P) / N  # add all observed agreement chances per item and divide by amount of items

    return round((Pbar - PbarE) / (1 - PbarE), 4)

In [13]:
# prepare highlights for inter annotator score computation
combined_sm = combined_df[combined_df['story'] == 'SM']
combined_el = combined_df[combined_df['story'] == 'EL']

In [36]:
combined_sm['category'] = combined_sm['category'].astype('category')
highlights_sm = combined_sm.groupby(by=['X', 'category']).agg({'category': 'count'})
highlights_sm.shape[0]

combined_el['category'] = combined_el['category'].astype('category')
highlights_el = combined_el.groupby(by=['X', 'category']).agg({'category': 'count'})
highlights_el.shape[0]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  combined_sm['category'] = combined_sm['category'].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  combined_el['category'] = combined_el['category'].astype('category')


708

In [37]:
NUM_CAT = 6
annotator_mat = np.zeros((len(schoolmistress_story_sentences), NUM_CAT), dtype=np.int32)
count=0
for sent in range(len(schoolmistress_story_sentences)):
    for cat in range(NUM_CAT):
        count = 0 if len(highlights_sm[(sent*NUM_CAT)+cat:(sent*NUM_CAT)+cat+1]['category'].to_numpy()) == 0 else highlights_sm[(sent*NUM_CAT)+cat:(sent*NUM_CAT)+cat+1]['category'].to_numpy()[0]
        annotator_mat[sent][cat] = count 

annotator_mat_el = np.zeros((len(el_story_sentences), NUM_CAT), dtype=np.int32)
count=0
for sent in range(len(el_story_sentences)):
    for cat in range(NUM_CAT):
        count = 0 if len(highlights_el[(sent*NUM_CAT)+cat:(sent*NUM_CAT)+cat+1]['category'].to_numpy()) == 0 else highlights_el[(sent*NUM_CAT)+cat:(sent*NUM_CAT)+cat+1]['category'].to_numpy()[0]
        annotator_mat_el[sent][cat] = count


In [30]:
# Inter annotator agreement if each highlight category, including none is a separate category
fleiss_kappa_score = fleiss_kappa(annotator_mat)
fleiss_kappa_score

0.1289

In [38]:
fleiss_kappa_score_el = fleiss_kappa(annotator_mat_el)
fleiss_kappa_score_el

0.0662

In [39]:
# Inter annotator agreement if we consider only two categories: highlighted or not highlighted
combined_sm['binary_label'] = combined_sm['category'].map(lambda x: 0 if x == 0 else 1)
binary_highlights_sm = combined_sm.groupby(by=['X', 'binary_label']).agg({'binary_label': 'count'})

combined_el['binary_label'] = combined_el['category'].map(lambda x: 0 if x == 0 else 1)
binary_highlights_el = combined_el.groupby(by=['X', 'binary_label']).agg({'binary_label': 'count'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  combined_sm['binary_label'] = combined_sm['category'].map(lambda x: 0 if x == 0 else 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  combined_el['binary_label'] = combined_el['category'].map(lambda x: 0 if x == 0 else 1)


In [32]:

binary_annotator_mat = np.zeros((len(schoolmistress_story_sentences), 2), dtype=np.int32)
count=0
for sent in range(len(schoolmistress_story_sentences)):
    for cat in range(2):
        count = 0 if len(binary_highlights_sm[(sent*2)+cat:(sent*2)+cat+1]['binary_label'].to_numpy()) == 0 else binary_highlights_sm[(sent*2)+cat:(sent*2)+cat+1]['binary_label'].to_numpy()[0]
        binary_annotator_mat[sent][cat] = count 



In [40]:

binary_annotator_mat_el = np.zeros((len(el_story_sentences), 2), dtype=np.int32)
count=0
for sent in range(len(el_story_sentences)):
    for cat in range(2):
        count = 0 if len(binary_highlights_el[(sent*2)+cat:(sent*2)+cat+1]['binary_label'].to_numpy()) == 0 else binary_highlights_el[(sent*2)+cat:(sent*2)+cat+1]['binary_label'].to_numpy()[0]
        binary_annotator_mat_el[sent][cat] = count 


In [33]:
binary_fleiss = fleiss_kappa(binary_annotator_mat)
binary_fleiss

0.1529

In [41]:
binary_fleiss_el = fleiss_kappa(binary_annotator_mat_el)
binary_fleiss_el

0.1098

In [34]:
# remove none category to see agreement when highlighted
no_none_annotator_mat = np.delete(annotator_mat, 0, 1)

In [42]:
no_none_annotator_mat_el = np.delete(annotator_mat_el, 0, 1)

In [35]:
no_none_fleiss = fleiss_kappa(no_none_annotator_mat)
no_none_fleiss

0.887

In [43]:
no_none_fleiss_el = fleiss_kappa(no_none_annotator_mat_el)
no_none_fleiss_el

0.0622