<a href="https://colab.research.google.com/github/siddadel/kalidas/blob/main/Gold.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


**Prepare annotation evaluation set**
* Read CSV
* Select subset of sentences randomly for annotation: n=50-60
* Create df with columns: sentences, relation, inferences, likert cat 1-4
  * Drop all columns besides: Unnamed: 0, Unnamed: 0.1, Unnamed: 0.1.1, film, item, MotivatedByGoal, xAttr, xReact
  * Pivot relation columns into rows
  * Turn each relation list into own row: Split elements of list into each row
  * Create inference sentence for evaluation in COMET paper 
  * Clean 
  * Write CSV

**Prepare annotated output: Evaluate accuaracy & CI**
 * To come

In [1]:
from google.colab import files
from google.colab import drive
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [168]:
import pandas as pd
import random
import os
import numpy as np
import re
import numpy
import nltk
from scipy import stats
from sklearn.metrics import accuracy_score
import spacy
from collections import Counter
from joblib import Parallel, delayed

## Annotation set preparation

In [3]:
def get_inference_sentence(row):
  xattr_sent = 'X is seen as '
  xintent_sent = '...because PersonX wanted '
  xmotiv_sent = 'is a step towards accomplishing the goal '
  xreact_sent = 'As a result, PersonX feels '

  if row['relation'] == 'xAttr':
    row[0] = xattr_sent + str(row[0])
    row[1] = xattr_sent + str(row[1])
    row[2] = xattr_sent + str(row[2])
    row[3] = xattr_sent + str(row[3])
    row[4] = xattr_sent + str(row[4])
  elif row['relation'] == 'MotivatedByGoal':
    row[0] = xintent_sent + str(row[0])
    row[1] = xintent_sent + str(row[1])
    row[2] = xintent_sent + str(row[2])
    row[3] = xintent_sent + str(row[3])
    row[4] = xintent_sent + str(row[4])
  else: # xReact
    row[0] = xreact_sent + str(row[0])
    row[1] = xreact_sent + str(row[1])
    row[2] = xreact_sent + str(row[2])
    row[3] = xreact_sent + str(row[3])
    row[4] = xreact_sent + str(row[4])

  return row


  

In [4]:
def create_annotation_set(n: int, relation_col: list, id_col: list, df_in: pd.DataFrame):
  df = df_in.copy()
  # drop irrelevant columns
  df = df.drop(columns=[x for x in df.columns.tolist() if x not in (id_col + relation_col)])                                 

  # select n annotations
  df = df.sample(frac=1, random_state=17)[0:n]#.reset_index(drop=True)

  # pivot columns: stack inference relations
  df = df.melt(id_vars=id_col, var_name="relation", value_name="inference")

  # turn list of inferences into seperate columns with inferences
  df_inf = pd.DataFrame({})

  for row in range(df.shape[0]):
    curr_inf_list = df.inference[row].lower().strip('][').strip('\'').split('\', \'' )
    curr_inf_df = pd.DataFrame(data=[curr_inf_list])
    df_inf = pd.concat([df_inf, curr_inf_df]).reset_index(drop=True)

  df2 = pd.concat([df, df_inf], axis=1)

  # make individual inference words/snippets full sentences
  df2 = df2.apply(lambda x: get_inference_sentence(x), axis=1)

  # pivot columns: stack inferences
  df2 = df2.melt(id_vars=id_col + ["relation"] + ["inference"], var_name="inference_list_id", value_name="inference_sentence")

  # select random inference sentence for sentence x relation x inference sentence group (1 from 5)
  fn = lambda obj: obj.loc[np.random.choice(obj.index, size=1, replace=False),:]
  df2 = df2.groupby(id_col + ['relation'], as_index=False).apply(fn).reset_index(drop=True)

  # add_likert_scale
  df2["likert1"] = ''
  df2["likert2"] = ''
  df2["likert3"] = ''
  df2["likert4"] = ''

  return df2


# Pipeline run


### Create annotation set

In [5]:
# Read CSV
df = pd.read_csv('/content/drive/MyDrive/Berkeley/INFO256/annotation_survey/concatenated (10).csv')
#df

In [6]:
#id_col = ['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.1.1', 'film', 'item']
id_col = ['Unnamed: 0.1', 'film', 'item']
rel_col = ['MotivatedByGoal', 'xAttr', 'xReact']

annotation_set = create_annotation_set(100, rel_col, id_col, df)
#annotation_set

In [42]:
# write csv
annotation_set.to_csv('/content/drive/MyDrive/Berkeley/INFO256/annotation_survey/comet_annotation_v01.csv')

## Evaluate annotations

In [167]:
df_ann1 = pd.read_csv('/content/drive/MyDrive/Berkeley/INFO256/data/Annotation 1.csv')
df_ann2 = pd.read_csv('/content/drive/MyDrive/Berkeley/INFO256/data/ Annotation 2.csv')

df_ann1 = df_ann1.drop(columns='Zeitstempel')
df_ann2 = df_ann2.drop(columns='Zeitstempel')

df_ann = pd.concat([df_ann1, df_ann2], axis=1)
df_ann.shape

#df_ann

(3, 180)

In [169]:
def get_relation(s):
  if 'As a result, PersonX' in s:
    relation = 'xReact'
  elif 'X is seen as' in s:
    relation = 'xAttr'
  else:
    relation = 'MotivatedByGoal'

  return relation


def get_valid_vote(row):
  mode = stats.mode([row[0], row[1], row[2]])
  return mode[0][0]


def preprocess_annotation(df):
  df = df.copy()
  df = df.T.reset_index()

  # Replace labels
  df = df.replace('Always/often', True)
  df = df.replace('Sometimes/likely', True)
  df = df.replace('Farfetched/never', False)
  df = df.replace('Invalid', False)

  # Get relation per inference
  df['relation'] = df['index'].apply(lambda x: get_relation(x))

  # Get most common vote per inference
  df['mode'] = df.apply(lambda x: get_valid_vote(x), axis=1)
  df = df.drop(columns=[0,1,2]).sort_values('relation')

  # Group by relation
  df_group = df.groupby(['relation','mode']).count().reset_index()

  return df_group





In [147]:
def calculate_metrics(arr):
  score = accuracy_score(test[:,-1], predictions)
	print(score)

  # confidence intervals
  alpha = 0.95
  p = ((1.0-alpha)/2.0) * 100
  lower = max(0.0, numpy.percentile(stats, p))
  p = (alpha+((1.0-alpha)/2.0)) * 100
  upper = min(1.0, numpy.percentile(stats, p))
  print('%.1f confidence interval %.1f%% and %.1f%%' % (alpha*100, lower*100, upper*100))

IndentationError: ignored

In [170]:
df_summ = preprocess_annotation(df_ann)
print(df_summ)

#print(df_ann1.iloc[0,3])

print(" ------------- ")
print("MotivatedByGoal accuracy", df_summ.loc[1, 'index'] / (df_summ.loc[0, 'index'] + df_summ.loc[1, 'index']))
print("xAttr accuracy", df_summ.loc[3, 'index'] / (df_summ.loc[2, 'index'] + df_summ.loc[3, 'index']))
print("xReact accuracy", df_summ.loc[5, 'index'] / (df_summ.loc[4, 'index'] + df_summ.loc[5, 'index']))

          relation  mode  index
0  MotivatedByGoal     0     42
1  MotivatedByGoal  True     18
2            xAttr     0      7
3            xAttr  True     53
4           xReact     0     13
5           xReact  True     47
 ------------- 
MotivatedByGoal accuracy 0.3
xAttr accuracy 0.8833333333333333
xReact accuracy 0.7833333333333333
