# Create test sets for each annotation dimension

In [1]:
import pandas as pd

limit = 350 # because Elin only annotated the first 350 sentences

pappa_elin = pd.read_excel('../data/human_annotation/ELINsample_for_check_human.xlsx', sheet_name='main')[:limit]
pappa_lena = pd.read_excel('../data/human_annotation/Lenasample_for_check_human.xlsx')[:limit]
pappa_oscar = pd.read_excel('../data/human_annotation/Oscarsample_for_check_human.xlsx')[:limit]

names = ['elin', 'lena', 'oscar']
dfs = [pappa_elin, pappa_lena, pappa_oscar]

# rename columns dim1, dim2, dim3 to dim1_name, dim2_name, dim3_name
for name, df in zip(names, dfs):
    df.rename(columns={'dim1': f'dim1_{name}', 'dim2': f'dim2_{name}', 'dim3': f'dim3_{name}'}, inplace=True)

# merge dfs on "Unnamed: 0" (i.e. sentID)
df = pd.concat([df.set_index('Unnamed: 0') for df in dfs], axis=1, join='inner').reset_index()

# clean repeated text_clean columns
df = df.loc[:,~df.columns.duplicated()]

# rename Unnamed: 0 to sentID
df.rename(columns={'Unnamed: 0': 'sentID'}, inplace=True)

# read aggregated labels from separated column files
pappa_dim1_agg = pd.read_csv('../data/human_annotation/dim1.prediction', header=None)[:limit]
pappa_dim2_agg = pd.read_csv('../data/human_annotation/dim2.prediction', header=None)[:limit]
pappa_dim3_agg = pd.read_csv('../data/human_annotation/dim3.prediction', header=None)[:limit]

# add aggregated predictions to df as columns
df['dim1_agg'] = pappa_dim1_agg
df['dim2_agg'] = pappa_dim2_agg
df['dim3_agg'] = pappa_dim3_agg

# reorder columns so that dimN columns are next to each other, but text_clean and sentID are first
df = df[['sentID', 'text_clean'] + [col for col in df.columns.sort_values() if col.startswith('dim')]]

# create three separate dfs for each dimension
df_dim1 = df[['sentID', 'text_clean', 'dim1_elin', 'dim1_lena', 'dim1_oscar', 'dim1_agg']]
df_dim2 = df[['sentID', 'text_clean', 'dim2_elin', 'dim2_lena', 'dim2_oscar', 'dim2_agg']]
df_dim3 = df[['sentID', 'text_clean', 'dim3_elin', 'dim3_lena', 'dim3_oscar', 'dim3_agg']]

# rename columns to have only the name of the annotator
df_dim1.rename(columns={'dim1_elin': 'elin', 'dim1_lena': 'lena', 'dim1_oscar': 'oscar', 'dim1_agg': 'agg'}, inplace=True)
df_dim2.rename(columns={'dim2_elin': 'elin', 'dim2_lena': 'lena', 'dim2_oscar': 'oscar', 'dim2_agg': 'agg'}, inplace=True)
df_dim3.rename(columns={'dim3_elin': 'elin', 'dim3_lena': 'lena', 'dim3_oscar': 'oscar', 'dim3_agg': 'agg'}, inplace=True)

# remove rows with NA in all annotators
# df_dim2 = df_dim2[df_dim2[['elin', 'lena', 'oscar', 'agg']].notna().all(axis=1)]
# df_dim3 = df_dim3[df_dim3[['elin', 'lena', 'oscar', 'agg']].notna().all(axis=1)]
# sanity check: raise error if any row in df_dim2/3 contain labels that should not be there
# assert df_dim3[~df_dim3[['elin', 'lena', 'oscar', 'agg']].isin(['IDEAL', 'DESCRIPTIVE']).any(axis=1)].shape[0] == 0
# assert df_dim2[~df_dim2[['elin', 'lena', 'oscar', 'agg']].isin(['EXPLICIT', 'IMPLICIT']).any(axis=1)].shape[0] == 0

# fill NA with NA
df_dim1.fillna('NA', inplace=True)
df_dim2.fillna('NA', inplace=True)
df_dim3.fillna('NA', inplace=True)

# create dfs with reduced and binary labels for dim1
df_dim1_reduced = df_dim1.copy()
df_dim1_binary = df_dim1.copy()
for name in ['elin', 'lena', 'oscar', 'agg']:
    df_dim1_reduced[name] = df_dim1_reduced[name].apply(lambda x: x[:10])
    df_dim1_binary[name] = df_dim1_binary[name].apply(lambda x: x[:3])

# save dfs to csv
# df_dim1_reduced.to_csv('../data/human_annotation/dim1_reduced.csv', index=False, sep=';')
# df_dim1_binary.to_csv('../data/human_annotation/dim1_binary.csv', index=False, sep=';')
# df_dim1.to_csv('../data/human_annotation/dim1.csv', index=False, sep=';')
df_dim2.to_csv('../data/human_annotation/dim2.csv', index=False, sep=';')
df_dim3.to_csv('../data/human_annotation/dim3.csv', index=False, sep=';')




  warn(msg)
  warn(msg)
  warn(msg)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dim1.rename(columns={'dim1_elin': 'elin', 'dim1_lena': 'lena', 'dim1_oscar': 'oscar', 'dim1_agg': 'agg'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dim2.rename(columns={'dim2_elin': 'elin', 'dim2_lena': 'lena', 'dim2_oscar': 'oscar', 'dim2_agg': 'agg'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dim3.rename(columns={'dim3_elin': 'elin', 'dim3_lena': 'lena', 'dim3_oscar': 'oscar', 'dim3

In [25]:
df

Unnamed: 0,sentID,text_clean,dim1_agg,dim1_elin,dim1_lena,dim1_oscar,dim2_agg,dim2_elin,dim2_lena,dim2_oscar,dim3_agg,dim3_elin,dim3_lena,dim3_oscar
0,291392,man kan samtidigt disku- tera om det är lämpli...,PASSIVE,PASSIVE,PASSIVE,PASSIVE,EXPLICIT,EXPLICIT,EXPLICIT,EXPLICIT,IDEAL,IDEAL,IDEAL,DESCRIPTIVE
1,305276,tabell 0 exempel när avgiften baseras på barne...,,PASSIVE,,,,EXPLICIT,,,,DESCRIPTIVE,,
2,328458,att så många av de familjehemsplacerade barnen...,ACTIVE_POS_OTHER,ACTIVE_POS_OTHER,ACTIVE_POS_OTHER,PASSIVE,IMPLICIT,IMPLICIT,EXPLICIT,EXPLICIT,IDEAL,IDEAL,DESCRIPTIVE,DESCRIPTIVE
3,7955,det gäller både att få fäder att vilja ta förä...,ACTIVE_POS_OTHER,ACTIVE_POS_OTHER,ACTIVE_POS_OTHER,ACTIVE_POS_OTHER,EXPLICIT,EXPLICIT,EXPLICIT,EXPLICIT,IDEAL,IDEAL,IDEAL,IDEAL
4,78617,som motivering har utskottet anfört att den nu...,PASSIVE,PASSIVE,PASSIVE,PASSIVE,EXPLICIT,EXPLICIT,EXPLICIT,EXPLICIT,IDEAL,IDEAL,DESCRIPTIVE,DESCRIPTIVE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,57234,mammor blir kriminella därför att de inte ser ...,ACTIVE_NEG,ACTIVE_NEG,ACTIVE_NEG,ACTIVE_NEG,EXPLICIT,EXPLICIT,EXPLICIT,IMPLICIT,DESCRIPTIVE,DESCRIPTIVE,DESCRIPTIVE,DESCRIPTIVE
346,345069,sou 0000:00 totalt utanför 0000 utanför 0000–0...,,,,,,,,,,,,
347,320316,"mammans ålder , utbildningsnivå , inkomst samt...",PASSIVE,,PASSIVE,PASSIVE,,,EXPLICIT,IMPLICIT,,,DESCRIPTIVE,DESCRIPTIVE
348,66065,fler dubbeldagar - positivt för kvinnors hälsa...,ACTIVE_POS_CARING,ACTIVE_POS_OTHER,ACTIVE_POS_CARING,ACTIVE_POS_CARING,EXPLICIT,EXPLICIT,EXPLICIT,IMPLICIT,IDEAL,IDEAL,DESCRIPTIVE,IDEAL


Results from Dirk's evaluation of GPT:

````python
dim1
350
RAW:
               elin      lena     oscar  aggregate       GPT  mean_human
elin       1.000000  0.608571  0.617143   0.748571  0.494286    0.612857
lena       0.608571  1.000000  0.668571   0.817143  0.545714    0.638571
oscar      0.617143  0.668571  1.000000   0.840000  0.528571    0.642857
aggregate  0.748571  0.817143  0.840000   1.000000  0.548571    0.801905
GPT        0.494286  0.545714  0.528571   0.548571  1.000000    0.522857
KAPPA:
               elin      lena     oscar  aggregate       GPT  mean_human
elin       1.000000  0.486754  0.499290   0.672779  0.354546    0.493022
lena       0.486754  1.000000  0.533719   0.750879  0.388609    0.510237
oscar      0.499290  0.533719  1.000000   0.781927  0.364316    0.516505
aggregate  0.672779  0.750879  0.781927   1.000000  0.406302    0.735195
GPT        0.354546  0.388609  0.364316   0.406302  1.000000    0.369157

dim2
225
RAW:
               elin      lena     oscar  aggregate       GPT  mean_human
elin       1.000000  0.613333  0.631111   0.835556  0.591111    0.622222
lena       0.613333  1.000000  0.706667   0.511111  0.760000    0.660000
oscar      0.631111  0.706667  1.000000   0.795556  0.577778    0.668889
aggregate  0.835556  0.511111  0.795556   1.000000  0.462222    0.714074
GPT        0.591111  0.760000  0.577778   0.462222  1.000000    0.642963
KAPPA:
               elin      lena     oscar  aggregate       GPT  mean_human
elin       1.000000  0.014797  0.194175   0.677288  0.110251    0.104486
lena       0.014797  1.000000  0.181818   0.081155  0.145570    0.098308
oscar      0.194175  0.181818  1.000000   0.601156  0.030612    0.187996
aggregate  0.677288  0.081155  0.601156   1.000000  0.036624    0.453200
GPT        0.110251  0.145570  0.030612   0.036624  1.000000    0.095478

dim3
225
RAW:
               elin      lena     oscar  aggregate       GPT  mean_human
elin       1.000000  0.746667  0.857778   1.000000  0.746667    0.802222
lena       0.746667  1.000000  0.808889   0.746667  0.702222    0.777778
oscar      0.857778  0.808889  1.000000   0.857778  0.737778    0.833333
aggregate  1.000000  0.746667  0.857778   1.000000  0.746667    0.868148
GPT        0.746667  0.702222  0.737778   0.746667  1.000000    0.728889
KAPPA:
               elin      lena     oscar  aggregate       GPT  mean_human
elin       1.000000  0.312590  0.629630   1.000000  0.468812    0.471110
lena       0.312590  1.000000  0.354354   0.312590  0.303245    0.333472
oscar      0.629630  0.354354  1.000000   0.629630  0.405242    0.491992
aggregate  1.000000  0.312590  0.629630   1.000000  0.468812    0.647407
GPT        0.468812  0.303245  0.405242   0.468812  1.000000    0.392433


````