### This notebook computes Kendall's tau correlations between the quality dimensions of the Dagstuhl corpus and the dimensions of the appropriateness corpus (Table 2 in the paper)

In [None]:
import pandas as pd
from quica.quica import Quica
import numpy as np
import krippendorff
import matplotlib.pyplot as plt
from scipy import stats
# Do not truncate long cell contents when DataFrames are displayed.
pd.set_option('display.max_colwidth', None)

In [None]:
# Relative path to the data root. File paths are built by appending to this
# string, hence the trailing slash.
data_dir = '../../data/'

In [None]:
# Spell out the three input paths first, then load each CSV into its own frame.
dagstuhl_csv = data_dir + 'dagstuhl-15512-argquality-corpus-v2/dagstuhl-15512-argquality-corpus-v2-mean.csv'
sources_csv = data_dir + 'appropriateness-corpus/annotation_dataset_types_sourceids.csv'
appropriateness_csv = data_dir + 'appropriateness-corpus/appropriateness_corpus_mean.csv'

df_dagstuhl = pd.read_csv(dagstuhl_csv)
df_sources = pd.read_csv(sources_csv)
df_appropriateness = pd.read_csv(appropriateness_csv)

In [None]:
# Attach a source id to every appropriateness row. The ids are assigned by
# position (the index of df_sources is dropped by tolist()), so this assumes
# both frames list their rows in the same order and have the same length.
df_appropriateness['source_id'] = df_sources['id'].tolist()

In [None]:
# Keep only the appropriateness rows whose source id also occurs in the
# Dagstuhl corpus. The explicit copy turns the filtered result into an
# independent frame, so later modifications of it do not trigger pandas'
# SettingWithCopyWarning.
df_appropriateness = df_appropriateness[df_appropriateness['source_id'].isin(df_dagstuhl['#id'].tolist())].copy()

In [None]:
# Order the Dagstuhl rows by their '#id' column (ascending).
df_dagstuhl = df_dagstuhl.sort_values('#id')

In [None]:
# Order the appropriateness rows by source id so that they line up, row by
# row, with the id-sorted Dagstuhl frame. The sorted result is rebound instead
# of sorting in place: df_appropriateness was produced by filtering, and
# mutating such a frame in place makes pandas emit a SettingWithCopyWarning.
df_appropriateness = df_appropriateness.sort_values('source_id')

In [None]:
# Candidate Dagstuhl dimensions: every column except the three non-score
# columns. The list is built in column order so it is deterministic; iterating
# a set of strings can yield a different order on every interpreter run.
# NOTE: the next cell replaces this list with a hand-picked subset.
rel_dims_dagstuhl = [
    col for col in df_dagstuhl.columns
    if col not in ('argument', '#id', 'issue')
]

# Columns of the appropriateness corpus that are correlated against the
# Dagstuhl dimensions.
rel_dims_appropriateness = [
    'Inappropriateness',
    'Toxic Emotions',
    'Excessive Intensity',
    'Emotional Deception',
    'Missing Commitment',
    'Missing Seriousness',
    'Missing Openness',
    'Missing Intelligibility',
    'Unclear Meaning',
    'Missing Relevance',
    'Confusing Reasoning',
    'Other Reasons',
    'Detrimental Orthography',
    'Reason Unclassified',
]

In [None]:
# Hand-picked Dagstuhl dimensions to correlate. This assignment replaces the
# column-derived list that was bound to rel_dims_dagstuhl earlier; the order
# given here is the row order of the resulting correlation table.
rel_dims_dagstuhl = [
    'overall quality',
    'effectiveness',
    'appropriateness',
    'emotional appeal',
    'clarity',
    'credibility',
    'arrangement',
]

In [None]:
# Table under construction: a label column naming the Dagstuhl dimension of
# each row, plus one (initially empty) column per appropriateness dimension.
# The columns are derived from rel_dims_appropriateness so the two can never
# drift apart (a hand-written key list raises KeyError as soon as a dimension
# is added to one place but not the other).
corr_dict = {
    'rel_dims_dagstuhl': rel_dims_dagstuhl,
    **{dim: [] for dim in rel_dims_appropriateness},
}

# The correlation below pairs the two corpora purely by row position, so both
# frames must list exactly the same ids in the same order. Fail loudly instead
# of silently correlating scores of different arguments.
if df_dagstuhl['#id'].tolist() != df_appropriateness['source_id'].tolist():
    raise ValueError(
        "df_dagstuhl['#id'] and df_appropriateness['source_id'] are not "
        "aligned row by row; sort and filter both frames by id first."
    )

# Convert every appropriateness column to floats once, up front, instead of
# once per Dagstuhl dimension.
appropriateness_scores = {
    dim: df_appropriateness[dim].astype(float).to_numpy()
    for dim in rel_dims_appropriateness
}

for rel_dim_dagstuhl in rel_dims_dagstuhl:
    # Loop-invariant for the inner loop: convert the Dagstuhl column only once.
    dagstuhl_scores = df_dagstuhl[rel_dim_dagstuhl].astype(float).to_numpy()
    for rel_dim_appropriateness in rel_dims_appropriateness:
        # Only the Kendall's tau statistic is reported; the p-value is unused.
        tau, _p_value = stats.kendalltau(
            dagstuhl_scores,
            appropriateness_scores[rel_dim_appropriateness],
        )
        corr_dict[rel_dim_appropriateness].append(np.round(tau, 2))

In [None]:
# Turn the collected values into a table: one row per Dagstuhl dimension
# (named in the 'rel_dims_dagstuhl' column) and one column per
# appropriateness dimension.
df_corr = pd.DataFrame(corr_dict)

In [None]:
# Display the correlation table.
df_corr

In [None]:
# Display the transposed table (appropriateness dimensions as rows).
df_corr.T