In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
from tqdm import tqdm
import razdel
import fasttext
import os
import json
import matplotlib.pyplot as plt
import seaborn as sn
tqdm.pandas()

## Evaluation

- [baseline](#Baseline)
- [rstbert](#RSTBert)
- [conditional classifier](#Conditional-classifier)

In [None]:
from sklearn.metrics import f1_score
import numpy as np

def fine_grained_f1(true, pred, labels=(0, 1, 2)):
    """Macro-averaged F1 over a fixed set of class labels.

    Args:
        true: Ground-truth labels, one per sample.
        pred: Predicted labels, aligned positionally with ``true``.
        labels: Classes that enter the macro average. Defaults to ``(0, 1, 2)``,
            the set this notebook has always scored; any other label value that
            occurs in ``true``/``pred`` is left out of the average.

    Returns:
        The unweighted mean of the per-class F1 scores of ``labels``.
    """
    return f1_score(true, pred, average='macro', labels=list(labels))

def average_f1(f1s):
    """Collapse several F1 scores into their unweighted arithmetic mean.

    Args:
        f1s: Array-like of scores.

    Returns:
        The mean as a NumPy scalar.
    """
    scores = np.asanyarray(f1s)
    return scores.mean()

### Baseline

#### Overall baseline performance

In [None]:
# Overall ConvBERT baseline: for every fold, score the stance (label1) and
# argument (label2) predictions of each theme, then pool the three themes into
# one confusion matrix per task.
f1_stances = []
f1_arguments = []

f1_masks_stances = []
f1_masks_arguments = []
f1_vaccines_stances = []
f1_vaccines_arguments = []
f1_quarantine_stances = []
f1_quarantine_arguments = []

cm_stance = []
cm_argument = []

# theme -> (per-fold stance F1 list, per-fold argument F1 list). These are the
# very same list objects as the names above, which the summary cell prints.
scores_by_theme = {
    'masks': (f1_masks_stances, f1_masks_arguments),
    'vaccines': (f1_vaccines_stances, f1_vaccines_arguments),
    'quarantine': (f1_quarantine_stances, f1_quarantine_arguments),
}

for fold in range(5):
    pathname = f'data/fold_{fold}'

    # Pass the path itself: feeding `file.read()` (a literal JSON string) to
    # read_json is deprecated in pandas, and the path form decodes as UTF-8
    # instead of the platform's locale encoding.
    test = pd.read_json(f'{pathname}/test_tokens.json', lines=True)

    fold_f1_stance = []
    fold_f1_arg = []
    current_cm_stance = None
    current_cm_arg = None

    for theme, (theme_stance_f1s, theme_arg_f1s) in scores_by_theme.items():
        pred = pd.read_json(f'convbert_{theme}/fold_{fold}/predictions_test.json', lines=True)
        true_stance = test[f'{theme}_stance']
        true_arg = test[f'{theme}_argument']

        f1_theme_stance = fine_grained_f1(true_stance, pred.label1)
        f1_theme_arg = fine_grained_f1(true_arg, pred.label2)
        # Built-in round() accepts NumPy scalars and plain floats alike; a plain
        # float has no `.round` method.
        print(f'{theme.capitalize()} stance: {round(f1_theme_stance * 100, 2)}, argument: {round(f1_theme_arg * 100, 2)}')
        theme_stance_f1s.append(f1_theme_stance)
        theme_arg_f1s.append(f1_theme_arg)
        fold_f1_stance.append(f1_theme_stance)
        fold_f1_arg.append(f1_theme_arg)

        theme_cm_stance = pd.crosstab(pred.label1, true_stance, rownames=['Predicted'], colnames=['True'], margins=True)
        theme_cm_arg = pd.crosstab(pred.label2, true_arg, rownames=['Predicted'], colnames=['True'], margins=True)
        # Sum the counts across themes. fill_value=0 stops a label that is
        # missing from one theme's table from turning the pooled cell into NaN.
        if current_cm_stance is None:
            current_cm_stance, current_cm_arg = theme_cm_stance, theme_cm_arg
        else:
            current_cm_stance = current_cm_stance.add(theme_cm_stance, fill_value=0)
            current_cm_arg = current_cm_arg.add(theme_cm_arg, fill_value=0)

    ## Overall
    f1_stance = average_f1(fold_f1_stance)
    f1_arg = average_f1(fold_f1_arg)

    # Keep the first four rows/columns of the pooled table.
    # NOTE(review): this is positional; presumably it keeps four class labels
    # and drops the 'All' margin - confirm against the label set in the data.
    current_cm_stance = current_cm_stance.iloc[:4, :4]
    current_cm_arg = current_cm_arg.iloc[:4, :4]
    # Row-normalise. `cm / cm.sum(axis=1)` would match the row totals against
    # the *column* labels, scaling column j by the total of row j; `.div(...,
    # axis=0)` divides every row by its own total.
    cm_stance.append(current_cm_stance.div(current_cm_stance.sum(axis=1), axis=0))
    cm_argument.append(current_cm_arg.div(current_cm_arg.sum(axis=1), axis=0))

    print(f'Fold {fold} ------------------')
    print('F1 stance\t:::', round(f1_stance * 100, 2))
    print('F1 argument\t:::', round(f1_arg * 100, 2))

    f1_stances.append(f1_stance)
    f1_arguments.append(f1_arg)

In [None]:
# Mean ± standard deviation over the folds, in percent, one line per metric.
# Each prefix carries its own tab padding so the numbers line up when printed.
summary_rows = (
    ('stance \t\t', f1_stances),
    ('argument\t', f1_arguments),
    ('mask stance \t', f1_masks_stances),
    ('mask argument\t', f1_masks_arguments),
    ('vaccine stance \t\t', f1_vaccines_stances),
    ('vaccine argument\t', f1_vaccines_arguments),
    ('quarantine stance\t', f1_quarantine_stances),
    ('quarantine argument\t', f1_quarantine_arguments),
)

for row_prefix, fold_scores in summary_rows:
    mean_pct = (np.mean(fold_scores) * 100).round(1)
    std_pct = (np.std(fold_scores) * 100).round(1)
    print(f'{row_prefix}{mean_pct} ± {std_pct}')

In [None]:
# Stack the per-fold stance matrices, average the rows that share an index
# label, then show the result transposed and in percent.
cm_concat = pd.concat(cm_stance)
fold_mean_cm = cm_concat.groupby(cm_concat.index).mean()
(fold_mean_cm * 100.).T.round(2)

#### EDU only baseline performance

In [None]:
def _first_node_is_elementary(annotation):
    """True when the first entry of ``annotation['rst']`` has relation 'elementary'."""
    first_node = annotation['rst'][0]
    return first_node.relation == 'elementary'


# NOTE: read_pickle can execute code stored in the file - only load trusted data.
test = pd.read_pickle('data/fold_0/test.pkl')
is_edu = test.annot.map(_first_node_is_elementary)
# Last expression of the cell: how many fold-0 test rows are / are not elementary.
is_edu.value_counts()

In [None]:
# ConvBERT baseline restricted to rows whose annotation consists of exactly one
# RST node with relation 'elementary'.
f1_stances = []
f1_arguments = []

f1_masks_stances = []
f1_masks_arguments = []
f1_vaccines_stances = []
f1_vaccines_arguments = []
f1_quarantine_stances = []
f1_quarantine_arguments = []

# theme -> (per-fold stance F1 list, per-fold argument F1 list). These are the
# very same list objects as the names above, which the summary cell prints.
scores_by_theme = {
    'masks': (f1_masks_stances, f1_masks_arguments),
    'vaccines': (f1_vaccines_stances, f1_vaccines_arguments),
    'quarantine': (f1_quarantine_stances, f1_quarantine_arguments),
}

for fold in range(5):
    pathname = f'data/fold_{fold}'

    # NOTE: read_pickle can execute code stored in the file - only load trusted data.
    test = pd.read_pickle(f'{pathname}/test.pkl').reset_index(drop=True)
    idx_edu = test.annot.map(lambda row: len(row['rst']) == 1 and row['rst'][0].relation == 'elementary')
    test = test[idx_edu]

    fold_f1_stance = []
    fold_f1_arg = []

    for theme, (theme_stance_f1s, theme_arg_f1s) in scores_by_theme.items():
        # Pass the path itself: a literal JSON string (`file.read()`) is a
        # deprecated input for read_json. The boolean mask is applied by index,
        # which assumes one prediction row per test row in the same order.
        pred = pd.read_json(f'convbert_{theme}/fold_{fold}/predictions_test.json', lines=True)[idx_edu]

        f1_theme_stance = fine_grained_f1(test[f'{theme}_stance'], pred.label1)
        f1_theme_arg = fine_grained_f1(test[f'{theme}_argument'], pred.label2)
        # Built-in round() accepts NumPy scalars and plain floats alike; a plain
        # float has no `.round` method.
        print(f'{theme.capitalize()} stance: {round(f1_theme_stance * 100, 2)}, argument: {round(f1_theme_arg * 100, 2)}')
        theme_stance_f1s.append(f1_theme_stance)
        theme_arg_f1s.append(f1_theme_arg)
        fold_f1_stance.append(f1_theme_stance)
        fold_f1_arg.append(f1_theme_arg)

    f1_stance = average_f1(fold_f1_stance)
    f1_arg = average_f1(fold_f1_arg)

    print(f'Fold {fold} ------------------')
    print('F1 stance\t:::', round(f1_stance * 100, 2))
    print('F1 argument\t:::', round(f1_arg * 100, 2))

    f1_stances.append(f1_stance)
    f1_arguments.append(f1_arg)

In [None]:
# Mean ± standard deviation over the folds, in percent, one line per metric.
# Each prefix carries its own tab padding so the numbers line up when printed.
summary_rows = (
    ('stance \t\t', f1_stances),
    ('argument\t', f1_arguments),
    ('mask stance \t', f1_masks_stances),
    ('mask argument\t', f1_masks_arguments),
    ('vaccine stance \t\t', f1_vaccines_stances),
    ('vaccine argument\t', f1_vaccines_arguments),
    ('quarantine stance\t', f1_quarantine_stances),
    ('quarantine argument\t', f1_quarantine_arguments),
)

for row_prefix, fold_scores in summary_rows:
    mean_pct = (np.mean(fold_scores) * 100).round(1)
    std_pct = (np.std(fold_scores) * 100).round(1)
    print(f'{row_prefix}{mean_pct} ± {std_pct}')

#### NOT-EDU only baseline performance

In [None]:
# ConvBERT baseline restricted to rows whose annotation consists of exactly one
# RST node with a relation other than 'elementary', plus pooled confusion
# matrices per fold.
f1_stances = []
f1_arguments = []

f1_masks_stances = []
f1_masks_arguments = []
f1_vaccines_stances = []
f1_vaccines_arguments = []
f1_quarantine_stances = []
f1_quarantine_arguments = []

cm_stance = []
cm_argument = []

# theme -> (per-fold stance F1 list, per-fold argument F1 list). These are the
# very same list objects as the names above, which the summary cell prints.
scores_by_theme = {
    'masks': (f1_masks_stances, f1_masks_arguments),
    'vaccines': (f1_vaccines_stances, f1_vaccines_arguments),
    'quarantine': (f1_quarantine_stances, f1_quarantine_arguments),
}

for fold in range(5):
    pathname = f'data/fold_{fold}'

    # NOTE: read_pickle can execute code stored in the file - only load trusted data.
    test = pd.read_pickle(f'{pathname}/test.pkl').reset_index(drop=True)
    # Despite the name, this mask selects the NON-elementary rows.
    idx_edu = test.annot.map(lambda row: len(row['rst']) == 1 and row['rst'][0].relation != 'elementary')
    test = test[idx_edu].reset_index(drop=True)

    fold_f1_stance = []
    fold_f1_arg = []
    current_cm_stance = None
    current_cm_arg = None

    for theme, (theme_stance_f1s, theme_arg_f1s) in scores_by_theme.items():
        # Pass the path itself: a literal JSON string (`file.read()`) is a
        # deprecated input for read_json. The boolean mask is applied by index,
        # which assumes one prediction row per test row in the same order.
        pred = pd.read_json(f'convbert_{theme}/fold_{fold}/predictions_test.json', lines=True)[idx_edu]
        # Re-index from 0 so crosstab pairs predictions with the re-indexed truth.
        pred_stance = pred.label1.reset_index(drop=True)
        pred_arg = pred.label2.reset_index(drop=True)
        if theme == 'masks':
            # Not read anywhere in the visible notebook; kept for any other
            # cell that may rely on these names.
            masks_stance_base = pred_stance
            masks_argument_base = pred_arg

        true_stance = test[f'{theme}_stance']
        true_arg = test[f'{theme}_argument']

        f1_theme_stance = fine_grained_f1(true_stance, pred_stance)
        f1_theme_arg = fine_grained_f1(true_arg, pred_arg)
        # Built-in round() accepts NumPy scalars and plain floats alike; a plain
        # float has no `.round` method.
        print(f'{theme.capitalize()} stance: {round(f1_theme_stance * 100, 2)}, argument: {round(f1_theme_arg * 100, 2)}')
        theme_stance_f1s.append(f1_theme_stance)
        theme_arg_f1s.append(f1_theme_arg)
        fold_f1_stance.append(f1_theme_stance)
        fold_f1_arg.append(f1_theme_arg)

        theme_cm_stance = pd.crosstab(pred_stance, true_stance, rownames=['Prediction'], colnames=['Ground truth'], margins=True)
        theme_cm_arg = pd.crosstab(pred_arg, true_arg, rownames=['Prediction'], colnames=['Ground truth'], margins=True)
        # Sum the counts across themes. fill_value=0 stops a label that is
        # missing from one theme's table from turning the pooled cell into NaN.
        if current_cm_stance is None:
            current_cm_stance, current_cm_arg = theme_cm_stance, theme_cm_arg
        else:
            current_cm_stance = current_cm_stance.add(theme_cm_stance, fill_value=0)
            current_cm_arg = current_cm_arg.add(theme_cm_arg, fill_value=0)

    ## Overall
    f1_stance = average_f1(fold_f1_stance)
    f1_arg = average_f1(fold_f1_arg)

    # Keep rows/columns at positions 1..3 of the pooled table.
    # NOTE(review): positional slice; presumably it skips one leading label and
    # the trailing 'All' margin so that classes 0, 1, 2 remain - confirm.
    current_cm_stance = current_cm_stance.iloc[1:4, 1:4]
    current_cm_arg = current_cm_arg.iloc[1:4, 1:4]
    # Row-normalise. `cm / cm.sum(axis=1)` would match the row totals against
    # the *column* labels, scaling column j by the total of row j; `.div(...,
    # axis=0)` divides every row by its own total.
    cm_stance.append(current_cm_stance.div(current_cm_stance.sum(axis=1), axis=0))
    cm_argument.append(current_cm_arg.div(current_cm_arg.sum(axis=1), axis=0))

    print(f'Fold {fold} ------------------')
    print('F1 stance\t:::', round(f1_stance * 100, 2))
    print('F1 argument\t:::', round(f1_arg * 100, 2))

    f1_stances.append(f1_stance)
    f1_arguments.append(f1_arg)

In [None]:
# Mean ± standard deviation over the folds, in percent, one line per metric.
# Each prefix carries its own tab padding so the numbers line up when printed.
summary_rows = (
    ('stance \t\t', f1_stances),
    ('argument\t', f1_arguments),
    ('mask stance \t', f1_masks_stances),
    ('mask argument\t', f1_masks_arguments),
    ('vaccine stance \t\t', f1_vaccines_stances),
    ('vaccine argument\t', f1_vaccines_arguments),
    ('quarantine stance\t', f1_quarantine_stances),
    ('quarantine argument\t', f1_quarantine_arguments),
)

for row_prefix, fold_scores in summary_rows:
    mean_pct = (np.mean(fold_scores) * 100).round(1)
    std_pct = (np.std(fold_scores) * 100).round(1)
    print(f'{row_prefix}{mean_pct} ± {std_pct}')

In [None]:
# Average the per-fold stance matrices (rows sharing an index label are
# averaged), convert to percent and transpose for plotting.
cm_concat = pd.concat(cm_stance)
fold_mean_cm = cm_concat.groupby(cm_concat.index).mean()
cm = (fold_mean_cm * 100.).T.round(2)

# Numeric class ids -> names shown on both heatmap axes.
stance_names = {0: 'against', 1: 'other', 2: 'for'}
cm = cm.rename(index=stance_names, columns=stance_names)

f = plt.figure(figsize=(4, 3.3))
sn.set(font_scale=1.2)
sn.heatmap(cm, annot=True, fmt='4.1f', cmap='Greys')
f.savefig("confusion_baseline.pdf", bbox_inches='tight')

### RSTBert

#### NOT-EDU only performance 

In [None]:
# RSTBert evaluated on the rows whose annotation consists of exactly one RST
# node with a relation other than 'elementary', plus pooled confusion matrices
# per fold.
f1_stances = []
f1_arguments = []

f1_masks_stances = []
f1_masks_arguments = []
f1_vaccines_stances = []
f1_vaccines_arguments = []
f1_quarantine_stances = []
f1_quarantine_arguments = []

cm_stance = []
cm_argument = []

# theme -> (per-fold stance F1 list, per-fold argument F1 list). These are the
# very same list objects as the names above, which the summary cell prints.
scores_by_theme = {
    'masks': (f1_masks_stances, f1_masks_arguments),
    'vaccines': (f1_vaccines_stances, f1_vaccines_arguments),
    'quarantine': (f1_quarantine_stances, f1_quarantine_arguments),
}

for fold in range(5):
    pathname = f'data/fold_{fold}'

    # NOTE: read_pickle can execute code stored in the file - only load trusted data.
    test = pd.read_pickle(f'{pathname}/test.pkl').reset_index(drop=True)
    # Despite the name, this mask selects the NON-elementary rows.
    idx_edu = test.annot.map(lambda row: len(row['rst']) == 1 and row['rst'][0].relation != 'elementary')
    test = test[idx_edu].reset_index(drop=True)

    fold_f1_stance = []
    fold_f1_arg = []
    current_cm_stance = None
    current_cm_arg = None

    for theme, (theme_stance_f1s, theme_arg_f1s) in scores_by_theme.items():
        # Pass the path itself: a literal JSON string (`file.read()`) is a
        # deprecated input for read_json.
        # The predictions are deliberately NOT filtered with idx_edu here.
        # NOTE(review): presumably the RSTBert files already hold exactly the
        # non-elementary rows, in test order - confirm against the exporter.
        pred = pd.read_json(f'rstbert_{theme}/fold_{fold}/predictions_test.json', lines=True)
        pred_stance = pred.label1.reset_index(drop=True)
        pred_arg = pred.label2.reset_index(drop=True)

        true_stance = test[f'{theme}_stance']
        true_arg = test[f'{theme}_argument']

        f1_theme_stance = fine_grained_f1(true_stance, pred_stance)
        f1_theme_arg = fine_grained_f1(true_arg, pred_arg)
        # Built-in round() accepts NumPy scalars and plain floats alike; a plain
        # float has no `.round` method.
        print(f'{theme.capitalize()} stance: {round(f1_theme_stance * 100, 2)}, argument: {round(f1_theme_arg * 100, 2)}')
        theme_stance_f1s.append(f1_theme_stance)
        theme_arg_f1s.append(f1_theme_arg)
        fold_f1_stance.append(f1_theme_stance)
        fold_f1_arg.append(f1_theme_arg)

        theme_cm_stance = pd.crosstab(pred_stance, true_stance, rownames=['Prediction'], colnames=['Ground truth'], margins=True)
        theme_cm_arg = pd.crosstab(pred_arg, true_arg, rownames=['Prediction'], colnames=['Ground truth'], margins=True)
        # Sum the counts across themes. fill_value=0 stops a label that is
        # missing from one theme's table from turning the pooled cell into NaN.
        if current_cm_stance is None:
            current_cm_stance, current_cm_arg = theme_cm_stance, theme_cm_arg
        else:
            current_cm_stance = current_cm_stance.add(theme_cm_stance, fill_value=0)
            current_cm_arg = current_cm_arg.add(theme_cm_arg, fill_value=0)

    ## Overall
    f1_stance = average_f1(fold_f1_stance)
    f1_arg = average_f1(fold_f1_arg)

    # Keep rows/columns at positions 1..3 of the pooled table.
    # NOTE(review): positional slice; presumably it skips one leading label and
    # the trailing 'All' margin so that classes 0, 1, 2 remain - confirm.
    current_cm_stance = current_cm_stance.iloc[1:4, 1:4]
    current_cm_arg = current_cm_arg.iloc[1:4, 1:4]
    # Row-normalise. `cm / cm.sum(axis=1)` would match the row totals against
    # the *column* labels, scaling column j by the total of row j; `.div(...,
    # axis=0)` divides every row by its own total.
    cm_stance.append(current_cm_stance.div(current_cm_stance.sum(axis=1), axis=0))
    cm_argument.append(current_cm_arg.div(current_cm_arg.sum(axis=1), axis=0))

    print(f'Fold {fold} ------------------')
    print('F1 stance\t:::', round(f1_stance * 100, 2))
    print('F1 argument\t:::', round(f1_arg * 100, 2))

    f1_stances.append(f1_stance)
    f1_arguments.append(f1_arg)

In [None]:
# Mean ± standard deviation over the folds, in percent, one line per metric.
# Each prefix carries its own tab padding so the numbers line up when printed.
summary_rows = (
    ('stance \t\t', f1_stances),
    ('argument\t', f1_arguments),
    ('mask stance \t', f1_masks_stances),
    ('mask argument\t', f1_masks_arguments),
    ('vaccine stance \t\t', f1_vaccines_stances),
    ('vaccine argument\t', f1_vaccines_arguments),
    ('quarantine stance\t', f1_quarantine_stances),
    ('quarantine argument\t', f1_quarantine_arguments),
)

for row_prefix, fold_scores in summary_rows:
    mean_pct = (np.mean(fold_scores) * 100).round(1)
    std_pct = (np.std(fold_scores) * 100).round(1)
    print(f'{row_prefix}{mean_pct} ± {std_pct}')

In [None]:
# Average the per-fold stance matrices (rows sharing an index label are
# averaged), convert to percent and transpose for plotting.
cm_concat = pd.concat(cm_stance)
fold_mean_cm = cm_concat.groupby(cm_concat.index).mean()
cm = (fold_mean_cm * 100.).T.round(2)

# Numeric class ids -> names shown on both heatmap axes.
stance_names = {0: 'against', 1: 'other', 2: 'for'}
cm = cm.rename(index=stance_names, columns=stance_names)

f = plt.figure(figsize=(4, 3.3))
sn.set(font_scale=1.2)
sn.heatmap(cm, annot=True, fmt='4.1f', cmap='Greys')
# NOTE(review): the file name says "rstlstm" although this section is RSTBert -
# confirm the name is intentional.
f.savefig("confusion_rstlstm.pdf", bbox_inches='tight')

### Conditional classifier 

In [None]:
class ConditionalClassifier:
    """Pick, per sample, between the ConvBERT baseline and RSTBert predictions.

    Samples whose annotation holds exactly one RST node with a relation other
    than ``'elementary'`` take the RSTBert prediction; every other sample takes
    the baseline prediction.

    Attributes (all populated by ``__init__``):
        true: Ground-truth frame read from ``data/fold_{foldnum}/{mode}.pkl``,
            re-indexed from 0.
        idx_edu: Boolean mask over ``true``. Despite the name it is True for
            the NON-elementary rows, i.e. the rows routed to RSTBert.
        f1_base: ``{'stance', 'argument'}`` macro-F1 of the baseline on all rows.
        f1_rst: ``{'stance', 'argument'}`` macro-F1 of RSTBert on the
            ``idx_edu`` rows.
        pred: Ground-truth rows with two extra columns, ``stance`` and
            ``argument``, holding the selected predictions. Baseline-routed
            rows come first, RSTBert-routed rows after them.
    """

    def __init__(self, foldnum, mode='test', theme='masks'):
        """Load truth and both models' predictions, then merge them.

        Args:
            foldnum: Cross-validation fold number used in the file paths.
            mode: Data split; selects both ``{mode}.pkl`` and
                ``predictions_{mode}.json``.
            theme: Topic prefix of the label columns and model directories
                (the notebook uses 'masks', 'vaccines' and 'quarantine').
        """
        self._foldnum = foldnum
        self._mode = mode
        self._theme = theme

        # NOTE: read_pickle can execute code stored in the file - only load trusted data.
        self.true = pd.read_pickle(f'data/fold_{self._foldnum}/{self._mode}.pkl').reset_index(drop=True)
        self.idx_edu = self.true.annot.map(lambda row: len(row['rst']) == 1 and row['rst'][0].relation != 'elementary')

        self._init_base()
        self._init_rst()

        self._combine_predictions()

    def _read_predictions(self, model):
        """Read one model's JSON-lines prediction file for this fold/theme/split.

        The split in the file name follows ``mode``; it used to be fixed to
        'test', which paired non-test ground truth with test predictions.
        The path is handed to pandas directly because a literal JSON string is
        a deprecated input for ``read_json``.
        """
        path = f'{model}_{self._theme}/fold_{self._foldnum}/predictions_{self._mode}.json'
        return pd.read_json(path, lines=True)

    def _init_base(self):
        """Load the ConvBERT baseline predictions and score them on all rows."""
        pred = self._read_predictions('convbert')

        self._pred_base = {
            'stance': pred.label1,
            'argument': pred.label2
        }

        self.f1_base = {
            'stance': fine_grained_f1(self.true[self._theme + '_stance'], pred.label1),
            'argument': fine_grained_f1(self.true[self._theme + '_argument'], pred.label2)
        }

    def _init_rst(self):
        """Load the RSTBert predictions and score them on the ``idx_edu`` rows.

        NOTE(review): this presumes the RSTBert file has exactly one row per
        ``idx_edu`` row, in the same order - confirm against the exporter.
        """
        pred = self._read_predictions('rstbert')

        self._pred_rst = {
            'stance': pred.label1,
            'argument': pred.label2
        }

        rst_truth = self.true[self.idx_edu]
        self.f1_rst = {
            'stance': fine_grained_f1(rst_truth[self._theme + '_stance'], pred.label1),
            'argument': fine_grained_f1(rst_truth[self._theme + '_argument'], pred.label2)
        }

    def _combine_predictions(self):
        """Build ``self.pred``: baseline rows first, then RSTBert rows."""
        use_rst = self.idx_edu.astype(bool)

        # Baseline predictions cover every row; attach them by index, then keep
        # only the rows that are not routed to RSTBert.
        all_pred = self.true.copy()
        all_pred['stance'] = self._pred_base['stance']
        all_pred['argument'] = self._pred_base['argument']
        all_pred = all_pred[~use_rst].reset_index(drop=True)

        # RSTBert predictions are indexed from 0, so the matching truth rows
        # are re-indexed the same way before the columns are attached.
        rst_pred = self.true[use_rst].copy().reset_index(drop=True)
        rst_pred['stance'] = self._pred_rst['stance']
        rst_pred['argument'] = self._pred_rst['argument']
        self.pred = pd.concat([all_pred, rst_pred])

In [None]:
# Conditional classifier: per fold and theme, score the merged predictions
# (RSTBert where applicable, ConvBERT baseline elsewhere).
f1_stances = []
f1_arguments = []

f1_masks_stances = []
f1_masks_arguments = []
f1_vaccines_stances = []
f1_vaccines_arguments = []
f1_quarantine_stances = []
f1_quarantine_arguments = []

# theme -> (per-fold stance F1 list, per-fold argument F1 list). These are the
# very same list objects as the names above, which the summary cell prints.
scores_by_theme = {
    'masks': (f1_masks_stances, f1_masks_arguments),
    'vaccines': (f1_vaccines_stances, f1_vaccines_arguments),
    'quarantine': (f1_quarantine_stances, f1_quarantine_arguments),
}

for fold in range(5):
    fold_f1_stance = []
    fold_f1_arg = []

    for theme, (theme_stance_f1s, theme_arg_f1s) in scores_by_theme.items():
        predictor = ConditionalClassifier(foldnum=fold, mode='test', theme=theme)
        combined = predictor.pred

        # Truth lives in '<theme>_stance' / '<theme>_argument'; the selected
        # predictions live in the 'stance' / 'argument' columns.
        f1_theme_stance = fine_grained_f1(combined[f'{theme}_stance'], combined['stance'])
        f1_theme_arg = fine_grained_f1(combined[f'{theme}_argument'], combined['argument'])
        # Built-in round() accepts NumPy scalars and plain floats alike; a plain
        # float has no `.round` method.
        print(f'{theme.capitalize()} stance: {round(f1_theme_stance * 100, 2)}, argument: {round(f1_theme_arg * 100, 2)}')
        theme_stance_f1s.append(f1_theme_stance)
        theme_arg_f1s.append(f1_theme_arg)
        fold_f1_stance.append(f1_theme_stance)
        fold_f1_arg.append(f1_theme_arg)

    f1_stance = average_f1(fold_f1_stance)
    f1_arg = average_f1(fold_f1_arg)

    print(f'Fold {fold} ------------------')
    print('F1 stance\t:::', round(f1_stance * 100, 2))
    print('F1 argument\t:::', round(f1_arg * 100, 2))

    f1_stances.append(f1_stance)
    f1_arguments.append(f1_arg)

In [None]:
# Mean ± standard deviation over the folds, in percent, one line per metric.
# Each prefix carries its own tab padding so the numbers line up when printed.
summary_rows = (
    ('stance \t\t', f1_stances),
    ('argument\t', f1_arguments),
    ('mask stance \t', f1_masks_stances),
    ('mask argument\t', f1_masks_arguments),
    ('vaccine stance \t\t', f1_vaccines_stances),
    ('vaccine argument\t', f1_vaccines_arguments),
    ('quarantine stance\t', f1_quarantine_stances),
    ('quarantine argument\t', f1_quarantine_arguments),
)

for row_prefix, fold_scores in summary_rows:
    mean_pct = (np.mean(fold_scores) * 100).round(1)
    std_pct = (np.std(fold_scores) * 100).round(1)
    print(f'{row_prefix}{mean_pct} ± {std_pct}')