## Installs

In [None]:
#!pip install datasets

In [None]:
#!pip install 'transformers[torch]' -U

## Imports

In [None]:
from transformers import AutoTokenizer, BertForSequenceClassification
from transformers import BertModel
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from sklearn.metrics import classification_report

In [None]:
from torch import nn
import torch

In [None]:
import pandas as pd

In [None]:
import numpy as np

In [None]:
from transformers import default_data_collator
from torch.utils.data import DataLoader

## Import Model

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [None]:
import sys
sys.path.append('model/code-bert/')
from temporal_relation_classification import TemporalRelationClassification
from temporal_relation_classification_config import TemporalRelationClassificationConfig

In [None]:
# Load the saved classifier and its tokenizer from the same checkpoint directory.
model_path = "saved_model/bert-base-uncased-saved-model"
model = TemporalRelationClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [None]:
# Resize the model's token embeddings to the tokenizer's vocabulary size.
# NOTE(review): presumably needed because the tokenizer carries extra marker
# tokens (e.g. [a1]/[a2]) — confirm against the training code.
model.resize_token_embeddings(len(tokenizer))

## Initialise Model

In [None]:
# Select the GPU when CUDA is available, otherwise the CPU.
# NOTE(review): `device` is not referenced again in the visible cells, and the
# model is not moved to it here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Switch the model to evaluation mode.
model.eval()

## Import Instances

In [None]:
# Read the original instances and the counterfactual instances from CSV.
original = pd.read_csv('data/annotated/partitions.csv')
counterfactual = pd.read_csv('data/annotated/counterfactuals.csv')

In [None]:
# Keep only the id, the counterfactual text and its two label columns.
# NOTE(review): eventA/eventB are not kept here, yet annotate_text reads
# row['eventA'] and row['eventB'] when it is applied to this frame — confirm
# whether those columns should be retained.
counterfactual = counterfactual[['id', 'counterfactual', 'new_temp', 'new_label']]

In [None]:
# Rename the counterfactual columns to context / label_temp / label.
# NOTE(review): 'n' is not one of the four columns kept by the selection above,
# so the 'n' -> 'id' entry renames nothing.
counterfactual = counterfactual.rename(columns={'n': 'id', 'counterfactual': 'context', 'new_temp':'label_temp', 'new_label':'label'})

In [None]:
# Integer class ids for the four relation names; the position in the tuple is the id.
label_mapping = dict(zip(('BEFORE', 'AFTER', 'EQUAL', 'VAGUE'), range(4)))

# Derive the numeric `label` column from the textual `label_temp` column of
# each frame (any existing `label` column is overwritten).
original['label'] = original['label_temp'].map(label_mapping)
counterfactual['label'] = counterfactual['label_temp'].map(label_mapping)

In [None]:
from datasets import Dataset, DatasetDict

# Wrap both pandas frames as datasets and group them under their split names.
dataset = DatasetDict({
    split_name: Dataset.from_pandas(frame)
    for split_name, frame in (("original", original), ("counterfactual", counterfactual))
})

## Process Instances

In [None]:
import re
def annotate_text(row, column):
    """Wrap the two event mentions of a row in marker tags.

    Every occurrence of ``row['eventA']`` in the text is wrapped as
    ``[a1]...[/a1]`` and every occurrence of ``row['eventB']`` as
    ``[a2]...[/a2]``.

    Args:
        row: Mapping-like record (for example a DataFrame row) that provides
            the text under ``column`` and the ``'eventA'`` / ``'eventB'``
            mention strings.
        column: Key of the text field to annotate.

    Returns:
        The text with both mentions wrapped in their tags.
    """
    event_a = row['eventA']
    event_b = row['eventB']

    # Each mention is paired with its own tagged form, so the [a1] tags always
    # carry eventA's text and the [a2] tags always carry eventB's text.
    replacements = [
        (event_a, f"[a1]{event_a}[/a1]"),
        (event_b, f"[a2]{event_b}[/a2]"),
    ]

    # Make sure the longer event is replaced first if they overlap
    # (eventB goes first on a tie).
    if len(event_a) <= len(event_b):
        replacements.reverse()

    context = row[column]
    for mention, tagged in replacements:
        # Plain literal replacement: neither the mention nor the tagged text is
        # interpreted as a regex or a replacement template, so characters such
        # as "(", "." or "\\" in an event are safe.
        # NOTE: the second pass also sees the text inserted by the first pass.
        context = context.replace(mention, tagged)

    return context

In [None]:
# Build a Trainer that is used only for evaluation.
# NOTE(review): this cell uses tokenized_datasets, data_collator and
# compute_metrics, which the visible notebook only defines in later cells, and
# the "original_with_key" split is not among the splits built in the visible
# code ("original", "counterfactual") — confirm whether this cell is stale.
from transformers import Trainer
evaluator = Trainer(
            model=model,
            eval_dataset=tokenized_datasets["original_with_key"],
            tokenizer=tokenizer,
            data_collator=data_collator,
            compute_metrics=compute_metrics,
        )

In [None]:
# Set the global flag that compute_metrics checks before printing its text
# reports, then evaluate the split.
eval_mode = True
print('Evaluate:')
evaluator.evaluate(tokenized_datasets['original_with_key'])

In [None]:
# Add an `annotated_context` column to both frames by running annotate_text on
# every row (axis=1); the text column name is forwarded positionally via `args`.
# NOTE(review): annotate_text reads the context, eventA and eventB fields of
# each row, so both frames must provide all three columns.
original['annotated_context'] = original.apply(annotate_text, axis=1, args=('context',))
counterfactual['annotated_context'] = counterfactual.apply(annotate_text, axis=1, args=('context',))

In [None]:
from datasets import Dataset, DatasetDict

# Rebuild the DatasetDict from the current state of both frames, keyed by split name.
dataset = DatasetDict({
    split_name: Dataset.from_pandas(frame)
    for split_name, frame in (("original", original), ("counterfactual", counterfactual))
})

In [None]:
from transformers import DataCollatorWithPadding

# Collator built around the loaded tokenizer; handed to the Trainer instances.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
# Bare class name of the tokenizer object, read from the type itself instead of
# being parsed out of the type's string representation.
tokenizer_class = type(tokenizer).__name__

In [None]:
def preprocess_function(examples):
    """Tokenize the ``annotated_context`` field of a batch of examples.

    Truncation is enabled with a maximum length of 508.
    """
    return tokenizer(
        examples["annotated_context"],
        truncation=True,
        max_length=508,
    )


# Tokenize every split in batched mode.
tokenized_datasets = dataset.map(preprocess_function, batched=True)

In [None]:
# Show the tokenized splits as the cell output.
tokenized_datasets

## Evaluation Function

In [None]:
# Global switch read by compute_metrics: when True, the text reports are
# printed and VAGUE gold labels are relaxed before the final scoring.
eval_mode = False


def compute_metrics(eval_preds):
    """Score class predictions against gold labels.

    Args:
        eval_preds: Pair ``(predictions, labels)``; ``predictions`` holds one
            row of per-class scores per example (argmax over axis 1 gives the
            predicted class id) and ``labels`` holds the gold class ids.

    Returns:
        dict: the ``'weighted avg'`` entries of the classification report
        without ``'support'``, plus ``'<CLASS>-f1'`` for each of the four
        classes.

    Side effects:
        When the module-level ``eval_mode`` flag is true, prints the strict
        report followed by the relaxed ("no vague") report.
    """
    class_names = ['BEFORE', 'AFTER', 'EQUAL', 'VAGUE']
    # Pin the class ids explicitly: without `labels`, classification_report
    # raises ValueError when a class is absent from both the gold labels and
    # the predictions, because target_names no longer matches the class count.
    class_ids = list(range(len(class_names)))
    vague_id = 3

    predictions, labels = eval_preds
    predictions = np.argmax(predictions, axis=1)

    if eval_mode:
        report = classification_report(y_true=labels, y_pred=predictions,
                                       labels=class_ids, target_names=class_names)

        # Relaxed scoring: a gold VAGUE whose prediction is not VAGUE takes
        # the predicted class. Work on a copy so the caller's label array is
        # left untouched.
        labels = labels.copy()
        relaxed = (labels == vague_id) & (predictions != vague_id)
        labels[relaxed] = predictions[relaxed]
        report_no_vague = classification_report(y_true=labels, y_pred=predictions,
                                                labels=class_ids, target_names=class_names)

        print(report)
        print(report_no_vague)

    # NOTE(review): in eval_mode `labels` is the relaxed copy at this point, so
    # the returned numbers are the relaxed ones (unchanged from the previous
    # behaviour) — confirm this is intended.
    results = classification_report(y_true=labels, y_pred=predictions,
                                    labels=class_ids, target_names=class_names,
                                    output_dict=True)
    final_results = results['weighted avg']
    final_results.pop('support')
    for class_name in class_names:
        final_results[f'{class_name}-f1'] = results[class_name]['f1-score']
    return final_results

## Evaluating Original Datasplit

In [None]:
# Build a Trainer that is used only to evaluate the "original" split; no
# training arguments are passed.
from transformers import Trainer
evaluator = Trainer(
            model=model,
            eval_dataset=tokenized_datasets["original"],
            tokenizer=tokenizer,
            data_collator=data_collator,
            compute_metrics=compute_metrics,
        )

In [None]:
# Enable the printed reports in compute_metrics (it reads the global
# eval_mode), then evaluate the "original" split.
eval_mode = True
print('Evaluate:')
evaluator.evaluate(tokenized_datasets['original'])

## Evaluating Counterfactual Datasplit

In [None]:
# Rebind `evaluator` to a Trainer for the "counterfactual" split; no training
# arguments are passed.
from transformers import Trainer
evaluator = Trainer(
            model=model,
            eval_dataset=tokenized_datasets["counterfactual"],
            tokenizer=tokenizer,
            data_collator=data_collator,
            compute_metrics=compute_metrics,
        )

In [None]:
# Enable the printed reports in compute_metrics (it reads the global
# eval_mode), then evaluate the "counterfactual" split.
eval_mode = True
print('Evaluate:')
evaluator.evaluate(tokenized_datasets['counterfactual'])

## Predictions 

In [None]:
def evaluate(model, dataloader):
    """Run the model over every batch and collect predicted and gold class ids.

    Args:
        model: Module whose call returns an object with a ``logits``
            attribute; it is switched to eval mode and run without gradients.
        dataloader: Iterable of dict batches; every value is moved to the
            model's device and each batch must contain a ``'labels'`` entry.

    Returns:
        Tuple ``(predictions, labels)`` of NumPy arrays covering all batches
        in iteration order.
    """
    model.eval()
    target_device = next(model.parameters()).device
    predicted_ids = []
    gold_ids = []

    with torch.no_grad():
        for raw_batch in dataloader:
            inputs = {name: tensor.to(target_device) for name, tensor in raw_batch.items()}
            logits = model(**inputs).logits

            # The predicted class is the index of the largest logit.
            predicted_ids.extend(logits.argmax(dim=-1).cpu().numpy())
            gold_ids.extend(inputs['labels'].cpu().numpy())

    return np.array(predicted_ids), np.array(gold_ids)

In [None]:
from transformers import default_data_collator
# Iterate the counterfactual split with the default collator; no batch_size is
# passed to DataLoader, so its default applies.
counterfactual_data_collator = default_data_collator
counterfactual_dataloader = DataLoader(tokenized_datasets['counterfactual'], collate_fn=counterfactual_data_collator)

# Predicted and gold class ids for every counterfactual instance.
counterfactual_predictions, counterfactuals_labels = evaluate(model, counterfactual_dataloader)

# Side-by-side table of predictions and gold labels.
counterfactuals_df = pd.DataFrame({
    'prediction': counterfactual_predictions,
    'true_label': counterfactuals_labels
})


In [None]:
# Iterate the original split with the default collator; no batch_size is passed
# to DataLoader, so its default applies.
original_data_collator = default_data_collator
original_dataloader = DataLoader(tokenized_datasets['original'], collate_fn=original_data_collator)

# Predicted and gold class ids for every original instance.
original_predictions, original_labels = evaluate(model, original_dataloader)

# Side-by-side table of predictions and gold labels.
original_df = pd.DataFrame({
    'prediction': original_predictions,
    'true_label': original_labels
})

In [None]:
# Show how many predictions were produced for the original split.
len(original_predictions)

## Saving Predictions

In [None]:
# Attach both prediction vectors to `df`, then give the counterfactual label
# columns explicit names in a single in-place rename.
# NOTE(review): `df` is not created in the visible cells — confirm where it is
# defined and that its row order matches both prediction arrays.
df['counterfactual_predictions'] = counterfactual_predictions
df['original_predictions'] = original_predictions
df.rename(
    columns={
        'new_temp': 'label_temp_counterfactuals',
        'new_label': 'label_counterfactuals',
    },
    inplace=True,
)

In [None]:
# Keep only the listed columns, in this order: identifiers and events, then the
# original text with its labels and predictions, then the counterfactual text
# with its labels and predictions.
df=df[['id', 'eventA', 'eventB',
       'context', 'label_temp', 'label', 'original_predictions',
       'counterfactual', 'label_temp_counterfactuals', 'label_counterfactuals', 'counterfactual_predictions']]