# Adam-Smith

In [None]:
import numpy as np
import os
import pandas as pd
import torch
import logging
from typing import List
from components.data_modules.BertDataModule import BertDataset
from components.interface_modules.load_ensemble_list import (load_ensemble_list)
from components.interface_modules.transformer_local import (load_local_tokenizer)
from components.models.BertFineTunerPl import BertFineTunerPl

# Inference-server mode is switched on by the mere presence of the
# TIRA_INFERENCE_SERVER variable; its value is never inspected.
runs_as_inference_server = 'TIRA_INFERENCE_SERVER' in os.environ
# Input and output locations, falling back to local folders when the
# corresponding environment variables are not set.
dataset_dir = os.getenv('TIRA_INPUT_DIRECTORY', './dataset')
output_dir = os.getenv('TIRA_OUTPUT_DIRECTORY', './output')

## Setup

In [None]:
# Location handed to load_ensemble_list to resolve the ensemble members.
_model_dir = 'checkpoints/human_value_trained_models'

# Lookup table filled by the loading cell: checkpoint path -> model and
# tokenizer path -> tokenizer.
_model_registry = dict()
# Passed to load_ensemble_list and later used as the decision cut-off on the
# averaged ensemble scores.
_ensemble_threshold = 0.26

_loaded_configuration = load_ensemble_list(_model_dir, _ensemble_threshold)
_, _ensemble_list, _label_columns, NAME = _loaded_configuration

# Announce the selected configuration through both logging and stdout.
_startup_message = f'Initializing with configuration: {NAME}'
logging.info(_startup_message)
print(_startup_message)

In [None]:
# Run on the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Fill _model_registry with one entry per checkpoint (the loaded model) and
# one entry per tokenizer path (the tokenizer); predict() looks both up by key.
for elem in _ensemble_list:
    logging.debug(f"Loading model {elem['MODEL_CHECKPOINT']}")

    PARAMS = elem["PARAMS"]
    TRAINED_MODEL = BertFineTunerPl.load_from_checkpoint(
        elem["MODEL_CHECKPOINT"],
        params=PARAMS,
        label_columns=_label_columns,
        n_classes=len(_label_columns),
    )
    # The models are only used for prediction: put them in eval mode and
    # freeze them before moving them to the selected device.
    TRAINED_MODEL.eval()
    TRAINED_MODEL.freeze()
    TRAINED_MODEL = TRAINED_MODEL.to(device)
    _model_registry[elem['MODEL_CHECKPOINT']] = TRAINED_MODEL

    logging.debug(f"With Tokenizer {PARAMS['MODEL_PATH']}")
    # Ensemble members may share a tokenizer path; load each tokenizer once.
    if PARAMS['MODEL_PATH'] not in _model_registry:
        TOKENIZER = load_local_tokenizer(PARAMS["MODEL_PATH"])
        _model_registry[PARAMS['MODEL_PATH']] = TOKENIZER

## Predict function

In [None]:
def _predict_unseen_data(trained_model, model_tokenizer, params, data):
    """Run one model over every item of ``data`` and collect its outputs.

    Args:
        trained_model: Model invoked as
            ``trained_model(input_ids, attention_mask)``; it must return a
            pair whose second element is the prediction tensor.
        model_tokenizer: Tokenizer handed to ``BertDataset``.
        params: Mapping that provides ``"MAX_TOKEN_COUNT"``.
        data: Data handed to ``BertDataset`` (``predict`` passes a DataFrame
            with a single ``text`` column).

    Returns:
        A CPU tensor holding one flattened prediction per dataset item,
        stacked along dimension 0.

    Raises:
        RuntimeError: Raised by ``torch.stack`` when the dataset yields no
            items.
    """
    dataset = BertDataset(
        data=data,
        tokenizer=model_tokenizer,
        max_token_count=params["MAX_TOKEN_COUNT"],
    )

    predictions = []

    # Pure inference: disabling gradient tracking guarantees that no autograd
    # graph is kept alive by the collected outputs, even if the caller passes
    # a model that has not been frozen.
    with torch.no_grad():
        for item in dataset:
            # Items are fed one at a time, so add the batch dimension here.
            _, prediction = trained_model(
                item["input_ids"].unsqueeze(dim=0).to(device),
                item["attention_mask"].unsqueeze(dim=0).to(device),
            )
            predictions.append(prediction.flatten())

    return torch.stack(predictions).detach().cpu()

In [None]:
def predict(input_list: List) -> List:
    """Classify every input text with the whole ensemble.

    Each model in ``_ensemble_list`` scores all inputs. The scores are
    averaged across models, and a label is reported as ``"1"`` when its
    average is strictly greater than ``_ensemble_threshold``, else ``"0"``.

    Args:
        input_list: Texts to classify, one entry per instance.

    Returns:
        One dict per input, mapping every name in ``_label_columns`` to the
        string ``"1"`` or ``"0"``. An empty list is returned when
        ``input_list`` is empty or when any model raises while predicting
        (the failure is logged with its traceback).
    """
    # Nothing to classify. Returning early also avoids loading the empty
    # frame into the models only to fail on stacking zero predictions.
    if len(input_list) == 0:
        return []

    data = pd.DataFrame(input_list, columns=['text'])

    predictions = []
    for elem in _ensemble_list:
        logging.debug(f'Classifying with {elem["MODEL_CHECKPOINT"]}')
        trained_model = _model_registry[elem["MODEL_CHECKPOINT"]]
        params = elem["PARAMS"]
        tokenizer = _model_registry[params['MODEL_PATH']]
        try:
            pred = _predict_unseen_data(
                trained_model=trained_model,
                model_tokenizer=tokenizer,
                params=params,
                data=data,
            )
        except Exception as e:
            # Best effort for callers: report the failure and return no
            # predictions. KeyboardInterrupt/SystemExit are deliberately not
            # caught so the process can still be stopped.
            logging.exception(f'Exception while running model \'{elem["MODEL_CHECKPOINT"]}\': {str(e)}')
            return []
        predictions.append(pred)

    # Stacked shape is (n_models, n_inputs, n_outputs); average over models.
    # Assumes n_outputs matches len(_label_columns) - TODO confirm.
    predictions_avg = np.mean(torch.stack(predictions).numpy(), axis=0)
    y_pred = np.where(predictions_avg > _ensemble_threshold, 1, 0)

    return [
        {l_name: str(y_pred[i, idx]) for idx, l_name in enumerate(_label_columns)}
        for i in range(len(input_list))
    ]

## Classification on TIRA

In [None]:
# Batch mode: classify arguments.tsv from the input directory and write the
# predictions to the output directory. Skipped when running as an inference
# server.
if not runs_as_inference_server:
    test_df_input = pd.read_csv(os.path.join(dataset_dir, 'arguments.tsv'), sep='\t')

    # One input string per argument: premise, stance and conclusion joined
    # by single spaces.
    test_df_input["text"] = test_df_input["Premise"] + " " + test_df_input["Stance"] + " " + test_df_input["Conclusion"]
    input_list = test_df_input["text"].tolist()

    print(f'Starting prediction of {len(input_list)} instances')
    prediction_list = predict(input_list)

    # predict() returns an empty list when a model failed. Writing the file
    # anyway would produce a table without any label columns, so stop here.
    if len(prediction_list) != len(input_list):
        raise RuntimeError(
            f'Prediction failed: got {len(prediction_list)} results '
            f'for {len(input_list)} instances'
        )

    test_prediction_df = pd.concat([test_df_input[['Argument ID']], pd.DataFrame.from_dict(prediction_list)], axis=1)

    # to_csv does not create missing directories; make sure the target exists.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    prediction_file = os.path.join(output_dir, "predictions.tsv")
    print(f'Writing prediction to: {prediction_file}')
    test_prediction_df.to_csv(prediction_file, sep="\t", index=False)