In [1]:
import torch
import pandas as pd
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.metrics import f1_score
import torch.nn.functional as F

In [2]:
train = pd.read_csv("../Data/TaskA_train_neural.csv", index_col=False)
test = pd.read_csv("../Data/TaskA_test_neural.csv", index_col=False)

In [3]:
X_train = train.loc[:, ["topic", "Premise", "Conclusion", "SBERT_premise", "SBERT_conclusion", "SBERT_cosine_sim"]]
y_train = train.loc[:, ["Validity", "Novelty"]]
X_test = test.loc[:, ["topic", "Premise", "Conclusion", "SBERT_premise", "SBERT_conclusion", "SBERT_cosine_sim"]]
y_test = test.loc[:, ["Validity", "Novelty"]]

In [6]:
def str_to_list(text):
    '''
    Return tensor string into list
    '''
    # clean string
    clean_str = text.replace('tensor(', '').replace(')', '').strip()
    # convert to list
    tensor = eval(clean_str, {"torch": torch, "__builtins__": {}})
    return tensor

def process_covariate_data(df):
    '''
    Expanding all tensors in a single cell
    '''
    # convert str to tensor (list)
    SBERT_premise = df.SBERT_premise.apply(lambda x: str_to_list(x))
    SBERT_conclusion = df.SBERT_conclusion.apply(lambda x: str_to_list(x))

    # expand the list into individual entries
    df_expand1 = SBERT_premise.apply(pd.Series)
    df_expand2 = SBERT_conclusion.apply(pd.Series)

    # assign a meaningful name
    df_expand1.columns = ['pre_emb{}'.format(i+1) for i in range(df_expand1.shape[1])]
    df_expand2.columns = ['con_emb{}'.format(i+1) for i in range(df_expand2.shape[1])]

    # put everything together
    df_final = pd.concat([df.drop(['SBERT_premise', "SBERT_conclusion"], axis=1), df_expand1, df_expand2], axis=1)
    return df_final

def preprocess_input(x, y):
    '''
    return DataLoader for later input into the model
    '''
    # pd.dataframe to array
    x = np.array(x, dtype=np.float64)
    y = np.array(y, dtype=np.float64)
    # transform y for nn model
    # assume a value of 0 is not valid/novel
    y[y == -1] = 0
    # Transform the data
    transformation_dict = {
    (1, 1): [1, 0, 0, 0],
    (1, 0): [0, 1, 0, 0],
    (0, 1): [0, 0, 1, 0],
    (0, 0): [0, 0, 0, 1],
    }
    
    y = np.array([transformation_dict[tuple(row)] for row in y])
    # array to tensor
    x_torch = torch.tensor(x)
    y_torch = torch.tensor(y)
    data = TensorDataset(x_torch, y_torch)

    batch_size = 10
    loader = DataLoader(data, batch_size=batch_size, shuffle=True)
    return loader

class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.input_dim = 1537
        self.hidden_dim = 200
        self.output_dim = 4
        self.fc1 = nn.Linear(self.input_dim, self.hidden_dim)  # Assuming n input features
        self.bn1 = nn.BatchNorm1d(self.hidden_dim)  # Batch normalization layer
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(self.hidden_dim, self.output_dim)  # Outputs 4 values, one for each output column
        self.softmax = nn.Softmax(dim=1)
    def forward(self, x):
        x = x.float()
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x

In [9]:
X_train1 = process_covariate_data(X_train)
X_test1 = process_covariate_data(X_test)

train_loader = preprocess_input(X_train1.iloc[:, 3:], y_train)
test_loader = preprocess_input(X_test1.iloc[:, 3:], y_test)

In [10]:
model = SimpleNN()
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
epochs = 51
for epoch in range(epochs):
    # for each batch
    for inputs, targets in train_loader:
        inputs = inputs.float()
        targets = targets.float()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

    # Optionally print the loss every few epochs
    if epoch % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')

Epoch 1, Loss: 1.1268659830093384
Epoch 11, Loss: 1.2287373542785645
Epoch 21, Loss: 1.0439461469650269
Epoch 31, Loss: 1.1889241933822632
Epoch 41, Loss: 1.0718261003494263
Epoch 51, Loss: 0.895089328289032


In [12]:
y_test.loc[y_test.Validity == -1, "Validity"] = 0
y_test.loc[y_test.Novelty == -1, "Novelty"] = 0

In [13]:
model.eval()
test_label = {"validity":list(y_test.Validity), "novelty":list(y_test.Novelty)}
test_preds = {"validity":[], "novelty": []}
with torch.no_grad():
    test_loss = 0
    for inputs, _ in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        predicted = F.one_hot(predicted, num_classes=4)
        # Transform the data
        transformation_dict = {
        (1, 0, 0, 0): (1, 1), 
        (0, 1, 0, 0): (1, 0),
        (0, 0, 1, 0): (0, 1),
        (0, 0, 0, 1): (0, 1)
        }
        predicted = [transformation_dict[tuple(row.tolist())] for row in predicted]
        # Obtain the prediction
        for pred in predicted:
            test_preds["validity"].append(pred[0])
            test_preds["novelty"].append(pred[1]) 

In [14]:
from sklearn.metrics import classification_report
from typing import Dict
import numpy as np
class SharedTaskConstants:
    """
    Use these constants to interface with the data, not with the id2label used
    inside the Huggingface models!!
    """
    targets = ['validity', 'novelty']
    validity_label_mapping = {
        -1: "not-valid",
        0: "not-valid",  # can be excluded since test set does not contain these
        1: "valid",
    }

    novelty_label_mapping = {
        -1: "not-novel",
        0: "not-novel",  # can be excluded since test set does not contain these
        1: "novel",
    }

    validity_id2label = {v: k for k, v in validity_label_mapping.items()}
    novelty_id2label = {v: k for k, v in novelty_label_mapping.items()}

    local_str_mapping = {
        'novel': 1,
        'not-novel': 0,
        'valid': 1,
        'not-valid': 0
    }

    @staticmethod
    def val_nov_metric(is_validity: np.ndarray, should_validity: np.ndarray, is_novelty: np.ndarray,
                       should_novelty: np.ndarray) -> Dict[str, float]:
        ret = dict()

        ret_base_help = {
            "true_positive_validity": np.sum(np.where(
                np.all(np.stack([is_validity >= .5, should_validity >= .5]), axis=0),
                1, 0)),
            "true_positive_novelty": np.sum(np.where(
                np.all(np.stack([is_novelty >= .5, should_novelty >= .5]), axis=0),
                1, 0)),
            "true_positive_valid_novel": np.sum(np.where(
                np.all(np.stack([is_validity >= .5, is_novelty >= .5,
                                 should_validity >= .5, should_novelty >= .5]), axis=0),
                1, 0)),
            "true_positive_nonvalid_novel": np.sum(np.where(
                np.all(np.stack([is_validity < .5, is_novelty >= .5,
                                 should_validity < .5, should_novelty >= .5]), axis=0),
                1, 0)),
            "true_positive_valid_nonnovel": np.sum(np.where(
                np.all(np.stack([is_validity >= .5, is_novelty < .5,
                                 should_validity >= .5, should_novelty < .5]), axis=0),
                1, 0)),
            "true_positive_nonvalid_nonnovel": np.sum(np.where(
                np.all(np.stack([is_validity < .5, is_novelty < .5,
                                 should_validity < .5, should_novelty < .5]), axis=0),
                1, 0)),
            "classified_positive_validity": np.sum(np.where(is_validity >= .5, 1, 0)),
            "classified_positive_novelty": np.sum(np.where(is_novelty >= .5, 1, 0)),
            "classified_positive_valid_novel": np.sum(np.where(
                np.all(np.stack([is_validity >= .5, is_novelty >= .5]), axis=0),
                1, 0)),
            "classified_positive_nonvalid_novel": np.sum(np.where(
                np.all(np.stack([is_validity < .5, is_novelty >= .5]), axis=0),
                1, 0)),
            "classified_positive_valid_nonnovel": np.sum(np.where(
                np.all(np.stack([is_validity >= .5, is_novelty < .5]), axis=0),
                1, 0)),
            "classified_positive_nonvalid_nonnovel": np.sum(np.where(
                np.all(np.stack([is_validity < .5, is_novelty < .5]), axis=0),
                1, 0)),
            "indeed_positive_validity": np.sum(np.where(should_validity >= .5, 1, 0)),
            "indeed_positive_novelty": np.sum(np.where(should_novelty >= .5, 1, 0)),
            "indeed_positive_valid_novel": np.sum(np.where(
                np.all(np.stack([should_validity >= .5, should_novelty >= .5]), axis=0),
                1, 0)),
            "indeed_positive_nonvalid_novel": np.sum(np.where(
                np.all(np.stack([should_validity < .5, should_novelty >= .5]), axis=0),
                1, 0)),
            "indeed_positive_valid_nonnovel": np.sum(np.where(
                np.all(np.stack([should_validity >= .5, should_novelty < .5]), axis=0),
                1, 0)),
            "indeed_positive_nonvalid_nonnovel": np.sum(np.where(
                np.all(np.stack([should_validity < .5, should_novelty < .5]), axis=0),
                1, 0)),
        }

        ret_help = {
            "precision_validity": ret_base_help["true_positive_validity"] /
                                  max(1, ret_base_help["classified_positive_validity"]),
            "precision_novelty": ret_base_help["true_positive_novelty"] /
                                 max(1, ret_base_help["classified_positive_novelty"]),
            "recall_validity": ret_base_help["true_positive_validity"] /
                               max(1, ret_base_help["indeed_positive_validity"]),
            "recall_novelty": ret_base_help["true_positive_novelty"] /
                              max(1, ret_base_help["indeed_positive_novelty"]),
            "precision_valid_novel": ret_base_help["true_positive_valid_novel"] /
                                     max(1, ret_base_help["classified_positive_valid_novel"]),
            "precision_valid_nonnovel": ret_base_help["true_positive_valid_nonnovel"] /
                                        max(1, ret_base_help["classified_positive_valid_nonnovel"]),
            "precision_nonvalid_novel": ret_base_help["true_positive_nonvalid_novel"] /
                                        max(1, ret_base_help["classified_positive_nonvalid_novel"]),
            "precision_nonvalid_nonnovel": ret_base_help["true_positive_nonvalid_nonnovel"] /
                                           max(1, ret_base_help["classified_positive_nonvalid_nonnovel"]),
            "recall_valid_novel": ret_base_help["true_positive_valid_novel"] /
                                  max(1, ret_base_help["indeed_positive_valid_novel"]),
            "recall_valid_nonnovel": ret_base_help["true_positive_valid_nonnovel"] /
                                     max(1, ret_base_help["indeed_positive_valid_nonnovel"]),
            "recall_nonvalid_novel": ret_base_help["true_positive_nonvalid_novel"] /
                                     max(1, ret_base_help["indeed_positive_nonvalid_novel"]),
            "recall_nonvalid_nonnovel": ret_base_help["true_positive_nonvalid_nonnovel"] /
                                        max(1, ret_base_help["indeed_positive_nonvalid_nonnovel"])
        }

        ret.update({
            "f1_validity": 2 * ret_help["precision_validity"] * ret_help["recall_validity"] / max(1e-4, ret_help[
                "precision_validity"] + ret_help["recall_validity"]),
            "f1_novelty": 2 * ret_help["precision_novelty"] * ret_help["recall_novelty"] / max(1e-4, ret_help[
                "precision_novelty"] + ret_help["recall_novelty"]),
            "f1_valid_novel": 2 * ret_help["precision_valid_novel"] * ret_help["recall_valid_novel"] / max(1e-4,
                                                                                                           ret_help[
                                                                                                               "precision_valid_novel"] +
                                                                                                           ret_help[
                                                                                                               "recall_valid_novel"]),
            "f1_valid_nonnovel": 2 * ret_help["precision_valid_nonnovel"] * ret_help["recall_valid_nonnovel"] / max(
                1e-4, ret_help["precision_valid_nonnovel"] + ret_help["recall_valid_nonnovel"]),
            "f1_nonvalid_novel": 2 * ret_help["precision_nonvalid_novel"] * ret_help["recall_nonvalid_novel"] / max(
                1e-4, ret_help["precision_nonvalid_novel"] + ret_help["recall_nonvalid_novel"]),
            "f1_nonvalid_nonnovel": 2 * ret_help["precision_nonvalid_nonnovel"] * ret_help[
                "recall_nonvalid_nonnovel"] / max(1e-4, ret_help["precision_nonvalid_nonnovel"] + ret_help[
                "recall_nonvalid_nonnovel"])
        })

        ret.update({
            "f1_macro": (ret["f1_valid_novel"] + ret["f1_valid_nonnovel"] + ret["f1_nonvalid_novel"] + ret[
                "f1_nonvalid_nonnovel"]) / 4
        })

        return ret

In [15]:
def print_results(baseline_name: str, y_true: dict, y_pred: dict):
    print(f"==== {baseline_name} ====")
    print("Validity")
    results_validity = classification_report(
        y_true['validity'],
        y_pred['validity'],
        target_names=['not-valid', 'valid'],
        labels=[0, 1],
        zero_division=0
    )
    print(results_validity)

    print("Novelty")
    results_novelty = classification_report(
        y_true['novelty'],
        y_pred['novelty'],
        target_names=['not-novel', 'novel'],
        labels=[0, 1],
        zero_division=0
    )
    print(results_novelty)

    print("Combined (organization eval)")
    res = SharedTaskConstants.val_nov_metric(
        np.array(y_pred['validity']),
        np.array(y_true['validity']),
        np.array(y_pred['novelty']),
        np.array(y_true['novelty']),
    )
    print(res['f1_macro'].round(4))

In [16]:
print_results("Roberta_NLI", test_label, test_preds)

==== Roberta_NLI ====
Validity
              precision    recall  f1-score   support

   not-valid       0.40      0.70      0.51       206
       valid       0.61      0.30      0.40       314

    accuracy                           0.46       520
   macro avg       0.50      0.50      0.45       520
weighted avg       0.52      0.46      0.44       520

Novelty
              precision    recall  f1-score   support

   not-novel       0.52      0.24      0.33       294
       novel       0.42      0.71      0.53       226

    accuracy                           0.45       520
   macro avg       0.47      0.48      0.43       520
weighted avg       0.48      0.45      0.42       520

Combined (organization eval)
0.1638
