#### Install Dependencies

In [None]:
!pip install numpy
!pip install transformers

#### Imports

In [None]:
import json

import numpy as np
import torch
from transformers import (BertTokenizerFast,
                          BertForSequenceClassification,
                          DistilBertTokenizerFast,
                          DistilBertForSequenceClassification)

#### Parameters

In [None]:
# Every list below is parallel: position i of "fileNames",
# "modelTypes", "taskTypes", "tokenizerTypes", and
# "useParentComments" all describe the i-th model of the ensemble.

# Where the saved weights of each model are stored.
fileNames = [
    "file2",
    "file1",
]

# The name of each model used in the ensemble methods.
modelTypes = [
    "bert-base-uncased",
    "distilbert-base-uncased",
]

# The task class used to load each model.
taskTypes = [
    BertForSequenceClassification,
    DistilBertForSequenceClassification,
]

# The tokenizer class used to load each model's tokenizer.
tokenizerTypes = [
    BertTokenizerFast,
    DistilBertTokenizerFast,
]

# Whether each model receives the parent comment together with
# the child comment (True) or the child comment only (False).
useParentComments = [True, True]

targets = ['0', '1']  # '0' for not sarcastic.  '1' for sarcastic.
maxLength = 128  # Passed to the tokenizer as "max_length".
testFileName = "/content/data/testing.json"  # Location for testing set.

# Apply an equal weighting scheme.  This can be changed.  "weights"
# is a NumPy array with one entry per model, and its entries must
# sum to 1.  (Iterating over "len(modelTypes)" directly would raise
# a TypeError because an int is not iterable, so build the array
# from the model count instead.)
weights = np.full(len(modelTypes), 1 / len(modelTypes))

#### Load the Testing Data for Inference

In [None]:
def parseDataset(fileName):
    """
    Takes as input a fileName of a JSON Lines file (one JSON
    object per line), then opens the file and parses every
    non-blank line.  Each object must have the keys "parent",
    "child", and "label"; only the first element of "label"
    is used, converted to an int.

    Returns the tuple (labels, parentText, childText) of three
    parallel lists, in that order, with one entry per parsed line.
    """
    labels, parentText, childText = [], [], []  # Instantiate containers.

    # Stream the file one line at a time instead of loading
    # it all into memory first.  The encoding is given explicitly
    # so that parsing does not depend on the platform default.
    with open(fileName, 'r', encoding="utf-8") as json_file:
        for dataEntry in json_file:

            # Skip blank lines (such as a trailing empty line),
            # which would otherwise make json.loads raise.
            if not dataEntry.strip():
                continue

            data = json.loads(dataEntry)  # Load the dictionary.

            # Construct the parent, child, and label
            # lists that will be returned.
            parentText.append(data["parent"])
            childText.append(data["child"])
            labels.append(int(data["label"][0]))

    # Return the labels first, then the parent and child comments.
    return labels, parentText, childText


# Initialize the testing set.  "testData" is the tuple
# (labels, parentText, childText) returned by parseDataset,
# so testData[0] holds the labels and len(testData) is 3,
# not the number of samples.
testData = parseDataset(testFileName)

#### Function to Make Predictions

In [None]:
def predict(*args, tokenizer):
    """
    Takes as input one or more strings (for example a parent
    comment and a child comment) and the tokenizer to encode
    them with, then predicts if the text is sarcastic (1) or
    not sarcastic (0).

    The model is read from the global "model" and the maximum
    sequence length from the global "maxLength".

    Returns the predicted class index as a Python int.
    """

    # Tokenize the text and move the encoded tensors
    # onto the same device that the model lives on.
    inputs = tokenizer(*args, padding=True, truncation=True,
                       max_length=maxLength,
                       return_tensors="pt").to(model.device)

    # Gradients are not needed for inference, so
    # disable them while running the model.
    with torch.no_grad():
        outputs = model(**inputs)

    # Apply a softmax to the first output of the
    # model to get a probability for each class.
    probs = outputs[0].softmax(1)

    # Return whether the text is sarcastic (1) or not
    # sarcastic (0) as a plain int rather than a tensor,
    # so the result can be compared and stored directly.
    return int(probs.argmax().item())

#### Load the Models and Evaluate the Ensemble

In [None]:
# "testData" is the tuple (labels, parentText, childText), so the
# number of samples is the length of the labels list and not the
# length of the tuple itself (which is always 3).
testLabels = np.array(testData[0])
numSamples = len(testLabels)

# Initialize a container to store the predicted class of
# each model (columns) for each testing sample (rows).
predictions = np.zeros((numSamples, len(modelTypes)))

# Zip the files, the models, the type of the
# tasks, and the tokenizer types to loop on.
# Then enumerate the zipped variables.
zipped = enumerate(zip(fileNames, modelTypes, taskTypes, tokenizerTypes))

# Loop for every model and load each model.  enumerate yields
# (index, tuple) pairs, so the zipped tuple is unpacked in its
# own set of parentheses.
for modelIndex, (fileName, modelType, taskType, tokenizerType) in zipped:

    # Load the saved model and tokenizer.
    model = taskType.from_pretrained(fileName,
                                     num_labels=len(targets)).to("cuda")
    tokenizer = tokenizerType.from_pretrained(fileName)

    # Loop for all the data in the test set and store
    # the class this model predicts for each sample.
    for dataIndex, (parent, child) in enumerate(zip(*testData[1:])):

        # Check if parent comments are enabled for this model.
        if useParentComments[modelIndex]:
            text = (parent, child)  # Set text to parent and child comment.
        else:
            text = (child,)  # Set the text to the child comment.

        # Set the value in the predictions matrix.  The tokenizer
        # is a required keyword argument of predict, and int()
        # turns the result into a value NumPy can store.
        predictions[dataIndex, modelIndex] = int(
            predict(*text, tokenizer=tokenizer))

# Count, for every model, the samples whose predicted class
# matches the label.  The labels are broadcast across the columns.
correctCounts = np.sum(predictions == testLabels[:, np.newaxis], axis=0)

# Loop for all the models and print the accuracy of each model.
for index, accuracy in enumerate(correctCounts):
    print(f"The accuracy of model {index} is {accuracy} / "
          f"{numSamples} = {accuracy / numSamples}")

# Apply the weighting scheme to the votes of the models.  A sample
# is predicted sarcastic when the weighted vote is above 0.5, so an
# exact tie is predicted as not sarcastic.
ensembleVotes = np.dot(predictions, weights) > 0.5

# The number of comments the ensemble predicted correctly.
accuracy = np.sum(ensembleVotes == testLabels)

# Print the accuracy of the ensemble model.
print(f"Ensembling these {len(modelTypes)} models yield an "
      f"accuracy of {accuracy} / {numSamples} = "
      f"{accuracy / numSamples}")