In [2]:
#!/usr/bin/env python3
import argparse
import json
import pandas as pd
import numpy as np
from transformers import AutoModelForSequenceClassification, XLNetTokenizer
import torch
from keras.utils import pad_sequences
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Validation split, stored as JSON Lines (one record per line).
VALIDATION_PATH = "data/validation.jsonl"
df = pd.read_json(VALIDATION_PATH, lines=True)

In [6]:
# Gold labels. `tags` holds a list per row, while one prediction is produced per
# row further down. Every row must therefore carry exactly one tag; otherwise the
# labels would no longer line up with the predictions (a row with no tag and a
# row with two tags would even cancel out and go unnoticed in a plain flatten).
tag_lists = list(df['tags'])
rows_without_single_tag = [pos for pos, tags in enumerate(tag_lists) if len(tags) != 1]
if rows_without_single_tag:
    raise ValueError(
        "expected exactly one tag per row; offending row positions: "
        f"{rows_without_single_tag[:10]}"
    )
true_labels = [tags[0] for tags in tag_lists]

uuids = list(df['uuid'])

# Maps a class index (position of the largest logit) to its label name.
decoding_array = {
    0: "multi",
    1: "passage",
    2: "phrase",
}
# Fine-tuned sequence classifier plus the base XLNet tokenizer.
model = AutoModelForSequenceClassification.from_pretrained("EstrixDS/XLNet_SemEval_Task1")
# NOTE(review): do_lower_case=True combined with a *cased* checkpoint looks
# unintended -- confirm it matches how the classifier was fine-tuned before changing it.
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased', do_lower_case=True)

# One text per row of df; only the first element of each `postText` entry is used below.
sentences = df['postText'].values

# NOTE(review): XLNet's own special tokens are presumably "<sep>" / "<cls>", so the
# literal " [SEP] [CLS]" suffix is likely tokenized as ordinary text. It is kept
# byte-for-byte because the checkpoint was presumably fine-tuned with this exact
# preprocessing -- verify before "fixing" it.
sentences = [sentence[0] + " [SEP] [CLS]" for sentence in sentences]
tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]


MAX_LEN = 256
# Map tokens to their vocabulary ids.
input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]
# Pad or truncate every sequence at its end to exactly MAX_LEN ids. Truncation
# also drops the suffix appended above for texts longer than MAX_LEN.
input_ids = pad_sequences(input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")

# Attention mask: 1.0 where the id is > 0, 0.0 elsewhere. Computed in one
# vectorized comparison instead of a per-element Python loop; the values and the
# resulting float32 tensor are the same as before.
# NOTE(review): this assumes padding is id 0 (pad_sequences' default fill value);
# any real token whose id is 0 is masked out as well -- confirm that is acceptable.
attention_masks = (input_ids > 0).astype("float32")

prediction_inputs = torch.tensor(input_ids)
prediction_masks = torch.tensor(attention_masks)

batch_size = 32

# Batches are drawn sequentially so that predictions keep the order of the inputs.
prediction_data = TensorDataset(prediction_inputs, prediction_masks)
prediction_sampler = SequentialSampler(prediction_data)
prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)

# Run on a GPU when one is available, otherwise on the CPU. The model and every
# batch must live on the same device. (Previously nothing was ever moved to a
# device, so the "add batch to GPU" step did nothing.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Evaluation mode for inference.
model.eval()

# One logit vector (1-D numpy array) per input, in input order.
predictions = []
for batch in prediction_dataloader:
    # Each batch is (input ids, attention mask); move both to the model's device.
    b_input_ids, b_input_mask = (t.to(device) for t in batch)
    # No gradients are needed for prediction.
    with torch.no_grad():
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
        logits = outputs[0]

    # Bring the logits back to host memory as a numpy array and store them row by row.
    logits = logits.detach().cpu().numpy()
    predictions.extend(logits)
# Decode each logit vector to its label name: take the index of the largest
# logit (first index on ties) and look it up in the index -> label table.
decoded_pred = [decoding_array[int(np.argmax(z))] for z in predictions]

{0: 'multi', 1: 'passage', 2: 'phrase'}
[-0.09633252 -0.51153016  0.5233277 ]
[-0.6206944  1.0529232 -0.4994568]
[ 0.36697555 -1.4582872   1.5293717 ]
[ 0.8836718 -2.0157027  1.3445599]
[-0.8933996  1.4569148 -0.4938417]
[-0.99288833 -0.9207619   2.3369708 ]
[-1.0384986  -0.35779676  1.8897498 ]
[-0.959985    0.9579983  -0.18352915]
[-1.4583246   1.7873356  -0.07945199]
[-1.1699524   1.384661    0.04090883]
[ 1.0335108   0.16628508 -1.5101539 ]
[-0.45465112 -1.0087416   2.2786796 ]
[ 0.6076848   0.08860607 -1.0492194 ]
[-1.411865  -0.0073873  1.5171795]
[-0.29710814 -1.1043086   1.9408407 ]
[-0.39609197  1.4887915  -0.8675545 ]
[-1.0495147  2.2483516 -0.9245384]
[-1.0066575   1.0645547   0.02053583]
[-0.5648729   0.74463284 -0.13506268]
[-1.446449   1.0170202  0.3164273]
[-0.7640938  1.3005023 -0.3515777]
[-1.1252096  2.2595196 -1.0287063]
[-0.9374156  1.9168053 -0.8453071]
[-0.57325613  0.9862334  -0.344638  ]
[-1.4634087   1.6417038  -0.06321429]
[ 0.44460148 -1.2835959   1.3778563 ]

In [13]:
from sklearn.metrics import balanced_accuracy_score
# Balanced accuracy of the decoded predictions against the gold labels.
bac = balanced_accuracy_score(true_labels, decoded_pred)
print(bac)

0.7452153110047847


In [14]:
from sklearn.metrics import matthews_corrcoef
# Matthews correlation coefficient of the decoded predictions against the gold labels.
mc = matthews_corrcoef(true_labels, decoded_pred)
print(mc)

0.6434106524853385


In [19]:
from sklearn.metrics import jaccard_score
# Jaccard score with 'weighted' averaging over the classes.
jac = jaccard_score(true_labels, decoded_pred, average='weighted')
print(jac)

0.6245858257095941


In [16]:
from sklearn.metrics import classification_report
# Per-class precision / recall / f1 / support table as plain text.
rep = classification_report(true_labels, decoded_pred)
print(rep)

              precision    recall  f1-score   support

       multi       0.87      0.59      0.70        22
     passage       0.65      0.89      0.76        38
      phrase       0.87      0.75      0.80        52

    accuracy                           0.77       112
   macro avg       0.80      0.75      0.75       112
weighted avg       0.79      0.77      0.77       112

