# Evaluating a Classifier based on deep learning

This notebook evaluates a fine-tuned DistilBERT model. Please run the training notebook `part_2_deep_learning_training.ipynb` first, so that the fine-tuned model is available.

The code in this notebook was executed on a Linux-based virtual machine with the following **computational requirements**:
* GPU:  RTX2080 Super
* vCPU:  8 
* CPU Memory: 48GB 
* GPU Memory: 8GB

## Import necessary dependencies and data

In [1]:
from sklearn.metrics import accuracy_score, f1_score
import numpy as np
import tensorflow as tf
import json
import os
from data_extraction import get_raw_dataset
from transformers import DistilBertTokenizer


2025-03-12 00:40:18.051894: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-03-12 00:40:18.288416: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-12 00:40:19.064064: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/student/.local/lib/python3.10/site-packages/tensorrt_libs:/usr/local/cuda-12.3/lib64:/usr/lib/x86_64-linux-gnu
2025-03-12 00:40:19.064257: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Cou

## Load the Fine-Tuned Model

In [2]:
# Load the fine-tuned model saved under `model_path`
model_path = 'models/model_deep_learning_distilBERT_tuned'

try:
    model = tf.keras.models.load_model(model_path)
except OSError as e:
    # Every later cell needs `model`; report the problem and stop here rather
    # than continuing and failing later with an unrelated NameError.
    print(f"Error loading model: {e}")
    raise
else:
    # Only summarize once the model has actually been loaded
    model.summary()

2025-03-12 00:40:20.583224: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2025-03-12 00:40:20.635920: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2025-03-12 00:40:20.636330: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2025-03-12 00:40:20.637464: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

Model: "tf_distil_bert_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 distilbert (Custom>TFDistil  multiple                 66362880  
 BertMainLayer)                                                  
                                                                 
 pre_classifier (Dense)      multiple                  590592    
                                                                 
 classifier (Dense)          multiple                  1538      
                                                                 
 dropout_19 (Dropout)        multiple                  0         
                                                                 
Total params: 66,955,010
Trainable params: 66,955,010
Non-trainable params: 0
_________________________________________________________________


## Load the Validation Set and the Test Set Data for Evaluation

In [3]:
# Fetch the raw validation (dev) split: texts in X_dev, labels in y_dev
X_dev, y_dev = get_raw_dataset('dev')

# Texts and labels must pair up one-to-one
assert y_dev.shape[0] == X_dev.shape[0]

In [4]:
# Vectorize the validation data
X_dev_list = X_dev.tolist()

# Load the tokenizer
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
encoded_dev_input = tokenizer(
    X_dev_list,
    padding=True,
    truncation=True,
    max_length=32,
    return_tensors='tf'
)

# Convert y_dev to a tensor
y_dev_tensor = tf.convert_to_tensor(y_dev.values)

# Check the lengths before and after encoding.
# Each len() is its own operand in the chain, so the attention mask is
# checked as well (a misplaced parenthesis previously skipped it).
assert (
    len(X_dev)
    == len(X_dev_list)
    == len(encoded_dev_input['input_ids'])
    == len(encoded_dev_input['attention_mask'])
    == len(y_dev)
    == len(y_dev_tensor)
)

In [5]:
# Fetch the raw test split: texts in X_test and their identifiers in ids
X_test, ids = get_raw_dataset(mode='test')

# Each test text must have exactly one identifier
assert len(ids) == len(X_test)

In [6]:
# Turn the test texts into a plain Python list for the tokenizer
X_test_list = X_test.tolist()

# Load the pretrained tokenizer and encode the test texts
# (same padding / truncation / max_length settings as the dev set cell)
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
encoded_test_input = tokenizer(
    X_test_list, padding=True, truncation=True, max_length=32, return_tensors='tf'
)

# All views of the test set must hold the same number of samples:
# the set of their lengths collapses to a single value when they agree.
assert len({
    len(X_test),
    len(X_test_list),
    len(encoded_test_input['input_ids']),
    len(encoded_test_input['attention_mask']),
    len(ids),
}) == 1

## Make Predictions based on the Model

In [7]:
def predict(model, input, tokenizer, max_length=5):
    """
    Generate a class prediction for one input text using the given model and tokenizer.

    Args:
        model (tf.keras.Model): The fine-tuned model used for making predictions.
            The result of its ``predict`` call is indexed with the key ``"logits"``.
        input (str): The input text to be predicted.
        tokenizer (DistilBertTokenizer): The tokenizer used to encode the input text.
        max_length (int, optional): Fixed number of tokens the text is truncated or
            padded to. Defaults to 5, the value this function has always used.

    Returns:
        int: The predicted class, i.e. the index of the largest output logit
        (identical to the index of the largest softmax probability).

    Note:
        NOTE(review): the dev and test sets are tokenized with ``max_length=32``
        in this notebook, while this function encodes with 5 tokens by default
        and passes no attention mask, so padding tokens are not masked.
        Confirm against the training notebook which input shape the saved model
        expects before changing the default.

    Author:
        Kelvin Mock
    """
    # Encode the text as a single fixed-length sequence of token ids
    predict_input = tokenizer.encode(
        input,
        truncation=True,
        padding="max_length",  # Ensures a fixed-length input
        max_length=max_length,
        return_tensors="tf"
    )

    # The model is fed a dictionary holding only the key 'input_ids'
    predict_input_dict = {'input_ids': predict_input}

    # verbose=0: this function is called once per sample, so the default
    # progress bar would print output for every single prediction.
    tf_output = model.predict(predict_input_dict, verbose=0)

    # Softmax is monotonic, so the argmax of the raw logits is the same class
    # as the argmax of the probabilities; the batch holds exactly one sample.
    logits = tf_output["logits"][0]

    # Cast to a built-in int so the result matches the documented return type
    return int(np.argmax(logits))

In [None]:
### Prediction on Validation (dev set) Data
# Collect the predictions in a list and convert once at the end: np.append
# copies the whole array on every call, which is quadratic in the dev set size.
# dtype=np.float64 keeps the dtype that appending to np.array([]) produced.
pred_dev = np.array(
    [
        predict(model=model, input=devText, tokenizer=tokenizer)
        for devText in X_dev_list
    ],
    dtype=np.float64,
)



In [9]:
# Sanity check: exactly one prediction per dev text, then report the count
assert len(X_dev_list) == len(pred_dev)
print("Length of Prediction Array: ", len(pred_dev))

Length of Prediction Array:  5000


In [10]:
# Score the dev-set predictions against the true labels
acc = accuracy_score(y_dev, pred_dev)

# Macro and micro F1 differ only in the averaging mode passed to f1_score
macro_f1, micro_f1 = (
    f1_score(y_dev, pred_dev, average=averaging)
    for averaging in ('macro', 'micro')
)

# Report every metric with four decimal places
print("Evaluation on Dev Set:")
print(f"Accuracy: {acc:.4f}")
print(f"Macro F1: {macro_f1:.4f}")
print(f"Micro F1: {micro_f1:.4f}")

Evaluation on Dev Set:
Accuracy: 0.5000
Macro F1: 0.3333
Micro F1: 0.5000


In [11]:
# Save the predictions
try:
    # exist_ok=True creates the folder only when it is missing, without a
    # separate existence check that could race with another process.
    os.makedirs('predictions', exist_ok=True)
    np.save('predictions/DISTILBERT_pred_dev.npy', pred_dev) # predicted labels
    np.save('predictions/DISTILBERT_y_dev.npy', y_dev) # true labels
except OSError as e:
    print(f"Error saving predictions: {e}")
else:
    print("Predictions saved to 'predictions' folder")

Predictions saved to 'predictions' folder


In [12]:
### Prediction on Test Data

# Define a relative path for the output file
relative_output_file = os.path.join(os.curdir, 'content', 'Result_distilBERT.jsonl')

# open() does not create missing folders, so make sure the target folder exists
os.makedirs(os.path.dirname(relative_output_file), exist_ok=True)

with open(relative_output_file, 'w', encoding='utf-8') as f:
    # Prediction Loop: one prediction per (id, text) pair of the test set
    for id_val, testText in zip(ids, X_test_list):
        currPred = predict(
            model=model,
            input=testText,
            tokenizer=tokenizer
        )
        # Write predictions to the output file in JSONL format (one object per line)
        result = {"id": id_val, "label": int(currPred)}
        f.write(json.dumps(result) + "\n")

print(f"\nPrediction file '{relative_output_file}' has been generated.")


Prediction file './content/Result_distilBERT.jsonl' has been generated.
