### 1. Read the base model and create the modified model

In [1]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch.quantization

from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'
print("Using device:", device)


# Step 1: Load the pre-trained Hugging Face model
model_name = "/workspaces/LLM-Experimentation-Capstone/10_code/bert_pruning_model/bert-distilled-pruned-sem_eval-english"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Step 2: Prepare input data and inference pipeline
input_text = "This is an example sentence to test quantization."
inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)

# Step 3: Create a quantization configuration and quantize the model
quantized_model = torch.quantization.quantize_dynamic(
    model,  # Original model
    qconfig_spec={torch.nn.Linear},  # Configuration for quantizing linear layers
    dtype=torch.qint8,  # Quantization data type (int8)
)

  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda


### 2. Evaluate the modified model

##### 2.1 Model Size

In [2]:
param_size = 0
for param in model.parameters():
    param_size += param.nelement() * param.element_size()
buffer_size = 0
for buffer in model.buffers():
    buffer_size += buffer.nelement() * buffer.element_size()

base_model_size = (param_size + buffer_size) / 1024**2
print('Base model size: {:.3f}MB'.format(base_model_size))

param_size = 0
for param in quantized_model.parameters():
    param_size += param.nelement() * param.element_size()
buffer_size = 0
for buffer in quantized_model.buffers():
    buffer_size += buffer.nelement() * buffer.element_size()

quantized_model_size = (param_size + buffer_size) / 1024**2
print('Base model size: {:.3f}MB'.format(quantized_model_size))

Base model size: 255.443MB
Base model size: 91.000MB


In [3]:
from datasets import load_dataset
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
import time
import pandas as pd
dataset = load_dataset("sem_eval_2018_task_1", "subtask5.english")
dataset = dataset['validation']

input = [x['Tweet'] for x in dataset]

def convert_dict_to_labels(dictionary):
    return [int(dictionary[key]) for key in dictionary if key not in ['ID', 'Tweet']]
# Iterate through the list of dictionaries and convert each one
labels = [convert_dict_to_labels(data_dict) for data_dict in dataset]
print("Labels:", labels[0])
print("Text:" , input[0])

def multi_label_metrics(predictions, labels, threshold=0.5):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    sigmoid = torch.nn.Sigmoid()
    probs = [sigmoid(tensor) for tensor in predictions]
    # next, use threshold to turn them into integer predictions
    y_pred = []
    for inner_list in probs:
        new_inner_list = [1 if value > threshold else 0 for value in list(inner_list[0])]
        y_pred.append(new_inner_list)

    # finally, compute metrics
    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    roc_auc = roc_auc_score(y_true, y_pred, average = 'micro')
    accuracy = accuracy_score(y_true, y_pred)
    # return as dictionary
    metrics = {'f1': f1_micro_average,
               'roc_auc': roc_auc,
               'accuracy': accuracy}
    return metrics

predictions = []
times = []

for input_ in input:
    inputs = tokenizer(input_, return_tensors="pt", padding=True, truncation=True)
    st = time.time()
    quantized_output = quantized_model(**inputs).logits
    predictions.append(quantized_output)
    times.append(time.time() - st)

print(pd.Series(times).describe().T)
print(multi_label_metrics(predictions, labels))


Labels: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
Text: @RanaAyyub @rajnathsingh Oh, hidden revenge and anger...I rememberthe time,she rebutted you.
count    886.000000
mean       0.020949
std        0.020954
min        0.008423
25%        0.013354
50%        0.016047
75%        0.021345
max        0.253119
dtype: float64
{'f1': 0.6638779527559056, 'roc_auc': 0.7756045703834104, 'accuracy': 0.20090293453724606}
