In [1]:
import numpy as np
import torch
from datasets import load_dataset, DatasetDict
from transformers import AutoTokenizer, DistilBertForSequenceClassification
from iDistilbert import iDistilBertForSequenceClassification
import torch.quantization
from torch.utils.data import DataLoader
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Tokenizer for the DistilBERT base (uncased) checkpoint, plus the IMDB dataset.
student_id = "distilbert/distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=student_id)

dataset = load_dataset(path="imdb")

def pre_process(examples):
    """Tokenize the "text" field of a batch and carry its labels through.

    Args:
        examples: mapping with a "text" entry (passed to the module-level
            `tokenizer`) and a "label" entry.

    Returns:
        The tokenizer output with a 'label' entry copied from `examples`.
        Sequences are truncated and padded to a fixed length of 512 tokens.
    """
    encoded = tokenizer(
        examples["text"],
        truncation=True,
        padding='max_length',
        max_length=512,
    )
    encoded['label'] = examples['label']
    return encoded

# Apply the tokenization in batched mode.
tokenized_data = dataset.map(function=pre_process, batched=True)




In [3]:
from transformers import BitsAndBytesConfig
import torch
from transformers import DistilBertForSequenceClassification, AutoModelForSequenceClassification, DistilBertConfig, DataCollatorWithPadding

# Configuration passed to the iDistilBert student model.
student_config = DistilBertConfig(
    distance_metric="manhattan_distance",
    activation_function="relu",
    signed_inhibitor=True,
    center=True,
)

student_model = iDistilBertForSequenceClassification(config=student_config)

# Checkpoint holding the weights to load into the student model.
WEIGHTS_PATH = '/shared/Tony/MSc2024/KD_task_specific/inhibit_bert/models/iDistilbert_IMDB.pth'

# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# machine without one; the model is quantized and evaluated on CPU anyway.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source.
initialized_weights = torch.load(WEIGHTS_PATH, map_location="cpu")

# strict=False tolerates key mismatches; report them instead of silently
# evaluating a model whose layers may have been left at random initialization.
load_result = student_model.load_state_dict(initialized_weights, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(
        "Checkpoint/model key mismatch - "
        f"missing: {load_result.missing_keys}, "
        f"unexpected: {load_result.unexpected_keys}"
    )

# Dynamic quantization: swap every torch.nn.Linear for its int8 counterpart.
quantized_model = torch.quantization.quantize_dynamic(
    student_model,
    {torch.nn.Linear},  # module types to quantize
    dtype=torch.qint8,  # 8-bit integer weights
)

# Last expression of the cell: switches to eval mode and displays the model.
quantized_model.eval()


2024-10-22 02:25:25.592950: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-22 02:25:25.605384: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-22 02:25:25.618339: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-22 02:25:25.622225: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-22 02:25:25.633143: I tensorflow/core/platform/cpu_feature_guar

iDistilBertForSequenceClassification(
  (distilbert): iDistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): iTransformer(
      (layer): ModuleList(
        (0-5): 6 x iTransformerBlock(
          (attention): iMultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
            (k_lin): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
            (v_lin): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
            (out_lin): DynamicQuantizedLinear(in_features=768, out_fe

In [4]:
import evaluate
import numpy as np

# Accuracy metric object; compute_metrics calls its .compute() method.
accuracy = evaluate.load(path="accuracy")

def preprocess_logits_for_metrics(logits, labels):
    """
    Reduce raw model outputs to predicted class ids ahead of metric computation.

    When `logits` is a tuple only its first element is used. Predictions are
    the argmax over the last dimension.

    Returns:
        A `(pred_ids, labels)` pair; `labels` is handed back unchanged.
    """
    # Tuple outputs carry the logits tensor in position 0.
    scores = logits[0] if isinstance(logits, tuple) else logits
    return torch.argmax(scores, dim=-1), labels
    
def compute_metrics(eval_pred):
    """Score predictions against the labels with the module-level `accuracy` metric.

    `eval_pred` unpacks into `(predictions, labels)`. Only the first element
    of `predictions` is scored, matching the `(pred_ids, labels)` pair
    returned by `preprocess_logits_for_metrics`.
    """
    packed_predictions, references = eval_pred
    pred_ids = packed_predictions[0]
    return accuracy.compute(predictions=pred_ids, references=references)


In [5]:
from transformers import Trainer, TrainingArguments

# Evaluation is pinned to the CPU: the model is moved there explicitly and the
# Trainer is told not to use an accelerator.
quantized_model = quantized_model.to('cpu')

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

training_args = TrainingArguments(
    output_dir="./results",
    per_device_eval_batch_size=2,
    # `use_cpu` alone is enough; the deprecated `no_cuda` flag it replaces
    # only added a deprecation warning.
    use_cpu=True,
    logging_dir="./logs",
    logging_steps=10,
)

# The Trainer is used only as an evaluation harness over the test split.
trainer = Trainer(
    model=quantized_model,
    args=training_args,
    eval_dataset=tokenized_data['test'],
    data_collator=data_collator,
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
    compute_metrics=compute_metrics,
)

eval_results = trainer.evaluate()
print(f"Accuracy after quantization: {eval_results['eval_accuracy']}")


comet_ml is installed but the Comet API Key is not configured. Please set the `COMET_API_KEY` environment variable to enable Comet logging. Check out the documentation for other ways of configuring it: https://www.comet.com/docs/v2/guides/experiment-management/configure-sdk/#set-the-api-key


Accuracy after quantization: 0.9124
