### Import

In [20]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import torch
import evaluate
import numpy as np
import pandas as pd
from transformers import DataCollatorWithPadding
from datasets import load_dataset, DatasetDict, Dataset

In [21]:
import os
import sys
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)
from hf_data import *
from metrics import *


In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


### Dataset

In [23]:
ds = emotions()
train = emotions('train')
val = emotions('validation')
test = emotions('test')

In [24]:
ds

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})

### Model

In [25]:
l = labels_and_ids()
print(l)

{'i2l': {0: 'sadness', 1: 'joy', 2: 'love', 3: 'anger', 4: 'fear', 5: 'surprise'}, 'l2i': {'sadness': 0, 'joy': 1, 'love': 2, 'anger': 3, 'fear': 4, 'surprise': 5}}


In [26]:
model_path = "distilbert/distilbert-base-uncased"
finetune_model_dir = r"E:\codes\advanced_nlp\hf_emotion_classifier\models\distillbert_finetuned_model\distillbert_finetuned_model"
tokenizer = AutoTokenizer.from_pretrained(finetune_model_dir)
model = AutoModelForSequenceClassification.from_pretrained(finetune_model_dir, num_labels = 6, id2label=l['i2l'], label2id = l['l2i'],)


In [27]:
print(model.config.id2label)
print(model.config.label2id)


{0: 'sadness', 1: 'joy', 2: 'love', 3: 'anger', 4: 'fear', 5: 'surprise'}
{'sadness': 0, 'joy': 1, 'love': 2, 'anger': 3, 'fear': 4, 'surprise': 5}


In [28]:
max_length = 128
def tokenize_batch(batch):
    return tokenizer(batch["text"], truncation=True, padding='max_length', max_length=max_length)
# map datasets
tokenized_train = train.map(lambda x: tokenize_batch(x), batched=True)
tokenized_valid = val.map(lambda x: tokenize_batch(x), batched=True)
tokenized_test  = test.map(lambda x: tokenize_batch(x), batched=True)
# tokenized_calibration = calibration_data.map(tokenize_batch, batched=True)  

# Set format for PyTorch compatibility
cols = ["input_ids", "attention_mask", "label"]
tokenized_train.set_format(type="torch", columns=cols)
tokenized_valid.set_format(type="torch", columns=cols)
tokenized_test.set_format(type="torch", columns=cols)
# tokenized_calibration.set_format(type="torch", columns=cols)
# 

Map: 100%|██████████| 2000/2000 [00:00<00:00, 10964.02 examples/s]


In [29]:
print_model_params(model)

distilbert.embeddings.word_embeddings.weight                  shape=(30522, 768)  dtype=torch.float32
distilbert.embeddings.position_embeddings.weight              shape=(512, 768)  dtype=torch.float32
distilbert.embeddings.LayerNorm.weight                        shape=(768,)  dtype=torch.float32
distilbert.embeddings.LayerNorm.bias                          shape=(768,)  dtype=torch.float32
distilbert.transformer.layer.0.attention.q_lin.weight         shape=(768, 768)  dtype=torch.float32
distilbert.transformer.layer.0.attention.q_lin.bias           shape=(768,)  dtype=torch.float32
distilbert.transformer.layer.0.attention.k_lin.weight         shape=(768, 768)  dtype=torch.float32
distilbert.transformer.layer.0.attention.k_lin.bias           shape=(768,)  dtype=torch.float32
distilbert.transformer.layer.0.attention.v_lin.weight         shape=(768, 768)  dtype=torch.float32
distilbert.transformer.layer.0.attention.v_lin.bias           shape=(768,)  dtype=torch.float32
distilbert.transfo

 currently all are in float 32.

## torchao

In [30]:
model.eval()


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [31]:
model_on_cpu = model.to("cpu")


In [32]:
quantized_model_dynamic = torch.quantization.quantize_dynamic(
    model_on_cpu,
    {torch.nn.Linear},
    dtype=torch.qint8
)
quantized_model_dynamic.to("cpu")

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
            (k_lin): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
            (v_lin): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
            (out_lin): DynamicQuantizedLinear(in_features=768, out_featur

In [33]:
quantized_model_dynamic.eval()


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
            (k_lin): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
            (v_lin): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
            (out_lin): DynamicQuantizedLinear(in_features=768, out_featur

In [35]:

save_path = r"E:\codes\advanced_nlp\hf_emotion_classifier\models\ptq_distilbert"
os.makedirs(save_path, exist_ok=True)

torch.save(quantized_model_dynamic, os.path.join(save_path, "quantized_model.pt"))
tokenizer.save_pretrained(save_path)

('E:\\codes\\advanced_nlp\\hf_emotion_classifier\\models\\ptq_distilbert\\tokenizer_config.json',
 'E:\\codes\\advanced_nlp\\hf_emotion_classifier\\models\\ptq_distilbert\\special_tokens_map.json',
 'E:\\codes\\advanced_nlp\\hf_emotion_classifier\\models\\ptq_distilbert\\vocab.txt',
 'E:\\codes\\advanced_nlp\\hf_emotion_classifier\\models\\ptq_distilbert\\added_tokens.json',
 'E:\\codes\\advanced_nlp\\hf_emotion_classifier\\models\\ptq_distilbert\\tokenizer.json')