In [None]:
# Fetch the project sources. The later sys.path entry points at
# /content/hf_emotion_classifier, so this assumes the cell runs with /content
# as the working directory (the Colab default) — TODO confirm.
!git clone https://github.com/sinhajiya/hf_emotion_classifier.git
from google.colab import drive
# Mount Google Drive: the fine-tuned checkpoint is read from, and the quantized
# model is written to, paths under /content/gdrive.
drive.mount('/content/gdrive')

Cloning into 'hf_emotion_classifier'...
remote: Enumerating objects: 49, done.
remote: Counting objects: 100% (49/49), done.
remote: Compressing objects: 100% (42/42), done.
remote: Total 49 (delta 17), reused 22 (delta 5), pack-reused 0 (from 0)
Receiving objects: 100% (49/49), 1.26 MiB | 4.33 MiB/s, done.
Resolving deltas: 100% (17/17), done.
Mounted at /content/gdrive


In [None]:
import torch
import torch.ao.quantization as tq
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
import os
import sys
# Put the cloned repository at the front of the import search path (once), so
# the project-local `hf_data` module imported right after this can be found.
project_root = os.path.abspath("/content/hf_emotion_classifier")
repo_already_importable = project_root in sys.path
if not repo_already_importable:
    sys.path.insert(0, project_root)

from hf_data import *


from torch.optim import AdamW


In [None]:

# Directory on the mounted Drive holding the already fine-tuned checkpoint.
finetune_model_dir = '/content/gdrive/MyDrive/ANLP_weights/distillbert_finetuned_model'

# Restore the classifier and its matching tokenizer from that same directory.
model = AutoModelForSequenceClassification.from_pretrained(finetune_model_dir)
tokenizer = AutoTokenizer.from_pretrained(finetune_model_dir)

# Switch to training mode ahead of the QAT preparation; as the last expression
# of the cell this also echoes the module tree.
model.train()


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [None]:
# Default quantization-aware-training configuration for the "fbgemm" backend.
qat_qconfig = tq.get_default_qat_qconfig("fbgemm")


def _wrap_linear_layers(parent):
    """Tag every ``nn.Linear`` below ``parent`` for QAT and wrap it for conversion.

    Only Linear layers receive ``qat_qconfig``; every other module gets
    ``qconfig = None`` so it stays in floating point.

    Each tagged Linear is replaced by ``tq.QuantWrapper(linear)``, which adds a
    QuantStub before and a DeQuantStub after the layer. Eager-mode quantization
    needs these stubs: after ``tq.convert`` the Linear layers become quantized
    modules that only accept quantized tensors, while the surrounding
    float modules (LayerNorm, attention, dropout, ...) produce and expect float
    tensors. Without the stubs the converted model cannot run a forward pass.

    The function is safe to call repeatedly: children that are already a
    ``QuantWrapper`` are skipped, so re-running the cell does not double-wrap.

    Args:
        parent: module whose descendants are processed in place.
    """
    # Snapshot the children first because entries are replaced while iterating.
    for child_name, child in list(parent.named_children()):
        if isinstance(child, tq.QuantWrapper):
            continue
        if isinstance(child, torch.nn.Linear):
            # QuantWrapper copies the qconfig onto its stubs, so set it first.
            child.qconfig = qat_qconfig
            setattr(parent, child_name, tq.QuantWrapper(child))
        else:
            child.qconfig = None
            _wrap_linear_layers(child)


# The root module itself is not a Linear layer and stays unquantized.
model.qconfig = None
_wrap_linear_layers(model)


In [None]:
# Insert the fake-quantization/observer modules for every layer that carries a
# qconfig. With inplace=True the same model object is modified and returned.
model = tq.prepare_qat(model, inplace=True)
print("Model prepared")


For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  tq.prepare_qat(model, inplace=True)


Model prepared


In [None]:
# Load the training split through the project's `emotions` helper (brought in
# by the star import from hf_data). The result is used below via `.map(...)`,
# so it is presumably a Hugging Face `Dataset` — confirm in hf_data.
train = emotions('train')

In [None]:

def _tokenize(examples):
    """Tokenize one batch of examples with the checkpoint's tokenizer."""
    return tokenize_batch(examples, tokenizer)


# Columns the training loop consumes; they are exposed as torch tensors.
tensor_columns = ["input_ids", "attention_mask", "label"]

tokenized_train = train.map(_tokenize, batched=True)
tokenized_train.set_format(type="torch", columns=tensor_columns)

print(tokenized_train)

Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 16000
})


In [None]:
# Hyperparameters of the quantization-aware fine-tuning run.
NUM_EPOCHS = 5
BATCH_SIZE = 16
LEARNING_RATE = 2e-5

train_loader = DataLoader(tokenized_train, batch_size=BATCH_SIZE, shuffle=True)

# Move the model to its training device *before* creating the optimizer, so the
# optimizer is built over the parameters at their final location (the ordering
# recommended by the torch.optim documentation).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)
criterion = torch.nn.CrossEntropyLoss()

for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0.0  # sum of per-batch losses, averaged over batches below
    correct = 0       # number of correctly classified training examples
    total = 0         # number of training examples seen this epoch

    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        optimizer.zero_grad()

        # Only the tensors the model's forward pass takes; labels are kept
        # separate because the loss is computed explicitly below.
        inputs = {k: v.to(device) for k, v in batch.items() if k in ["input_ids", "attention_mask"]}
        labels = batch["label"].to(device)

        outputs = model(**inputs)
        logits = outputs.logits
        loss = criterion(logits, labels)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        # Metric bookkeeping only: detach so it never touches the autograd graph.
        preds = torch.argmax(logits.detach(), dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    avg_loss = total_loss / len(train_loader)
    train_acc = correct / total * 100

    print(f"Epoch {epoch+1}: Avg Loss = {avg_loss:.4f} | Accuracy = {train_acc:.2f}%\n")


Epoch 1: 100%|██████████| 1000/1000 [03:03<00:00,  5.44it/s]


Epoch 1: Avg Loss = 0.1421 | Accuracy = 94.53%



Epoch 2: 100%|██████████| 1000/1000 [03:02<00:00,  5.47it/s]


Epoch 2: Avg Loss = 0.1071 | Accuracy = 95.21%



Epoch 3: 100%|██████████| 1000/1000 [03:02<00:00,  5.47it/s]


Epoch 3: Avg Loss = 0.0894 | Accuracy = 95.97%



Epoch 4: 100%|██████████| 1000/1000 [03:02<00:00,  5.47it/s]


Epoch 4: Avg Loss = 0.0770 | Accuracy = 96.63%



Epoch 5: 100%|██████████| 1000/1000 [03:02<00:00,  5.47it/s]

Epoch 5: Avg Loss = 0.0640 | Accuracy = 97.36%






In [None]:
# Bring the trained model back to the CPU and into eval mode, then convert it.
# Both calls return the model itself, so they can be chained. inplace=False
# makes convert return a new converted model instead of altering `model`.
cpu_eval_model = model.to("cpu").eval()
quantized_model_qat = tq.convert(cpu_eval_model, inplace=False)

For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  quantized_model_qat = tq.convert(model, inplace=False)


In [None]:
# Persist the converted model on the mounted Drive. The whole module object is
# handed to torch.save (not just a state_dict), so loading it back presumably
# needs the same model classes to be importable — confirm before relying on it.
qat_model_path = r"/content/gdrive/MyDrive/ANLP_weights/qat_distilbert.pt"
torch.save(quantized_model_qat, qat_model_path)

