### Import

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import torch
import evaluate
import numpy as np
import pandas as pd
from transformers import DataCollatorWithPadding
from datasets import load_dataset, DatasetDict, Dataset
import os
import sys
# Put the parent of the current working directory at the front of sys.path so
# that the project-local modules imported right after this (hf_data, metrics)
# can be resolved -- presumably they live one level above this notebook.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:
    # Guarded so that re-running the cell does not add duplicate entries.
    sys.path.insert(0, project_root)
from hf_data import *
from metrics import *
# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda


### Dataset

In [3]:
# `ds` holds every split at once; `train`/`val`/`test` are requested one by one
# from the same project helper, in the same order as before.
ds = emotions()
train, val, test = (emotions(split) for split in ('train', 'validation', 'test'))

In [4]:
# Display the loaded dataset: split names, feature columns and row counts.
ds

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})

### Model

In [5]:
# Label maps from the project helper. As printed below, the result is a dict
# with 'i2l' (class id -> label name) and 'l2i' (label name -> class id).
# NOTE(review): `l` is easy to misread as `1`; a name like `label_maps` would
# be clearer, but it is referenced by later cells, so it is left as is here.
l = labels_and_ids()
print(l)

{'i2l': {0: 'sadness', 1: 'joy', 2: 'love', 3: 'anger', 4: 'fear', 5: 'surprise'}, 'l2i': {'sadness': 0, 'joy': 1, 'love': 2, 'anger': 3, 'fear': 4, 'surprise': 5}}


In [6]:
# Directory holding the fine-tuned BERT checkpoint (tokenizer + classifier).
# The default is the original author's local path; set the FINETUNED_MODEL_DIR
# environment variable to load the checkpoint from elsewhere without editing
# the notebook.
finetune_model_dir = os.environ.get(
    "FINETUNED_MODEL_DIR",
    "/home/DSE411/Documents/nlp/hf_emotion_classifier/training_scripts/bert/final_model",
)
tokenizer = AutoTokenizer.from_pretrained(finetune_model_dir)
model = AutoModelForSequenceClassification.from_pretrained(
    finetune_model_dir,
    # Derived from the label map so the head size cannot drift from id2label.
    num_labels=len(l['i2l']),
    id2label=l['i2l'],
    label2id=l['l2i'],
)


In [7]:
# Sanity check: the label maps stored in the model config should match the
# dict returned by labels_and_ids().
print(model.config.id2label)
print(model.config.label2id)


{0: 'sadness', 1: 'joy', 2: 'love', 3: 'anger', 4: 'fear', 5: 'surprise'}
{'sadness': 0, 'joy': 1, 'love': 2, 'anger': 3, 'fear': 4, 'surprise': 5}


In [10]:
def _tokenize(batch):
    """Tokenize one batch of examples with the notebook's tokenizer."""
    return tokenize_batch(batch, tokenizer)


# Apply the tokenizer to every split in batched mode.
token_data = ds.map(_tokenize, batched=True)
# The collator is built from the same tokenizer used for tokenization.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


Map: 100%|██████████| 16000/16000 [00:00<00:00, 28923.45 examples/s]


In [11]:
# Pull the three tokenized splits out of the DatasetDict in one step.
tokenized_train, tokenized_validation, tokenized_test = (
    token_data[split] for split in ('train', 'validation', 'test')
)

In [12]:
# Columns to expose as torch tensors; set_format is called only for its side
# effect on each split (its return value is not used).
cols = ["input_ids", "attention_mask", "label"]
for tokenized_split in (tokenized_train, tokenized_validation, tokenized_test):
    tokenized_split.set_format(type="torch", columns=cols)

In [13]:
# List every parameter tensor with its shape and dtype (project helper); this
# is the pre-quantization baseline.
print_model_params(model)

bert.embeddings.word_embeddings.weight                        shape=(30522, 768)  dtype=torch.float32
bert.embeddings.position_embeddings.weight                    shape=(512, 768)  dtype=torch.float32
bert.embeddings.token_type_embeddings.weight                  shape=(2, 768)  dtype=torch.float32
bert.embeddings.LayerNorm.weight                              shape=(768,)  dtype=torch.float32
bert.embeddings.LayerNorm.bias                                shape=(768,)  dtype=torch.float32
bert.encoder.layer.0.attention.self.query.weight              shape=(768, 768)  dtype=torch.float32
bert.encoder.layer.0.attention.self.query.bias                shape=(768,)  dtype=torch.float32
bert.encoder.layer.0.attention.self.key.weight                shape=(768, 768)  dtype=torch.float32
bert.encoder.layer.0.attention.self.key.bias                  shape=(768,)  dtype=torch.float32
bert.encoder.layer.0.attention.self.value.weight              shape=(768, 768)  dtype=torch.float32
bert.encoder.lay

All parameters are currently stored in float32 (see the dtype column above).

## torchao

In [14]:
# Switch to evaluation mode before quantizing. As the last expression in the
# cell, this also echoes the module structure.
model.eval()


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [15]:
# Move the model to the CPU before the quantization step.
# NOTE(review): nn.Module.to() presumably moves the module in place and returns
# the same object, so `model_on_cpu` would alias `model` rather than copy it --
# confirm before relying on `model` still being on the GPU.
model_on_cpu = model.to("cpu")


In [16]:
# Post-training dynamic quantization: every torch.nn.Linear layer in the CPU
# model is replaced by an int8 dynamically-quantized equivalent.
# NOTE(review): this eager-mode API emits a deprecation warning that points to
# torchao's quantize_ API; migrating would add a new dependency.
quantized_model_dynamic = torch.quantization.quantize_dynamic(
    model_on_cpu, qconfig_spec={torch.nn.Linear}, dtype=torch.qint8
)
# Last expression in the cell, so it also echoes the quantized module tree.
quantized_model_dynamic.to("cpu")

For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  quantized_model_dynamic = torch.quantization.quantize_dynamic(


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
              (key): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
              (value): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
              (dropout): Dropout(p=0.1, inplace=False)
        

In [17]:
# Put the quantized model in evaluation mode; the cell output echoes its
# structure, with DynamicQuantizedLinear in place of the Linear layers.
quantized_model_dynamic.eval()


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
              (key): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
              (value): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
              (dropout): Dropout(p=0.1, inplace=False)
        

In [18]:
# Output directory for the post-training-quantized artifacts.
save_path = "./ptq"
os.makedirs(save_path, exist_ok=True)

# The whole module object is serialized (not just a state_dict).
# NOTE(review): this relies on pickle, so loading needs the same class
# definitions to be importable and should only be done with trusted files.
quantized_model_file = os.path.join(save_path, "quantized_model.pt")
torch.save(quantized_model_dynamic, quantized_model_file)

# Store the tokenizer files next to the model so the folder is self-contained.
tokenizer.save_pretrained(save_path)

('./ptq/tokenizer_config.json',
 './ptq/special_tokens_map.json',
 './ptq/vocab.txt',
 './ptq/added_tokens.json',
 './ptq/tokenizer.json')