# Quantizing Models with PyTorch

## Loading the Model

In [None]:
from transformers import GPT2LMHeadModel

# Instantiate GPT-2 with its language-modeling head from the "gpt2" pretrained
# checkpoint; this full-precision model is the baseline for the later cells.
model = GPT2LMHeadModel.from_pretrained("gpt2")

## Post-Training Quantization (PTQ)

In [None]:
import torch
from archai.quantization.ptq import dynamic_quantization_torch

# Limit PyTorch to a single CPU thread for intra-op parallelism before quantizing.
# NOTE(review): presumably chosen so the quantized model runs in a controlled,
# single-threaded setting — confirm the motivation.
torch.set_num_threads(1)
# Apply dynamic post-training quantization and keep the result as `model_qnt`.
# NOTE(review): confirm whether dynamic_quantization_torch leaves `model`
# untouched or modifies it in place; the comparison below relies on `model`
# still being the full-precision baseline.
model_qnt = dynamic_quantization_torch(model)

## Comparing Default and Quantized Models

In [None]:
from archai.common.file_utils import calculate_torch_model_size

# Report the size of each model as computed by calculate_torch_model_size.
print(f"Model: {calculate_torch_model_size(model)}MB")
print(f"Model-QNT: {calculate_torch_model_size(model_qnt)}MB")

# One batch of 192 random token ids drawn from [1, 10); the exact same tensor
# is fed to both models so that their outputs are directly comparable.
inputs = {"input_ids": torch.randint(1, 10, (1, 192))}

# This is an inference-only comparison: disable autograd so that no graph is
# recorded for the forward passes (saves memory and keeps the printed
# difference a plain tensor without a grad_fn).
with torch.no_grad():
    logits = model(**inputs).logits
    logits_qnt = model_qnt(**inputs).logits

# Element-wise gap between the quantized and full-precision logits.
print(f"Difference between logits: {logits_qnt - logits}")