### AWQ Quantization of model checkpoints
#### Framework: `AutoAWQ by casper-hansen` https://github.com/casper-hansen/AutoAWQ

In [1]:
import torch
import gc
import os
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

In [None]:
# Make sure the folder that will receive the quantized checkpoints exists;
# exist_ok=True keeps this cell safe to re-run.
os.makedirs("./awq_models", exist_ok=True)

In [3]:
# Checkpoint directories to convert: one every 25 steps, from
# checkpoint-25 up to and including checkpoint-200.
model_paths = [
    f"training_output/checkpoint-{step}" for step in range(25, 201, 25)
]

In [5]:
# Convert every checkpoint listed in `model_paths` to AWQ, one model at a time.
# Each checkpoint is read from ./raw_models/<name> and its quantized weights
# and tokenizer are written to ./awq_models/<name>.

# The quantization settings (4-bit weights, group size 128, GEMM kernels) are
# identical for every checkpoint, so build the dict once outside the loop.
quant_config = {
    "zero_point": True,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM",
}

for model_path in model_paths:
    # Only the last path component (e.g. "checkpoint-25") is used to locate
    # the raw model and to name the quantized output directory.
    checkpoint_name = os.path.basename(model_path)
    raw_model_path = f"./raw_models/{checkpoint_name}"
    quantized_model_path = f"./awq_models/{checkpoint_name}"
    print(f"Working on {raw_model_path}")

    # Load model and its tokenizer
    model = AutoAWQForCausalLM.from_pretrained(
        raw_model_path,
        low_cpu_mem_usage=True,
        use_cache=False,
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(raw_model_path)

    # Quantize
    model.quantize(tokenizer, quant_config=quant_config)

    model.save_quantized(quantized_model_path)  # Save model
    tokenizer.save_pretrained(quantized_model_path)  # Save tokenizer

    # Release memory before loading the next checkpoint. Run the garbage
    # collector BEFORE emptying the CUDA cache: tensors that are only kept
    # alive by reference cycles stay allocated until gc.collect() runs, and
    # empty_cache() can only hand back blocks that are no longer in use.
    del model, tokenizer
    gc.collect()
    torch.cuda.empty_cache()

Working on ./raw_models/checkpoint-25


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Repo card metadata block was not found. Setting CardData to empty.
AWQ: 100%|██████████| 28/28 [16:16<00:00, 34.88s/it]


Working on ./raw_models/checkpoint-50


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Repo card metadata block was not found. Setting CardData to empty.
AWQ: 100%|██████████| 28/28 [16:09<00:00, 34.64s/it]


Working on ./raw_models/checkpoint-75


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Repo card metadata block was not found. Setting CardData to empty.
AWQ: 100%|██████████| 28/28 [15:53<00:00, 34.04s/it]


Working on ./raw_models/checkpoint-100


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Repo card metadata block was not found. Setting CardData to empty.
AWQ: 100%|██████████| 28/28 [15:55<00:00, 34.12s/it]


Working on ./raw_models/checkpoint-125


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Repo card metadata block was not found. Setting CardData to empty.
AWQ: 100%|██████████| 28/28 [15:33<00:00, 33.34s/it]


Working on ./raw_models/checkpoint-150


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Repo card metadata block was not found. Setting CardData to empty.
AWQ: 100%|██████████| 28/28 [15:23<00:00, 32.97s/it]


Working on ./raw_models/checkpoint-175


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Repo card metadata block was not found. Setting CardData to empty.
AWQ: 100%|██████████| 28/28 [15:39<00:00, 33.56s/it]


Working on ./raw_models/checkpoint-200


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Repo card metadata block was not found. Setting CardData to empty.
AWQ: 100%|██████████| 28/28 [16:21<00:00, 35.06s/it]
