In [None]:
import os
import unittest
from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor.transformers import oneshot

class TestQuantizationProcess(unittest.TestCase):
    """End-to-end check of the one-shot quantize-and-save workflow."""

    def test_quantization_and_save(self):
        """Quantize a causal LM with the FP8_DYNAMIC scheme and verify the save.

        Steps performed:
          1. Load the model and tokenizer named by the model stub via
             ``from_pretrained`` (presumably resolved from the Hugging Face
             Hub or a local cache -- confirm for offline environments).
          2. Build a ``QuantizationModifier`` recipe targeting ``Linear``
             modules with the ``FP8_DYNAMIC`` scheme, ignoring ``lm_head``.
          3. Apply the recipe with ``oneshot``.
          4. Save model and tokenizer to ``<model name>-FP8-dynamic`` in the
             current working directory.
          5. Assert that the directory and the two config files exist.

        NOTE(review): the output directory is not removed afterwards.
        """
        # The directory name is derived from the last path component of the stub.
        hub_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
        short_name = hub_id.split("/")[-1]
        save_path = short_name + "-FP8-dynamic"

        # Load model and tokenizer from the same stub.
        llm = AutoModelForCausalLM.from_pretrained(hub_id, torch_dtype="auto")
        tok = AutoTokenizer.from_pretrained(hub_id)

        # Configure the quantization algorithm and scheme, then apply it.
        quant_recipe = QuantizationModifier(
            targets="Linear", scheme="FP8_DYNAMIC", ignore=["lm_head"]
        )
        oneshot(model=llm, recipe=quant_recipe)

        # Save to disk in compressed-tensors format.
        llm.save_pretrained(save_path)
        tok.save_pretrained(save_path)

        # The output directory must exist before its contents are checked.
        self.assertTrue(
            os.path.exists(save_path),
            f"Save path does not exist: {save_path}",
        )

        # Each expected file is paired with the message reported if it is
        # missing; the first missing file fails the test.
        expected_files = (
            ("config.json", "Model config not found"),
            ("tokenizer_config.json", "Tokenizer config not found"),
        )
        for filename, failure_msg in expected_files:
            artifact = os.path.join(save_path, filename)
            self.assertTrue(os.path.exists(artifact), failure_msg)

# Run the tests inline (this file appears to be a notebook cell):
#   argv=[""]   -- supply a placeholder program name so unittest does not
#                  parse the host process's own command-line arguments;
#   exit=False  -- report results without calling sys.exit(), which would
#                  otherwise terminate the running interpreter/kernel;
#   verbosity=2 -- print one result line per test.
unittest.main(
    argv=[""],
    verbosity=2,
    exit=False,
)
