In [1]:
# Note: this notebook was run with DeepSparse 1.7 and SparseML 1.7.

from sparseml.transformers import oneshot, SparseAutoModel
from datasets import load_dataset
from transformers import AutoConfig, AutoTokenizer, AutoModel
from typing import Union
from evaluate import evaluator
from sparseml import export
import sparseml.core.session as session_manager

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
### Setup

# Hugging Face identifiers for the checkpoint and the dataset used below.
model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
dataset_name = "tweet_eval"
dataset_subname = "sentiment"

# Tokenizer and config are both loaded from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)

# The full train split is handed to oneshot() as calibration data later on.
dataset_train = load_dataset(dataset_name, dataset_subname, split="train")

# Evaluation subset: shuffle the test split with a fixed seed, then keep the
# first 500 rows so both models are scored on the same examples.
_test_split = load_dataset(dataset_name, dataset_subname, split="test")
dataset_test = _test_split.shuffle(seed=420).select(range(500))

In [8]:
# One-shot recipe: SmoothQuant, then 8-bit quantization, then SparseGPT with
# sparsity 0.0 and quantize=true.
#
# The string is a raw literal so the backslashes reach the YAML parser
# verbatim: inside the YAML double-quoted scalar `\\d` unescapes to `\d`, so
# the `targets` regex is `roberta.encoder.layer.\d+$`. The previous non-raw
# spelling (`\\\d`) produced the same text but relied on Python passing the
# invalid escape sequence `\d` through unchanged, which emits a
# DeprecationWarning/SyntaxWarning on newer interpreters.
#
# NOTE(review): both SmoothQuant mappings use the same smooth-layer pattern
# `re:.*output.LayerNorm`. Presumably that pattern also matches the attention
# block's `attention.output.LayerNorm` — confirm that each LayerNorm is paired
# with the linear layers it actually feeds.
recipe = r"""
test_stage:
  obcq_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.5
      mappings: [
            [["re:.*query", "re:.*key", "re:.*value"], "re:.*output.LayerNorm"],
            [["re:.*intermediate.dense"], "re:.*output.LayerNorm"],
        ]
    QuantizationModifier:
      scheme_overrides:
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
        Linear:
          input_activations:
            num_bits: 8
            symmetric: false
          weights:
            num_bits: 8
            symmetric: true
    SparseGPTModifier:
      sparsity: 0.0
      quantize: true
      targets: ["re:roberta.encoder.layer.\\d+$"]
"""

In [9]:
### Apply One-Shot

def format_data(data):
    """Reduce a dataset record to a dict holding only its ``"text"`` entry.

    Args:
        data: mapping that provides a ``"text"`` key.

    Returns:
        A new dict ``{"text": data["text"]}``; every other key is dropped.

    Raises:
        KeyError: if ``data`` has no ``"text"`` key.
    """
    text = data["text"]
    return dict(text=text)

# Apply the recipe to the checkpoint in one shot, using the train split
# (reduced to its "text" field by format_data) as the dataset, and write the
# result under ./oneshot_output for the evaluation and export cells.
oneshot(
    recipe=recipe,
    model=model_name,
    dataset=dataset_train,
    preprocessing_func=format_data,
    pad_to_max_length=False,
    output_dir="./oneshot_output",
)



2024-05-10 09:05:47 sparseml.transformers.utils.helpers INFO     model_path is a huggingface model id. Attempting to download recipe from https://huggingface.co/
2024-05-10 09:05:47 sparseml.transformers.utils.helpers INFO     Found recipe: recipe.yaml for model id: cardiffnlp/twitter-roberta-base-sentiment-latest. Downloading...
2024-05-10 09:05:47 sparseml.transformers.utils.helpers INFO     Unable to to find recipe recipe.yaml for model id: cardiffnlp/twitter-roberta-base-sentiment-latest: 404 Client Error. (Request ID: Root=1-663de36b-43d6d733132f1fde79c2a9d1;8802b9c9-52e6-4482-aad5-8a120f59f6ec)

Entry Not Found for url: https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest/resolve/main/recipe.yaml.. Skipping recipe resolution.
2024-05-10 09:05:47 sparseml.transformers.utils.helpers INFO     Failed to infer the recipe from the model_path
Running tokenizer on dataset: 100%|██████████| 45615/45615 [00:02<00:00, 16540.07 examples/s]
Adding labels: 100%|██████████| 4

['input_ids', 'attention_mask', 'labels']


2024-05-10 09:05:53 sparseml.modifiers.smoothquant.pytorch INFO     Running SmoothQuantModifier calibration with 512 samples...
100%|██████████| 512/512 [00:04<00:00, 116.73it/s]
2024-05-10 09:05:57 sparseml.modifiers.smoothquant.pytorch INFO     Smoothing activation scales...
2024-05-10 09:05:57 sparseml.modifiers.quantization.pytorch INFO     Running QuantizationModifier calibration with 512 samples...
100%|██████████| 512/512 [00:45<00:00, 11.25it/s]
2024-05-10 09:06:43 sparseml.modifiers.pruning.wanda.pytorch INFO     Preparing roberta.encoder.layer.0 for compression
2024-05-10 09:06:43 sparseml.modifiers.pruning.wanda.pytorch INFO     Preparing roberta.encoder.layer.1 for compression
2024-05-10 09:06:43 sparseml.modifiers.pruning.wanda.pytorch INFO     Preparing roberta.encoder.layer.2 for compression
2024-05-10 09:06:43 sparseml.modifiers.pruning.wanda.pytorch INFO     Preparing roberta.encoder.layer.3 for compression
2024-05-10 09:06:43 sparseml.modifiers.pruning.wanda.pytorch I

In [10]:
### Evaluate

# Fetch the currently active SparseML session and reset it before evaluating.
# NOTE(review): presumably this clears state left behind by the oneshot() run
# so it does not interfere with loading the model below — confirm.
active_session = session_manager.active_session()
active_session.reset()

def evaluate_model(model: Union[str, AutoModel]):
    """Compute accuracy for ``model`` on ``dataset_test``.

    Uses the notebook-level ``tokenizer``, ``dataset_test`` and ``config``
    objects, so the setup cell must have run first.

    Args:
        model: a model id/path string or a loaded model object; it is passed
            to the evaluator unchanged as ``model_or_pipeline``.

    Returns:
        The result of the evaluator's ``compute`` call (in the recorded run, a
        dict with ``accuracy`` and timing fields).
    """
    text_clf_evaluator = evaluator("text-classification")
    compute_kwargs = {
        "model_or_pipeline": model,
        "tokenizer": tokenizer,
        "data": dataset_test,
        "metric": "accuracy",
        # Map the model's label names onto the dataset's integer label ids.
        "label_mapping": config.label2id,
    }
    return text_clf_evaluator.compute(**compute_kwargs)

# Score the one-shot output first, then the original checkpoint, on the same
# evaluation subset.
quantized_model = SparseAutoModel.text_classification_from_pretrained(
    "./oneshot_output"
)
eval_quant = evaluate_model(quantized_model)
eval_baseline = evaluate_model(model_name)

# NOTE(review): in the recorded run the quantized model scored 0.332 accuracy
# against 0.682 for the baseline — investigate before relying on the export.
print(f"Evaluation quantized model:\n{eval_quant}")
print(f"Evaluation baseline model:\n{eval_baseline}")



You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
2024-05-10 09:08:16 sparseml.transformers.utils.helpers INFO     Found recipe in the model_path: ./oneshot_output/recipe.yaml
2024-05-10 09:08:16 sparseml.core.recipe.recipe INFO     Loading recipe from file ./oneshot_output/recipe.yaml
manager stage: Model structure initialized
2024-05-10 09:08:16 sparseml.pytorch.model_load.helpers INFO     Applied an unstaged recipe to the model at ./oneshot_output
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
2024-05-10 09:08:17 sparseml.pytorch.model_load.helpers INFO     Reloaded 1485 model params for SparseML Recipe from ./oneshot_output
2024-05-10 09:08:17 sparseml.pytorch.model_load.helpers INFO     Loaded student from ./oneshot_output with 124647939 total params. Of those there are 85526784 prunable params which have 12.627738931467364 avg sparsity.
2024-05-10 09:08:17 

Evaluation quantized model:
{'accuracy': 0.332, 'total_time_in_seconds': 14.503595827962272, 'samples_per_second': 34.47420942577721, 'latency_in_seconds': 0.029007191655924545}
Evaluation baseline model:
{'accuracy': 0.682, 'total_time_in_seconds': 2.8320093309739605, 'samples_per_second': 176.55309060300456, 'latency_in_seconds': 0.005664018661947921}


In [11]:
### Export
# Export the one-shot output directory as a text-classification deployment
# under ./oneshot_deployment.
export(
    task="text-classification",
    source_path="./oneshot_output",
    target_path="./oneshot_deployment",
)

2024-05-10 09:08:50 sparseml.export.export INFO     Starting export for transformers model...
2024-05-10 09:08:50 sparseml.transformers.integration_helper_functions INFO     Fetching default helper functions for transformers integration
2024-05-10 09:08:50 sparseml.export.export INFO     Creating model for the export...
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
2024-05-10 09:08:51 sparseml.transformers.utils.helpers INFO     Found recipe in the model_path: /root/sparseml/oneshot_output/recipe.yaml
2024-05-10 09:08:51 sparseml.core.recipe.recipe INFO     Loading recipe from file /root/sparseml/oneshot_output/recipe.yaml
manager stage: Model structure initialized
2024-05-10 09:08:51 sparseml.pytorch.model_load.helpers INFO     Applied an unstaged recipe to the model at /root/sparseml/oneshot_output
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
2024-05-10 0