In [1]:
!pip show sparseml-nightly

Name: sparseml-nightly
Version: 1.6.0.20231105
Summary: Libraries for applying sparsification recipes to neural networks with a few lines of code, enabling faster and smaller models
Home-page: https://github.com/neuralmagic/sparseml
Author: Neuralmagic, Inc.
Author-email: support@neuralmagic.com
License: Apache
Location: /home/rshaw/zephyr-training/sparseml-env/lib/python3.10/site-packages
Requires: click, GPUtil, ipywidgets, jupyter, matplotlib, merge-args, numpy, onnx, packaging, pandas, progressbar2, protobuf, psutil, pydantic, pyyaml, requests, scikit-image, scikit-learn, scipy, setuptools, sparsezoo-nightly, toposort, tqdm
Required-by: 


### **Confirm Model Is Working Okay**

In [2]:
# from transformers import AutoModelForCausalLM, AutoTokenizer

# model_id = "HuggingFaceH4/mistral-7b-sft-beta"
# tokenizer = AutoTokenizer.from_pretrained(model_id)
# model = AutoModelForCausalLM.from_pretrained(model_id)

In [3]:
# from datasets import load_dataset

# dataset_id = "HuggingFaceH4/ultrachat_200k"
# dataset = load_dataset("HuggingFaceH4/ultrachat_200k")

In [4]:
# chat_template = tokenizer.apply_chat_template(dataset["train_sft"][0]["messages"][:-1], tokenize=False, add_generation_prompt=True)
# print(chat_template)

# tokens = tokenizer(chat_template, return_tensors="pt")

In [5]:
# preds = model.generate(**tokens, max_new_tokens=20)
# tokenizer.batch_decode(preds)

### **Construct Calib Dataset**

In [1]:
import tqdm
from sparseml.transformers.data.base_llm import TransformersDataset
from transformers import AutoTokenizer

system_prompt = {
    "content": "You are a friendly chatbot",
    "role": "system"
}

class ChatDataset(TransformersDataset):
    def __init__(
        self,
        model,
        seqlen,
        nsamples,
        path,
        seed: int = 0,
        split: str = "train_sft",
        split_percent_to_use: float = 1.0,
    ):
        super().__init__(
            model=model,
            seqlen=seqlen,
            nsamples=nsamples,
            path=path,
            name=None,
            seed=seed,
            split=split,
            use_max_tokens=False,
            split_percent_to_use=split_percent_to_use,
        )

        tok = AutoTokenizer.from_pretrained(model)

        processed_data = []
        for sample in tqdm.tqdm(self._data):
            assert "messages" in sample
            messages_with_sys_prompt = [system_prompt] + sample["messages"]
            processed_data.append(
                tok.apply_chat_template(
                    messages_with_sys_prompt, 
                    tokenize=False
                )
            )

        # print(processed_data[-1])
            
        self.create_dataloader(processed_data)

This is CRITICAL for the proper application of quantization in SparseML flows.

To resolve this, please run: `pip uninstall transformers;pip install nm-transformers`
Failing to do so is UNSUPPORTED and may significantly affect model performance.
****************************************************************


In [2]:
model_id = "HuggingFaceH4/mistral-7b-sft-beta"
dataset_id = "HuggingFaceH4/ultrachat_200k"

In [3]:
dataset = ChatDataset(
    model=model_id,
    seqlen=512,
    nsamples=512,
    path=dataset_id
)

calibration_data = dataset.loader
tokenizer = dataset.tokenizer

  table = cls._concat_blocks(blocks, axis=0)
100%|██████████| 512/512 [00:00<00:00, 12980.20it/s]


In [4]:
from transformers import AutoModelForCausalLM
import torch

model = AutoModelForCausalLM.from_pretrained(model_id)
print(model.dtype)
print(model.device)
print(model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

torch.float32
cpu
MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
   

In [5]:
import sparseml.core.session as session_manager
from sparseml.core.framework import Framework

recipe_file = "/home/rshaw/zephyr-training/pruning/recipe-50sparse.yaml"

session_manager.create_session()
session = session_manager.active_session()
session.apply(
    framework=Framework.pytorch,
    recipe=recipe_file,
    model=model,
    calib_data=calibration_data,
    start=0.0,
    device="cuda",
    copy_data=False,
)




------------------------------------------------------------
SKIPPING MODEL.to(device)
------------------------------------------------------------





2023-11-06 17:59:29 sparseml.modifiers.obcq.pytorch INFO     
===== Compressing layer 1/32 to sparsity 0.5 =====



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 17:59:44 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 0
2023-11-06 17:59:46 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.31
2023-11-06 17:59:46 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 0.05
2023-11-06 18:00:00 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 0
2023-11-06 18:00:01 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.24
2023-11-06 18:00:01 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 0.12
2023-11-06 18:00:17 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 0
2023-11-06 18:00:18 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.26
2023-11-06 18:00:18 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 0.30
2023-11-06 18:00:34 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 0
2023-11-06 18:00:35 sparseml.mo

torch.cuda.memory_allocated: 4.457424GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.798828GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:02:17 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 1
2023-11-06 18:02:18 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.31
2023-11-06 18:02:18 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 30.10
2023-11-06 18:02:35 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 1
2023-11-06 18:02:36 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.31
2023-11-06 18:02:36 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 9.00
2023-11-06 18:02:53 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 1
2023-11-06 18:02:54 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:02:54 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 5.26
2023-11-06 18:03:11 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 1
2023-11-06 18:03:12 sparseml.m

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:04:57 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 2
2023-11-06 18:04:58 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:04:58 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 2555.46
2023-11-06 18:05:15 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 2
2023-11-06 18:05:16 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:05:16 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 901.78
2023-11-06 18:05:33 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 2
2023-11-06 18:05:34 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:05:34 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 210.92
2023-11-06 18:05:51 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 2
2023-11-06 18:05:53 spar

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:07:37 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 3
2023-11-06 18:07:39 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:07:39 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 2140.45
2023-11-06 18:07:56 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 3
2023-11-06 18:07:57 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:07:57 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 771.81
2023-11-06 18:08:14 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 3
2023-11-06 18:08:15 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.34
2023-11-06 18:08:15 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 234.46
2023-11-06 18:08:32 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 3
2023-11-06 18:08:33 spar

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:10:18 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 4
2023-11-06 18:10:19 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:10:19 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 3627.70
2023-11-06 18:10:36 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 4
2023-11-06 18:10:37 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:10:37 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 1286.52
2023-11-06 18:10:54 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 4
2023-11-06 18:10:55 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:10:55 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 440.39
2023-11-06 18:11:12 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 4
2023-11-06 18:11:13 spa

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:12:58 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 5
2023-11-06 18:12:59 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:12:59 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 5885.69
2023-11-06 18:13:16 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 5
2023-11-06 18:13:17 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:13:17 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 2182.52
2023-11-06 18:13:34 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 5
2023-11-06 18:13:35 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:13:35 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 586.41
2023-11-06 18:13:52 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 5
2023-11-06 18:13:54 spa

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:15:38 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 6
2023-11-06 18:15:39 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:15:39 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 4999.64
2023-11-06 18:15:56 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 6
2023-11-06 18:15:57 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:15:57 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 1878.91
2023-11-06 18:16:14 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 6
2023-11-06 18:16:15 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:16:15 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 557.31
2023-11-06 18:16:32 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 6
2023-11-06 18:16:33 spa

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:18:17 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 7
2023-11-06 18:18:19 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:18:19 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 5964.08
2023-11-06 18:18:35 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 7
2023-11-06 18:18:37 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:18:37 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 2250.06
2023-11-06 18:18:54 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 7
2023-11-06 18:18:55 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:18:55 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 707.32
2023-11-06 18:19:12 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 7
2023-11-06 18:19:13 spa

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:20:57 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 8
2023-11-06 18:20:59 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:20:59 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 6425.66
2023-11-06 18:21:15 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 8
2023-11-06 18:21:17 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:21:17 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 2361.46
2023-11-06 18:21:33 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 8
2023-11-06 18:21:35 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:21:35 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 751.06
2023-11-06 18:21:51 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 8
2023-11-06 18:21:53 spa

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:23:37 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 9
2023-11-06 18:23:39 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:23:39 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 8542.04
2023-11-06 18:23:55 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 9
2023-11-06 18:23:57 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:23:57 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 3270.70
2023-11-06 18:24:13 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 9
2023-11-06 18:24:15 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:24:15 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 995.81
2023-11-06 18:24:32 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 9
2023-11-06 18:24:33 spa

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:26:17 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 10
2023-11-06 18:26:19 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:26:19 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 7801.41
2023-11-06 18:26:35 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 10
2023-11-06 18:26:37 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:26:37 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 3253.01
2023-11-06 18:26:53 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 10
2023-11-06 18:26:55 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:26:55 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 826.11
2023-11-06 18:27:12 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 10
2023-11-06 18:27:13

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:28:57 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 11
2023-11-06 18:28:58 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:28:58 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 8717.14
2023-11-06 18:29:15 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 11
2023-11-06 18:29:16 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:29:16 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 3448.88
2023-11-06 18:29:33 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 11
2023-11-06 18:29:34 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:29:34 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 1139.76
2023-11-06 18:29:51 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 11
2023-11-06 18:29:5

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:31:36 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 12
2023-11-06 18:31:38 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:31:38 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 9960.01
2023-11-06 18:31:54 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 12
2023-11-06 18:31:56 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:31:56 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 3558.50
2023-11-06 18:32:12 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 12
2023-11-06 18:32:14 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:32:14 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 1244.84
2023-11-06 18:32:30 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 12
2023-11-06 18:32:3

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:34:16 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 13
2023-11-06 18:34:17 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.34
2023-11-06 18:34:17 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 11112.79
2023-11-06 18:34:34 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 13
2023-11-06 18:34:35 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.34
2023-11-06 18:34:35 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 4720.46
2023-11-06 18:34:52 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 13
2023-11-06 18:34:53 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:34:53 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 1435.06
2023-11-06 18:35:10 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 13
2023-11-06 18:35:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:36:55 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 14
2023-11-06 18:36:57 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:36:57 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 10507.15
2023-11-06 18:37:13 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 14
2023-11-06 18:37:15 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:37:15 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 3792.59
2023-11-06 18:37:32 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 14
2023-11-06 18:37:33 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:37:33 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 2013.50
2023-11-06 18:37:50 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 14
2023-11-06 18:37:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:39:36 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 15
2023-11-06 18:39:37 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:39:37 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 13886.56
2023-11-06 18:39:54 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 15
2023-11-06 18:39:55 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:39:55 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 4490.59
2023-11-06 18:40:12 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 15
2023-11-06 18:40:13 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:40:13 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 2519.37
2023-11-06 18:40:30 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 15
2023-11-06 18:40:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:42:16 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 16
2023-11-06 18:42:17 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:42:17 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 15176.93
2023-11-06 18:42:34 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 16
2023-11-06 18:42:36 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:42:36 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 5555.56
2023-11-06 18:42:52 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 16
2023-11-06 18:42:54 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:42:54 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 2694.75
2023-11-06 18:43:11 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 16
2023-11-06 18:43:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:44:57 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 17
2023-11-06 18:44:58 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:44:58 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 13043.64
2023-11-06 18:45:15 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 17
2023-11-06 18:45:16 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:45:16 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 4639.60
2023-11-06 18:45:33 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 17
2023-11-06 18:45:34 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:45:34 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 2200.51
2023-11-06 18:45:51 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 17
2023-11-06 18:45:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:47:37 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 18
2023-11-06 18:47:38 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:47:38 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 15093.23
2023-11-06 18:47:55 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 18
2023-11-06 18:47:56 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:47:56 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 4884.28
2023-11-06 18:48:13 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 18
2023-11-06 18:48:15 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:48:15 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 2700.34
2023-11-06 18:48:31 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 18
2023-11-06 18:48:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:50:17 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 19
2023-11-06 18:50:18 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:50:18 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 17659.00
2023-11-06 18:50:35 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 19
2023-11-06 18:50:36 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:50:36 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 5346.69
2023-11-06 18:50:53 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 19
2023-11-06 18:50:55 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:50:55 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 3621.70
2023-11-06 18:51:11 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 19
2023-11-06 18:51:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:52:57 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 20
2023-11-06 18:52:58 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:52:58 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 17734.67
2023-11-06 18:53:15 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 20
2023-11-06 18:53:16 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:53:16 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 5696.65
2023-11-06 18:53:33 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 20
2023-11-06 18:53:35 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:53:35 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 3875.21
2023-11-06 18:53:51 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 20
2023-11-06 18:53:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:55:37 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 21
2023-11-06 18:55:39 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:55:39 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 18332.68
2023-11-06 18:55:55 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 21
2023-11-06 18:55:57 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:55:57 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 5851.79
2023-11-06 18:56:14 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 21
2023-11-06 18:56:15 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:56:15 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 4031.51
2023-11-06 18:56:32 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 21
2023-11-06 18:56:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 18:58:18 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 22
2023-11-06 18:58:19 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:58:19 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 17970.44
2023-11-06 18:58:36 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 22
2023-11-06 18:58:37 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 18:58:37 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 5665.70
2023-11-06 18:58:54 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 22
2023-11-06 18:58:55 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 18:58:55 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 4354.91
2023-11-06 18:59:12 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 22
2023-11-06 18:59:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 19:00:58 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 23
2023-11-06 19:01:00 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 19:01:00 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 19067.62
2023-11-06 19:01:17 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 23
2023-11-06 19:01:18 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 19:01:18 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 5819.25
2023-11-06 19:01:35 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 23
2023-11-06 19:01:36 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 19:01:36 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 4767.92
2023-11-06 19:01:53 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 23
2023-11-06 19:01:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 19:03:39 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 24
2023-11-06 19:03:40 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 19:03:40 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 21455.66
2023-11-06 19:03:57 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 24
2023-11-06 19:03:59 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 19:03:59 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 6989.91
2023-11-06 19:04:15 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 24
2023-11-06 19:04:17 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.34
2023-11-06 19:04:17 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 5740.30
2023-11-06 19:04:34 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 24
2023-11-06 19:04:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 19:06:19 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 25
2023-11-06 19:06:21 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 19:06:21 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 22018.10
2023-11-06 19:06:37 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 25
2023-11-06 19:06:39 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 19:06:39 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 6854.61
2023-11-06 19:06:55 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 25
2023-11-06 19:06:57 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 19:06:57 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 6303.19
2023-11-06 19:07:14 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 25
2023-11-06 19:07:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 19:08:59 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 26
2023-11-06 19:09:00 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 19:09:00 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 19295.73
2023-11-06 19:09:17 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 26
2023-11-06 19:09:19 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 19:09:19 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 5761.50
2023-11-06 19:09:35 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 26
2023-11-06 19:09:37 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.35
2023-11-06 19:09:37 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 6137.34
2023-11-06 19:09:53 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 26
2023-11-06 19:09:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 19:11:39 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 27
2023-11-06 19:11:41 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 19:11:41 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 21405.12
2023-11-06 19:11:57 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 27
2023-11-06 19:11:59 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 19:11:59 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 6950.20
2023-11-06 19:12:16 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 27
2023-11-06 19:12:17 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 19:12:17 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 6213.89
2023-11-06 19:12:34 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 27
2023-11-06 19:12:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 19:14:20 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 28
2023-11-06 19:14:21 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 19:14:21 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 16715.63
2023-11-06 19:14:38 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 28
2023-11-06 19:14:39 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 19:14:39 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 4556.25
2023-11-06 19:14:56 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 28
2023-11-06 19:14:57 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 19:14:57 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 6831.86
2023-11-06 19:15:14 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 28
2023-11-06 19:15:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 19:17:00 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 29
2023-11-06 19:17:02 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 19:17:02 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 18767.50
2023-11-06 19:17:19 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 29
2023-11-06 19:17:20 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.34
2023-11-06 19:17:20 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 5116.62
2023-11-06 19:17:37 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 29
2023-11-06 19:17:38 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 19:17:38 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 10216.00
2023-11-06 19:17:55 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 29
2023-11-06 19:17

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 19:19:41 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 30
2023-11-06 19:19:42 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 19:19:42 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 15666.87
2023-11-06 19:19:59 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 30
2023-11-06 19:20:00 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 19:20:00 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 4018.58
2023-11-06 19:20:17 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 30
2023-11-06 19:20:19 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 19:20:19 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 9264.97
2023-11-06 19:20:35 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 30
2023-11-06 19:20:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB



----------------------------------------------------------------------------------------------
moving inputs to gpu...
----------------------------------------------------------------------------------------------
done...


2023-11-06 19:22:21 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.q_proj of layer 31
2023-11-06 19:22:22 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.32
2023-11-06 19:22:22 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 14604.77
2023-11-06 19:22:39 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.k_proj of layer 31
2023-11-06 19:22:41 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.33
2023-11-06 19:22:41 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 3725.28
2023-11-06 19:22:57 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.v_proj of layer 31
2023-11-06 19:22:59 sparseml.modifiers.obcq.utils.sparsegpt INFO     time 1.34
2023-11-06 19:22:59 sparseml.modifiers.obcq.utils.sparsegpt INFO     error 9093.79
2023-11-06 19:23:15 sparseml.modifiers.obcq.utils.layer_compressor INFO     Compressing module self_attn.o_proj of layer 31
2023-11-06 19:23:

torch.cuda.memory_allocated: 4.457302GB
torch.cuda.max_memory_allocated: 7.816967GB
torch.cuda.max_memory_reserved: 8.806641GB




ModifiedState(model=MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
 

In [6]:
!pwd

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


/home/rshaw/zephyr-training


In [7]:
save_path = "data/one-shot/50sparse-sft-v0"

model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

('data/one-shot/50sparse-sft-v0/tokenizer_config.json',
 'data/one-shot/50sparse-sft-v0/special_tokens_map.json',
 'data/one-shot/50sparse-sft-v0/tokenizer.json')

In [12]:
import os
from sparseml.optim.helpers import load_recipe_yaml_str

recipe_output_path = os.path.join(save_path, "recipe.yaml")
with open(recipe_output_path, "w") as fp:
    fp.write(load_recipe_yaml_str(recipe_file))

In [13]:
!ls {save_path}

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


config.json			  model-00006-of-00006.safetensors
generation_config.json		  model.safetensors.index.json
model-00001-of-00006.safetensors  recipe.yaml
model-00002-of-00006.safetensors  special_tokens_map.json
model-00003-of-00006.safetensors  tokenizer_config.json
model-00004-of-00006.safetensors  tokenizer.json
model-00005-of-00006.safetensors
