In [1]:
import json
import gc
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from zeus.monitor import ZeusMonitor
from transformers import pipeline
from transformers import set_seed
from transformers import BitsAndBytesConfig

### Dataset downloading
Dataset URL: [https://huggingface.co/datasets/tinyBenchmarks/tinyAlpacaEval](https://huggingface.co/datasets/tinyBenchmarks/tinyAlpacaEval) 

In [2]:
df = pd.read_parquet("hf://datasets/tinyBenchmarks/tinyAlpacaEval/data/test-00000-of-00001.parquet")
len(df["instruction"])

100

### Set seed and CUDA device

In [3]:
set_seed(42)

In [4]:
torch.cuda.get_device_name()

'NVIDIA RTX A1000 6GB Laptop GPU'

In [5]:
torch.cuda.is_available()

True

### Functions for LLM Benchmarking

In [6]:
def run_text_generation_energy_measurement(pipe, instruction_list, monitor, nb_token=256):
    """
    Measures the energy consumption and time taken for text generation using a given pipeline and a list of instructions.
    Also tracks the number of tokens in the input questions and generated responses.

    Parameters:
    pipe (object): The pipeline object that includes the tokenizer and the model for text generation.
    instruction_list (list of str): A list of instructions or prompts to be used for text generation.
    monitor (object): A Zeus object used to monitor the energy consumption and time during the text generation process.
    nb_token (int, optional): The maximum number of new tokens to generate. Default is 256.

    Returns:
    pd.DataFrame: A DataFrame containing the measured energy consumption, time, instruction dataset index,
                  question tokens, and response tokens.
    dict: A dictionary containing the generated text for each instruction.
    """
    gpu_energy = []
    time = []
    instruction_dataset_index = []
    question_tokens = []
    response_tokens = []
    dict_generated_text = {"data":[]}

    tokenizer = pipe.tokenizer

    for i in range(len(instruction_list)):
        if i % 10 == 0:
            print("Instruction index: ", i)

        message = {"role": "user", "content": instruction_list[i]}
        monitor.begin_window("generation")
        outputs = pipe(
            [message],
            max_new_tokens=nb_token
        )
        result_monitor = monitor.end_window("generation")
        gpu_energy.append(result_monitor.total_energy)
        time.append(result_monitor.time)
        instruction_dataset_index.append(i)

        
        model_inputs = tokenizer.apply_chat_template([message], add_generation_prompt=True, return_tensors="pt").to("cuda")
        input_length = model_inputs.shape[1]

        model_outputs = tokenizer.apply_chat_template([outputs[0]["generated_text"][-1]], add_generation_prompt=True, return_tensors="pt").to("cuda")
        output_length = model_outputs.shape[1]
        
        question_tokens.append(input_length)
        response_tokens.append(output_length)

        dict_generated_text["data"].append({
            "question" : outputs[0]["generated_text"][0],
            "generated_text" : outputs[0]["generated_text"][-1],
        })

    dict_results = {
        "gpu_energy": gpu_energy,
        "time": time,
        "instruction_dataset_index": instruction_dataset_index,
        "question_tokens": question_tokens,
        "response_tokens": response_tokens
    }

    return pd.DataFrame(dict_results), dict_generated_text

In [7]:
def run_evaluation(nb_run, pipe, instruction_list):
    """
    Runs multiple evaluations of text generation energy measurement and aggregates the results.

    Parameters:
    nb_run (int): The number of times to run the evaluation.
    pipe (object): The pipeline object that includes the tokenizer and the model for text generation.
    instruction_list (list of str): A list of instructions or prompts to be used for text generation.

    Returns:
    pd.DataFrame: A DataFrame containing the aggregated results of energy consumption, time, instruction dataset index, and run number.
    dict: A dictionary containing the generated text for each instruction from the last run.
    """
    df_all_results = pd.DataFrame({"gpu_energy": [], "time": [], "instruction_dataset_index":[], "run":[]})
    dict_generated_text = None
    for i in range(0,nb_run):
        print("Run: ", i)
        monitor = ZeusMonitor(gpu_indices=[0])
        results, dict_generated_text = run_text_generation_energy_measurement(pipe, instruction_list, monitor)
        results["run"] = i
        df_all_results = pd.concat([df_all_results, results])
    return df_all_results, dict_generated_text

In [8]:
def eval_models(model_id_list, nb_run, quantization_name, quantization_config, instruction_list, csv_suffixe=""):
    """
    Evaluates multiple models for text generation energy measurement and saves the results.

    Parameters:
    model_id_list (list of str): A list of model IDs to evaluate.
    nb_run (int): The number of times to run the evaluation for each model.
    quantization_name (str): The name of the quantization method used.
    quantization_config (object): The configuration object for quantization.
    instruction_list (list of str): A list of instructions or prompts to be used for text generation.
    csv_suffixe (str, optional): A suffix to add to the output CSV file names. Default is an empty string.

    Returns:
    None
    """
    for model_id in model_id_list:

        if quantization_config:
            model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)
            tokenizer = AutoTokenizer.from_pretrained(model_id)
        else:
            model = AutoModelForCausalLM.from_pretrained(model_id)
            tokenizer = AutoTokenizer.from_pretrained(model_id)

        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=AutoTokenizer.from_pretrained(model_id),
            pad_token_id=tokenizer.eos_token_id
        )

        print("========= Begin Model Evaluation " + model_id + " ==========")
        df_all_results, dict_generated_text = run_evaluation(nb_run, pipe, instruction_list)
        df_all_results["quantization"] = quantization_name

        if not quantization_config:
            df_all_results["quantization_object"] = ""
            dict_generated_text["quantization_object"] = ""
        else:
            df_all_results["quantization_object"] = quantization_config.to_json_string().replace("\n", "")
            dict_generated_text["quantization_object"] = quantization_config.to_dict()

        df_all_results["model"] = model_id
        df_all_results["toech_memory_allocated"] = torch.cuda.memory_allocated(0)
        df_all_results.to_csv("results/" + model_id.split("/")[1] + csv_suffixe + ".csv", index=False)

        dict_generated_text["model"] = model_id
        dict_generated_text["quantization"] = quantization_name

        with open("results/LLM_generated_text/" + model_id.split("/")[1] + csv_suffixe + ".json", 'w', encoding='utf-8') as json_file:
            json.dump(dict_generated_text, json_file, ensure_ascii=False, indent=4)

        print("========= End Model Evaluation " + model_id + " ==========")

        del pipe
        del tokenizer
        del model
        torch.cuda.empty_cache()
        gc.collect()

### Llama-3.2-1B-Instruct

In [12]:
model_id_list = ["Qwen/Qwen2.5-0.5B-Instruct", "ibm-granite/granite-3.1-1b-a400m-instruct","HuggingFaceTB/SmolLM2-360M-Instruct", "meta-llama/Llama-3.2-1B-Instruct"]

In [14]:
def eval_models_memory_footprint(model_id_list, quantization_config):
    dict_memory_footprint = {
        "model_memory_footprint": [],
        "model":[],
        "quantization":[]
    }
    for model_id in model_id_list:
        
        if quantization_config : 
            model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            print(quantization_config.is_quantizable())
        else: 
            model = AutoModelForCausalLM.from_pretrained(model_id)
            tokenizer = AutoTokenizer.from_pretrained(model_id)
        
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=AutoTokenizer.from_pretrained(model_id),
            pad_token_id = tokenizer.eos_token_id
        )
        
        print("========= Model Memory Footprint Evaluation "+model_id+" ==========")
        print(model.get_memory_footprint())
        print("========= End  Model Memory Footprint Evaluation "+model_id+" ==========")
        
        del pipe
        del tokenizer
        del model
        torch.cuda.empty_cache()
        gc.collect()

In [15]:
quantization_config = BitsAndBytesConfig(
        load_in_8bit=True,
)
eval_models_memory_footprint(model_id_list, quantization_config)

`low_cpu_mem_usage` was None, now default to True since model is quantized.


True


Device set to use cuda:0


630242176


`low_cpu_mem_usage` was None, now default to True since model is quantized.


True


Device set to use cuda:0


2592972928


`low_cpu_mem_usage` was None, now default to True since model is quantized.


True


Device set to use cuda:0


409073664


`low_cpu_mem_usage` was None, now default to True since model is quantized.


True


Device set to use cuda:0


1498552448


#### Without Quantization

In [8]:
eval_models(model_id_list, nb_run=10, quantization_name="Without", quantization_config=None, instruction_list=df["instruction"])

NameError: name 'model_id_list' is not defined

### 8-bit Quantization

In [11]:
quantization_config = BitsAndBytesConfig(
        load_in_8bit=True,
)
eval_models(model_id_list, nb_run=10, quantization_name="8-bit", quantization_config=quantization_config, instruction_list=df["instruction"], csv_suffixe="-q8bits")

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 04:59:19,081] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 04:59:19,081] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
285
253
80
285
275
90
285
285
192
285
Instruction index:  10
285
285
285
285
89
285
39
260
221
285
Instruction index:  20
285
285
79
37
213
41
285
148
59
285
Instruction index:  30
285
285
285
224
285
42
88
285
284
285
Instruction index:  40
285
86
41
285
102
65
248
284
41
38
Instruction index:  50
285
285
283
284
285
285
42
174
285
285
Instruction index:  60
75
42
285
285
93
89
285
105
88
285
Instruction index:  70
285
41
285
106
102
215
185
251
38
48
Instruction index:  80
187
137
285
285
41
112
285
264
285
214
Instruction index:  90
285
285
285
285
79
285
285
70
285
74
Run:  1
[2024-12-21 05:10:02,102] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 05:10:02,103] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction ind

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 06:48:07,982] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 06:48:07,983] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
315
65
315
315
180
176
315
315
315
315
Instruction index:  10
315
315
315
315
315
315
72
134
145
315
Instruction index:  20
315
315
315
315
315
67
315
136
91
315
Instruction index:  30
315
191
315
315
315
315
315
315
315
315
Instruction index:  40
315
68
315
315
135
107
315
315
159
145
Instruction index:  50
315
315
147
275
315
315
315
315
315
315
Instruction index:  60
69
315
315
315
60
79
315
117
111
212
Instruction index:  70
315
315
315
236
315
315
233
315
80
82
Instruction index:  80
162
158
315
315
315
202
315
89
315
315
Instruction index:  90
315
315
315
315
125
315
315
78
315
83
Run:  1
[2024-12-21 07:02:33,169] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 07:02:33,169] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]


`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 09:11:01,044] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 09:11:01,044] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
287
37
287
286
139
96
287
287
77
287
Instruction index:  10
287
173
287
201
34
287
41
53
36
287
Instruction index:  20
287
287
62
38
287
43
116
41
58
287
Instruction index:  30
287
287
286
49
287
287
59
85
287
287
Instruction index:  40
287
59
141
287
163
90
217
287
72
87
Instruction index:  50
287
287
41
281
287
287
287
181
60
287
Instruction index:  60
63
99
268
287
39
52
237
101
55
46
Instruction index:  70
209
287
99
56
47
263
145
66
60
42
Instruction index:  80
256
105
287
287
287
60
287
34
287
92
Instruction index:  90
287
287
264
253
82
227
201
144
287
41
Run:  1
[2024-12-21 09:18:54,378] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 09:18:54,378] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
287

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 10:29:45,012] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 10:29:45,012] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
291
248
67
291
100
57
291
291
272
291
Instruction index:  10
291
291
291
291
155
176
109
65
133
290
Instruction index:  20
291
291
214
154
291
237
291
291
203
291
Instruction index:  30
291
291
291
291
291
291
291
101
291
291
Instruction index:  40
291
170
291
291
113
73
291
291
182
99
Instruction index:  50
291
291
156
291
291
291
291
291
291
290
Instruction index:  60
109
291
291
72
87
121
291
117
75
290
Instruction index:  70
291
291
291
128
114
291
279
291
90
105
Instruction index:  80
199
122
291
130
291
73
291
291
291
276
Instruction index:  90
291
291
246
291
262
291
288
86
291
229
Run:  1
[2024-12-21 10:37:49,125] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 10:37:49,126] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1

### 4-bit Quantization

In [11]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)
eval_models(model_id_list, nb_run=10, quantization_name="4-bit", quantization_config=quantization_config, instruction_list=df["instruction"], csv_suffixe="-q4bits")

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 11:56:09,381] [zeus.device.gpu.nvidia](nvidia.py:47) pynvml is available and initialized.
[2024-12-21 11:56:09,381] [zeus.device.cpu.rapl](rapl.py:136) RAPL is available.
[2024-12-21 11:56:09,382] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj
[2024-12-21 11:56:09,384] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:0/energy_uj
[2024-12-21 11:56:09,385] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:1/energy_uj
[2024-12-21 11:56:09,386] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:1/energy_uj
[2024-12-21 11:56:09,386] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 11:56:09,387] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
[2024

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-21 11:59:33,872] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 11:59:33,872] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-21 12:02:58,874] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 12:02:58,874] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 12:29:49,790] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 12:29:49,790] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-21 12:38:38,043] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 12:38:38,043] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-21 12:47:16,854] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 12:47:16,855] [zeus.monitor.energy](energy.py:2

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 13:56:30,645] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 13:56:30,645] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-21 13:59:46,441] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 13:59:46,441] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-21 14:03:02,388] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 14:03:02,389] [zeus.monitor.energy](energy.py:2



Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  9
[2024-12-21 14:25:56,089] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 14:25:56,089] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90


`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 14:29:16,791] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 14:29:16,792] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-21 14:33:51,152] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 14:33:51,152] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-21 14:38:34,462] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 14:38:34,462] [zeus.monitor.energy](energy.py:2

### 4-bit Quantization + double quant

In [10]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)
eval_models(model_id_list, nb_run=10, quantization_name="4-bit + Nested Quantization", quantization_config=quantization_config,
            instruction_list=df["instruction"], csv_suffixe="-q4bits-double-quant")

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 16:04:52,210] [zeus.device.gpu.nvidia](nvidia.py:47) pynvml is available and initialized.
[2024-12-21 16:04:52,211] [zeus.device.cpu.rapl](rapl.py:136) RAPL is available.
[2024-12-21 16:04:52,211] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj
[2024-12-21 16:04:52,214] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:0/energy_uj
[2024-12-21 16:04:52,215] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:1/energy_uj
[2024-12-21 16:04:52,217] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:1/energy_uj
[2024-12-21 16:04:52,220] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 16:04:52,220] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
[2024

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-21 16:09:09,724] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 16:09:09,724] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-21 16:13:10,709] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 16:13:10,709] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 16:48:27,372] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 16:48:27,373] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-21 16:57:47,267] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 16:57:47,267] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-21 17:06:53,818] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 17:06:53,818] [zeus.monitor.energy](energy.py:2

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 18:15:37,894] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 18:15:37,895] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-21 18:19:48,869] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 18:19:48,869] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-21 18:23:58,365] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 18:23:58,366] [zeus.monitor.energy](energy.py:2



Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  9
[2024-12-21 18:53:10,063] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 18:53:10,063] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60




Instruction index:  70
Instruction index:  80
Instruction index:  90


`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 18:57:21,772] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 18:57:21,773] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-21 19:02:33,492] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 19:02:33,492] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-21 19:07:47,664] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 19:07:47,665] [zeus.monitor.energy](energy.py:2

In [10]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
)
eval_models(model_id_list, nb_run=10, quantization_name="4-bit + Normal Float 4", quantization_config=quantization_config,
            instruction_list=df["instruction"], csv_suffixe="-q4bits-nf4")

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 20:59:08,743] [zeus.device.gpu.nvidia](nvidia.py:47) pynvml is available and initialized.
[2024-12-21 20:59:08,744] [zeus.device.cpu.rapl](rapl.py:136) RAPL is available.
[2024-12-21 20:59:08,745] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj
[2024-12-21 20:59:08,747] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:0/energy_uj
[2024-12-21 20:59:08,748] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:1/energy_uj
[2024-12-21 20:59:08,749] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:1/energy_uj
[2024-12-21 20:59:08,749] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 20:59:08,750] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
[2024

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-21 21:02:27,293] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 21:02:27,293] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0




Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-21 21:06:08,040] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 21:06:08,041] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  3
[2024-12-21 21:09:40,093] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 21:09:40,094] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0




Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  4
[2024-12-21 21:13:06,698] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 21:13:06,699] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  5
[2024-12-21 21:16:59,699] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 21:16:59,699] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0




Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  6
[2024-12-21 21:20:32,189] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 21:20:32,190] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  7
[2024-12-21 21:24:01,008] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 21:24:01,009] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 21:34:31,029] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 21:34:31,029] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-21 21:42:48,692] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 21:42:48,693] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-21 21:51:04,648] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 21:51:04,648] [zeus.monitor.energy](energy.py:2

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 22:56:31,046] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 22:56:31,046] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-21 23:00:17,564] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 23:00:17,564] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-21 23:04:05,460] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 23:04:05,461] [zeus.monitor.energy](energy.py:2



Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  7
[2024-12-21 23:23:08,920] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 23:23:08,921] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  8
[2024-12-21 23:26:57,170] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 23:26:57,170] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60




Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  9
[2024-12-21 23:30:46,028] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 23:30:46,028] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90


`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-21 23:34:36,536] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 23:34:36,536] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-21 23:39:29,661] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 23:39:29,662] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-21 23:44:07,863] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-21 23:44:07,864] [zeus.monitor.energy](energy.py:2

### 4-bit Quantization + NF4 + Nested Quantization

In [11]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True
)
eval_models(model_id_list, nb_run=10, quantization_name="4-bit + NF4 + Double Quant", quantization_config=quantization_config,
            instruction_list=df["instruction"], csv_suffixe="-q4bits-nf4-double-quant")

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-22 08:09:48,625] [zeus.device.gpu.nvidia](nvidia.py:47) pynvml is available and initialized.
[2024-12-22 08:09:48,625] [zeus.device.cpu.rapl](rapl.py:136) RAPL is available.
[2024-12-22 08:09:48,626] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj
[2024-12-22 08:09:48,629] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:0/energy_uj
[2024-12-22 08:09:48,630] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:1/energy_uj
[2024-12-22 08:09:48,631] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:1/energy_uj
[2024-12-22 08:09:48,632] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 08:09:48,632] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
[2024

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-22 08:14:13,527] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 08:14:13,527] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-22 08:18:27,601] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 08:18:27,602] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0




Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  3
[2024-12-22 08:22:45,930] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 08:22:45,931] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0




Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  4
[2024-12-22 08:26:59,566] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 08:26:59,567] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0




Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  5
[2024-12-22 08:31:30,982] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 08:31:30,982] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0




Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  6
[2024-12-22 08:35:47,163] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 08:35:47,163] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  7
[2024-12-22 08:40:10,744] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 08:40:10,744] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0




Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  8
[2024-12-22 08:44:30,324] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 08:44:30,325] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0




Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  9
[2024-12-22 08:48:49,141] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 08:48:49,141] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90


`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-22 08:53:04,273] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 08:53:04,273] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-22 09:02:07,444] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 09:02:07,444] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-22 09:11:20,013] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 09:11:20,013] [zeus.monitor.energy](energy.py:2

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-22 10:24:26,500] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 10:24:26,501] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-22 10:29:19,016] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 10:29:19,017] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-22 10:34:11,490] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 10:34:11,491] [zeus.monitor.energy](energy.py:2



Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  4
[2024-12-22 10:43:56,389] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 10:43:56,390] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10




Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  5
[2024-12-22 10:48:49,325] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 10:48:49,325] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  6
[2024-12-22 10:53:42,847] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 10:53:42,848] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10




Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  7
[2024-12-22 10:58:36,310] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 10:58:36,310] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  8
[2024-12-22 11:03:29,806] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 11:03:29,806] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Device set to use cuda:0


Run:  0
[2024-12-22 11:13:22,655] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 11:13:22,655] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  1
[2024-12-22 11:18:26,768] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 11:18:26,769] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices [0, 1]
Instruction index:  0
Instruction index:  10
Instruction index:  20
Instruction index:  30
Instruction index:  40
Instruction index:  50
Instruction index:  60
Instruction index:  70
Instruction index:  80
Instruction index:  90
Run:  2
[2024-12-22 11:23:42,584] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [0].
[2024-12-22 11:23:42,584] [zeus.monitor.energy](energy.py:2