# Simulations for the MESS+ estimator

In [6]:
import time

import numpy as np
import pandas as pd
import wandb
import yaml

from pathlib import Path

from classifier.model import MultilabelBERTClassifier
from classifier.file_reader import read_files_from_folder
from classifier.dataset import create_bert_datasets, preprocess_dataframe

from utils.mess_plus import sample_from_bernoulli


In [7]:
BENCHMARK_NAME = "winogrande"
# this refers to whether we want to use a pre-trained classifier or learn the classifier online while benchmarking.
APPROACH = "online"  # alt: online
# This only has an effect when APPROACH = pretrained. Make sure to adjust the minibatch size accordingly!
NUM_PRETRAINING_STEPS = 400
SEEDS = [42]
NUM_CLASSIFIER_LABELS = 3

PROJECT_ROOT_PATH = Path("mess_plus_simulator").parent

## Load benchmark config

In [8]:

if APPROACH == "pretrained":
	config_path = Path(f"{PROJECT_ROOT_PATH}/config/pretrained/{BENCHMARK_NAME}.yaml")
elif APPROACH == "online":
	config_path = Path(f"{PROJECT_ROOT_PATH}/config/online/{BENCHMARK_NAME}.yaml")
	NUM_PRETRAINING_STEPS = 0
else:
	raise NotImplementedError(f"Approach {APPROACH} not implemented.")

with config_path.open("r") as f:
	CONFIG = yaml.safe_load(f)
	display(CONFIG)


{'run_name': 'baseline',
 'seed': 43,
 'model_zoo': {'meta-llama/Llama-3.2-1B-Instruct': {'category': 'small',
   'gpu_indices': [0],
   'max_seq_len': 2048,
   'gpu_memory_utilization': 0.12,
   'quantization': None},
  'unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit': {'category': 'medium',
   'gpu_indices': [0],
   'max_seq_len': 2048,
   'gpu_memory_utilization': 0.15,
   'quantization': 'bitsandbytes'},
  'unsloth/Llama-3.3-70B-Instruct-bnb-4bit': {'category': 'large',
   'gpu_indices': [0],
   'max_seq_len': 2048,
   'gpu_memory_utilization': 0.68,
   'quantization': 'bitsandbytes'}},
 'classifier_model': {'model_id': 'answerdotai/ModernBERT-base',
  'epochs': 1,
  'learning_rate': 0.066,
  'weight_decay': 0.01,
  'momentum': 0.9,
  'batch_size': 64,
  'max_length': 64,
  'warmup_ratio': 0.1,
  'threshold': 0.5,
  'dropout_rate': 0.1,
  'freeze_bert_layers': True,
  'memory_size': 0,
  'memory_strategy': 'random',
  'reset_optimizer': False,
  'regularization_lambda': 0.0,
  'gpu_in

## Load dataset

In [9]:
input_df = read_files_from_folder(folder_path=f"{PROJECT_ROOT_PATH}/data/inference_outputs/{BENCHMARK_NAME}")
input_df["idx_original"] = input_df.index
input_df = input_df.sample(frac=1).reset_index(drop=True)

display(f"Loaded dataframe with {input_df.shape[0]} rows and {input_df.shape[1]} columns")
display(f"{len(input_df.columns.tolist())} available columns: {input_df.columns.tolist()}")
display(input_df.head())

'Loaded dataframe with 1267 rows and 15 columns'

"15 available columns: ['input_text', 'benchmark_name', 'label_small', 'acc_small', 'energy_consumption_small', 'inference_time_small', 'label_medium', 'acc_medium', 'energy_consumption_medium', 'inference_time_medium', 'label_large', 'acc_large', 'energy_consumption_large', 'inference_time_large', 'idx_original']"

Unnamed: 0,input_text,benchmark_name,label_small,acc_small,energy_consumption_small,inference_time_small,label_medium,acc_medium,energy_consumption_medium,inference_time_medium,label_large,acc_large,energy_consumption_large,inference_time_large,idx_original
0,Donald had to drive Matthew to the doctor imme...,winogrande,0.0,0.0,37.128,0.216491,1.0,1.0,199.45,1.022478,1.0,1.0,1037.318,2.730692,1102
1,The dancer would leave the stage and enter the...,winogrande,0.0,0.0,39.052,0.227257,1.0,1.0,199.287,1.023072,1.0,1.0,1004.743,2.726789,869
2,"Christopher was able to travel abroad, while W...",winogrande,1.0,1.0,39.618,0.220684,1.0,1.0,195.672,1.018793,1.0,1.0,1001.786,2.722344,879
3,The sponges performed worse at cleaning the tu...,winogrande,0.0,0.0,40.061,0.217072,0.0,0.0,196.734,1.020303,0.0,0.0,1009.372,2.727702,317
4,Erin was sick of the pests like Amy always cut...,winogrande,0.0,0.0,71.632,0.222031,1.0,1.0,193.776,1.021516,1.0,1.0,1010.072,2.730747,433


## Load, configure, and train classifier

In [10]:
text_col = "input_text"
label_cols = ["label_small", "label_medium", "label_large"]

classifier = MultilabelBERTClassifier(num_labels=NUM_CLASSIFIER_LABELS, **CONFIG["classifier_model"])
training_df = input_df.loc[:NUM_PRETRAINING_STEPS]
training_df = preprocess_dataframe(training_df, label_cols=label_cols)

train_dataset, val_dataset, tokenizer = create_bert_datasets(
	training_df,
	text_col,
	label_cols,
	model_name=CONFIG["classifier_model"]["model_id"],
	max_length=CONFIG["classifier_model"]["max_length"],
	val_ratio=CONFIG["classifier_model"]["validation_dataset_size"],
	random_seed=SEEDS[0],
)

training_stats = classifier.fit(train_dataset, val_dataset, epochs=CONFIG["classifier_model"]["epochs"], early_stopping_patience=2)

display(training_stats)


INFO:classifier.model:Using device: cpu


[2025-05-11 16:57:49,918] [zeus.device.gpu.nvidia](nvidia.py:50) pynvml is available but could not initialize NVML: NVML Shared Library Not Found.
[2025-05-11 16:57:49,939] [zeus.device.gpu.amd](amd.py:42) amdsmi is not available.
[2025-05-11 16:57:49,940] [zeus.device.cpu.rapl](rapl.py:134) RAPL is not supported on this CPU.
[2025-05-11 16:57:49,940] [zeus.monitor.energy](energy.py:208) Monitoring GPU indices [].
[2025-05-11 16:57:49,941] [zeus.monitor.energy](energy.py:209) Monitoring CPU indices []


INFO:classifier.model:Initializing custom BERTClassifier: answerdotai/ModernBERT-base with 3 labels


ValueError: need at least one array to concatenate

In [12]:
# Dataset statistics
display(input_df[NUM_PRETRAINING_STEPS:]["energy_consumption_small"].mean())
display(input_df[NUM_PRETRAINING_STEPS:]["energy_consumption_medium"].mean())
display(input_df[NUM_PRETRAINING_STEPS:]["energy_consumption_large"].mean())


48.97596491926114

200.4501996855145

1018.4225753717942

In [None]:
algorithm_config = CONFIG["algorithm"]

model_categories = [i for i in CONFIG["model_zoo"].keys()]
sample_cols = input_df.columns.tolist()

ALPHA_VALUES = algorithm_config["alpha_values"]
C_VALUES = [1.0]
V_VALUES = [0.01, 0.001, 0.0001, 0.00001, 0.000001]
R_VALUES = [1]

for alpha in ALPHA_VALUES:
	for c in C_VALUES:
		for v in V_VALUES:
			algorithm_config["V"] = v
			algorithm_config["alpha"] = alpha
			algorithm_config["c"] = c

			ACCURACY_LIST = []
			EXPLORATION_STEP_LIST = []
			ENERGY_CONSUMPTION_LIST = []
			INFERENCE_TIME_LIST = []
			MODEL_CHOSEN_LIST = []

			ENERGY_PER_MODEL = {
				"small": [0.01],
				"medium": [0.1],
				"large": [1.0],
			}

			model_category_list = [i for i in ENERGY_PER_MODEL.keys()]

			Q = 0.0
			ctr = 0

			run = wandb.init(
				project=f"mess-plus_runs_vTEST2",
				name=f"{BENCHMARK_NAME}_V={algorithm_config['V']}_a={algorithm_config['alpha']}_c={algorithm_config['c']}_r={r}",
				config=CONFIG
			)

			if wandb.run is not None:
				run.summary.update({**{f"classifier/{k}": v for k, v in training_stats.items()}})

			monitoring_dict = {}
			for idx, sample in input_df[NUM_PRETRAINING_STEPS:].iterrows():
				p_t, x_t = sample_from_bernoulli(c=algorithm_config["c"], timestamp=idx)
				EXPLORATION_STEP_LIST.append(x_t)

				if x_t == 1:
					result = sample["label_large"]
					ACCURACY_LIST.append(result)
					step_energy = sum([sample[i] for i in sample_cols if "energy" in i])
					step_time = sum([sample[i] for i in sample_cols if "inference" in i])
					ENERGY_CONSUMPTION_LIST.append(step_energy)
					INFERENCE_TIME_LIST.append(step_time)
					for i in ENERGY_PER_MODEL.keys():
						ENERGY_PER_MODEL[i] = sample[f"energy_consumption_{i}"]

					monitoring_dict[f"mess_plus/energy"] = step_energy
					monitoring_dict[f"mess_plus/chosen_model"] = len(model_category_list) - 1

				else:
					preds, probs = classifier.predict(texts=[sample["input_text"]])
					energy = pd.DataFrame(ENERGY_PER_MODEL, index=[0]).to_numpy()

					energy = np.array(energy).reshape(-1, 1)
					probs = probs.reshape(-1, 1)

					cost_fn = algorithm_config["V"] * energy + Q * (alpha - probs)
					cost_fn = cost_fn.reshape(1, -1)
					chosen_model_id = np.argmin(cost_fn)
					# print(f"STEP={ctr} - V={v} - Q={Q} - CHOSEN MODEL: {chosen_model_id} - COST FN: {cost_fn}")
					model_category_chosen = model_category_list[chosen_model_id]

					result = sample[f"label_{model_category_chosen}"]
					step_energy = sample[f"energy_consumption_{model_category_chosen}"]
					step_time = sample[f"inference_time_{model_category_chosen}"]

					INFERENCE_TIME_LIST.append(step_time)
					ENERGY_CONSUMPTION_LIST.append(step_energy)
					MODEL_CHOSEN_LIST.append(chosen_model_id)

					monitoring_dict[f"mess_plus/energy"] = step_energy
					monitoring_dict[f"mess_plus/chosen_model"] = chosen_model_id

					ACCURACY_LIST.append(result)

				Q = max(0.0, Q + algorithm_config["alpha"] - result)

				x = np.array(MODEL_CHOSEN_LIST)
				monitoring_dict.update({
					"mess_plus/p_t": p_t,
					"mess_plus/x_t": x_t,
					"mess_plus/exploration_step_ratio": sum(EXPLORATION_STEP_LIST) / (ctr + 1),
					"mess_plus/q_length": Q,
					"mess_plus/accuracy": sum(ACCURACY_LIST) / (ctr + 1),
					"mess_plus/step_time": step_time,
					"mess_plus/total_runtime": sum(INFERENCE_TIME_LIST),
					"mess_plus/step_energy_consumption": step_energy,
					"models/small_chosen": len(np.where(x == 0)[0]) / (len(x) + 1e-8),
					"models/medium_chosen": len(np.where(x == 1)[0]) / (len(x) + 1e-8),
					"models/large_chosen": len(np.where(x == 2)[0]) / (len(x) + 1e-8),
				})

				print(monitoring_dict)

				ctr += 1
				if wandb.run is not None:
					wandb.log(monitoring_dict, step=ctr)

				if ctr % 3 == 0 and ctr > 0:
					break

			if wandb.run is not None:
				wandb.finish()
				time.sleep(2)


print(f"DONE")