In [None]:
!pip install datasets codecarbon
!pip install bitsandbytes==0.45.5
!pip install transformers torch codecarbon
!pip install hf_xet

Collecting bitsandbytes==0.45.5
  Using cached bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes==0.45.5)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes==0.45.5)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes==0.45.5)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.0->bitsandbytes==0.45.5)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.0->bitsandbytes==0.45.5)
  Using cached nvidia_cublas_cu12-12.4.5.8-py3-none-manylin

In [None]:
import numpy as np
import pandas as pd
import sklearn.metrics as sk_metrics
import torch
from codecarbon import EmissionsTracker
from datasets import DatasetDict, load_dataset
from huggingface_hub import (
    PyTorchModelHubMixin,
    notebook_login,)
from sklearn.metrics import accuracy_score, classification_report, f1_score
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM, Gemma3ForCausalLM, BartForSequenceClassification
from transformers import T5Tokenizer
from transformers import T5ForConditionalGeneration
from transformers import TrainingArguments, Trainer


In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): new_session=False presumably reuses an already-saved token
# instead of prompting again — confirm against the huggingface_hub docs.
notebook_login(new_session=False)

In [None]:
# Checkpoint ids considered for the fine-tuning experiments.
# NOTE(review): no visible cell reads this list; the checkpoint that is
# actually loaded is written out literally in the model-loading cell.
encoder_decoder_models = [
    "google/flan-t5-base",
    # "bigscience/T0_3B"
    "tasksource/deberta-base-long-nli",
    "facebook/bart-base"
]

In [None]:
def preprocess(example):
    """Map one raw dataset row to the two fields used for fine-tuning.

    ``input_text`` is the row's ``quote`` rendered as a string and
    ``target_text`` is the integer parsed from the first character of the
    row's ``label`` field.
    """
    quote_text = str(example["quote"])
    class_index = int(example["label"][0])
    return {"input_text": quote_text, "target_text": class_index}


# Fetch the dataset from the Hub, then add `input_text` / `target_text`
# to every row of every split via `preprocess`.
raw_splits = load_dataset("quotaclimat/frugalaichallenge-text-train")
dataset = raw_splits.map(preprocess)

README.md:   0%|          | 0.00/5.80k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/1.02M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/248k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/4872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1219 [00:00<?, ? examples/s]

Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

In [None]:
def compute_metrics(pred):
    """Compute accuracy and macro-F1 for a ``Trainer`` prediction output.

    Two label layouts are handled:

    * 1-D ``label_ids`` (sequence classification): labels and argmax
      predictions are class indices and are compared directly. They must not
      be sent through the tokenizer: ``batch_decode`` would treat them as
      token ids, and with ``skip_special_tokens=True`` every index that
      happens to be a special-token id decodes to the same empty string,
      which merges distinct classes into one.
    * 2-D ``label_ids`` (seq2seq): predictions and labels are token-id
      sequences; both are decoded, stripped and lower-cased before being
      compared as strings.

    Args:
        pred: object exposing ``predictions`` (the logits, or a tuple whose
            first element is the logits) and ``label_ids``.

    Returns:
        dict with the keys ``"accuracy"`` and ``"f1"`` (macro-averaged).
    """
    logits = pred.predictions
    if isinstance(logits, tuple):
        # Extra model outputs may follow the logits; only the logits are scored.
        logits = logits[0]
    pred_ids = np.asarray(logits).argmax(-1)
    label_ids = np.asarray(pred.label_ids)

    if label_ids.ndim == 1:
        y_true, y_pred = label_ids, pred_ids
    else:
        # Negative ids are loss-masking placeholders, not real tokens; map them
        # to the pad token so decoding succeeds and they are then skipped.
        label_ids = np.where(label_ids < 0, tokenizer.pad_token_id, label_ids)
        decoded_preds = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
        decoded_labels = tokenizer.batch_decode(label_ids, skip_special_tokens=True)
        y_pred = [p.strip().lower() for p in decoded_preds]
        y_true = [l.strip().lower() for l in decoded_labels]

    # Called through the module so that a cell rebinding the bare name
    # `f1_score` to a number cannot break this function.
    acc = sk_metrics.accuracy_score(y_true, y_pred)
    f1 = sk_metrics.f1_score(y_true, y_pred, average="macro")

    return {"accuracy": acc, "f1": f1}

In [None]:
# The tokenizer and the 8-way classification model are loaded from the same checkpoint id.
BART_CHECKPOINT = "facebook/bart-base"
tokenizer = AutoTokenizer.from_pretrained(BART_CHECKPOINT)
# model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-base")
model = BartForSequenceClassification.from_pretrained(BART_CHECKPOINT, num_labels=8)


def tokenize(example):
    """Tokenize ``input_text`` and attach ``target_text`` as ``labels``.

    Inputs are truncated to at most 256 tokens and padded by the tokenizer
    (``padding=True``). ``target_text`` is copied through unchanged, so no
    target-side tokenization happens here.
    """
    encoded = tokenizer(
        example["input_text"],
        max_length=256,
        truncation=True,
        padding=True,
    )
    encoded["labels"] = example["target_text"]
    return encoded

# Tokenize in batches and drop every column the train split had before the
# call, so only what `tokenize` returns is kept.
columns_to_drop = dataset["train"].column_names
tokenized_dataset = dataset.map(tokenize, batched=True, remove_columns=columns_to_drop)
# Distinct label values that occur in the test split.
valid_label_ids = np.unique(tokenized_dataset['test']['labels'], axis=0)

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

In [None]:
# Fine-tuning hyper-parameters. A checkpoint is written after every epoch;
# no evaluation schedule is configured (see the commented-out option).
training_args = TrainingArguments(
    output_dir="./bart-base-classifier",
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=5,
    # evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    learning_rate=3e-5,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    # `tokenizer=` is deprecated on Trainer and emits a FutureWarning;
    # `processing_class=` is the replacement keyword for the same object.
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

# Measure the energy/emissions of the fine-tuning run.
train_tracker = EmissionsTracker()
train_tracker.start()
try:
    trainer.train()
finally:
    # Always stop the tracker, including when training is interrupted or
    # fails; otherwise it keeps measuring in the background and its log lines
    # and totals bleed into every later measurement in this kernel.
    train_emiss = train_tracker.stop()

  trainer = Trainer(
[codecarbon INFO @ 18:13:58] [setup] RAM Tracking...
[codecarbon INFO @ 18:13:58] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at /sys/class/powercap/intel-rapl/subsystem to measure CPU

[codecarbon INFO @ 18:13:59] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 18:13:59] [setup] GPU Tracking...
[codecarbon INFO @ 18:13:59] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 18:13:59] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 18:13:59] >>> Tracker's metadata:
[codecarbon INFO @ 18:13:59]   Platform system: Linux-6.1.123+-x86_64-with-glibc2.35
[codecarbon INFO @ 18:13:59]   Python version: 3.11.12
[codecarbon INFO @ 18:13:59]   CodeCarbon version: 3.0.1
[codecarbon INFO @ 18:13:59]   Available RAM : 52.9

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mt-zhang[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[codecarbon INFO @ 18:14:11] Energy consumed for RAM : 0.000167 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 18:14:11] Delta energy consumed for CPU with constant : 0.000177 kWh, power : 42.5 W
[codecarbon INFO @ 18:14:11] Energy consumed for All CPU : 0.000354 kWh
[codecarbon INFO @ 18:14:11] Energy consumed for all GPUs : 0.000242 kWh. Total GPU Power : 29.1649405268149 W
[codecarbon INFO @ 18:14:11] 0.000763 kWh of electricity used since the beginning.


Step,Training Loss
500,0.8352


[codecarbon INFO @ 18:14:16] Energy consumed for RAM : 0.000083 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 18:14:16] Delta energy consumed for CPU with constant : 0.000177 kWh, power : 42.5 W
[codecarbon INFO @ 18:14:16] Energy consumed for All CPU : 0.000177 kWh
[codecarbon INFO @ 18:14:16] Energy consumed for all GPUs : 0.000175 kWh. Total GPU Power : 42.011181712975436 W
[codecarbon INFO @ 18:14:16] 0.000436 kWh of electricity used since the beginning.
[codecarbon INFO @ 18:14:26] Energy consumed for RAM : 0.000083 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 18:14:26] Delta energy consumed for CPU with constant : 0.000177 kWh, power : 42.5 W
[codecarbon INFO @ 18:14:26] Energy consumed for All CPU : 0.000177 kWh
[codecarbon INFO @ 18:14:26] Energy consumed for all GPUs : 0.000286 kWh. Total GPU Power : 68.61942064407586 W
[codecarbon INFO @ 18:14:26] 0.000546 kWh of electricity used since the beginning.
[codecarbon INFO @ 18:14:26] Energy consumed for RAM : 0.000250 kWh. RAM Power : 

In [None]:
# Evaluate on at most the first 1000 test rows; the bound is clamped so a
# shorter split does not make `select` fail on out-of-range indices.
test_split = tokenized_dataset["test"]
sample_test_dataset = test_split.select(range(min(1000, len(test_split))))

# Measure the energy/emissions of inference only.
inf_tracker = EmissionsTracker()
inf_tracker.start()
try:
    preds = trainer.predict(sample_test_dataset)
finally:
    # Stop the tracker even if prediction is interrupted, so it does not keep
    # running in the background.
    inf_emission = inf_tracker.stop()

[codecarbon INFO @ 18:24:07] [setup] RAM Tracking...
[codecarbon INFO @ 18:24:07] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at /sys/class/powercap/intel-rapl/subsystem to measure CPU

[codecarbon INFO @ 18:24:08] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 18:24:08] [setup] GPU Tracking...
[codecarbon INFO @ 18:24:08] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 18:24:08] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 18:24:08] >>> Tracker's metadata:
[codecarbon INFO @ 18:24:08]   Platform system: Linux-6.1.123+-x86_64-with-glibc2.35
[codecarbon INFO @ 18:24:08]   Python version: 3.11.12
[codecarbon INFO @ 18:24:08]   CodeCarbon version: 3.0.1
[codecarbon INFO @ 18:24:08]   Available RAM : 52.960 GB
[codecarbon INF

[codecarbon INFO @ 18:24:11] Energy consumed for RAM : 0.003499 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 18:24:11] Delta energy consumed for CPU with constant : 0.000177 kWh, power : 42.5 W
[codecarbon INFO @ 18:24:11] Energy consumed for All CPU : 0.007435 kWh
[codecarbon INFO @ 18:24:11] Energy consumed for all GPUs : 0.012008 kWh. Total GPU Power : 59.11483618555245 W
[codecarbon INFO @ 18:24:11] 0.022941 kWh of electricity used since the beginning.
[codecarbon INFO @ 18:24:18] Energy consumed for RAM : 0.000052 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 18:24:18] Delta energy consumed for CPU with constant : 0.000111 kWh, power : 42.5 W
[codecarbon INFO @ 18:24:18] Energy consumed for All CPU : 0.000111 kWh
[codecarbon INFO @ 18:24:18] Energy consumed for all GPUs : 0.000183 kWh. Total GPU Power : 70.14525982784922 W
[codecarbon INFO @ 18:24:18] 0.000345 kWh of electricity used since the beginning.


Evaluate the model's classification results. To account for out-of-vocabulary outputs, any prediction that does not match one of the expected labels is mapped to "unknown".

In [None]:
# For BART

# Classification head: the argmax over the logits is the predicted class
# index, which is compared with the integer labels directly.
pred_labels = preds.predictions[0].argmax(-1)
actual_labels = preds.label_ids


# Evaluate model performance
accuracy = accuracy_score(actual_labels, pred_labels)
print(f"Accuracy: {accuracy:.4f}")

report = classification_report(actual_labels, pred_labels)
print("Classification Report:")
print(report)

# Call the metric through the module: the bare name `f1_score` is rebound to
# a float below, so `f1_score(...)` would fail the second time this cell runs.
macro_f1 = sk_metrics.f1_score(actual_labels, pred_labels, average="macro")
print(f"F1 Score: {macro_f1:.4f}")

# The results cell reads the score from the name `f1_score`, so the float is
# still published under that name.
f1_score = macro_f1

Accuracy: 0.7220
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.75      0.75       251
           1       0.77      0.82      0.79       122
           2       0.72      0.78      0.75       108
           3       0.71      0.72      0.72        79
           4       0.68      0.66      0.67       127
           5       0.68      0.67      0.67       140
           6       0.74      0.67      0.70       117
           7       0.68      0.64      0.66        56

    accuracy                           0.72      1000
   macro avg       0.72      0.71      0.71      1000
weighted avg       0.72      0.72      0.72      1000

F1 Score: 0.7146


In [None]:
# For seq2seq


# Token-id predictions and references are decoded to text first; filtering
# against the expected labels is then done on the decoded strings only.
# (Putting the string "unknown" into the id list that is handed to
# `batch_decode` mixes text into token ids, and membership tests between
# arrays of ids are element-wise, so id-level filtering is not reliable.)
pred_ids = preds.predictions[0].argmax(-1)

# Negative ids are loss-masking placeholders, not real tokens; map them to
# the pad token so they can be decoded and then skipped.
label_ids = np.asarray(preds.label_ids)
label_ids = np.where(label_ids < 0, tokenizer.pad_token_id, label_ids)

pred_labels = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
true_labels = tokenizer.batch_decode(label_ids, skip_special_tokens=True)

# Build the lookup set once instead of once per prediction.
known_labels = set(true_labels)
valid_pred_labels = [pred if pred in known_labels else "unknown" for pred in pred_labels]

# Evaluate model performance
accuracy = accuracy_score(true_labels, valid_pred_labels)
print(f"Accuracy: {accuracy:.4f}")

report = classification_report(true_labels, valid_pred_labels)
print("Classification Report:")
print(report)

# Call the metric through the module: the bare name `f1_score` is rebound to
# a float below, so `f1_score(...)` would fail once any evaluation cell ran.
macro_f1 = sk_metrics.f1_score(true_labels, valid_pred_labels, average="macro")
print(f"F1 Score: {macro_f1:.4f}")

# The results cell reads the score from the name `f1_score`, so the float is
# still published under that name.
f1_score = macro_f1

Accuracy: 0.8090
Classification Report:
              precision    recall  f1-score   support

                   0.87      0.89      0.88       560
         the       0.75      0.69      0.72       140
          to       0.61      0.68      0.64        56
           ,       0.75      0.78      0.76       117
           .       0.74      0.69      0.72       127

    accuracy                           0.81      1000
   macro avg       0.74      0.74      0.74      1000
weighted avg       0.81      0.81      0.81      1000

F1 Score: 0.7432


In [None]:
# How often each label string occurs in `valid_pred_labels`.
pd.array(valid_pred_labels).value_counts()

Unnamed: 0,count
unknown,258
not_relevant,188
science_unreliable,100
not_happening,94
solutions_harmful_unneces,93
proponents_biased,86
not_human,80
not_bad,65
fossil_fuels_needed,36


In [None]:
# One-row summary of the fine-tuning run: configuration, the training
# tracker's energy/emission figures, and the latest evaluation scores.
# NOTE(review): the tracker fields are read from underscore-prefixed (private)
# attributes, which may change between codecarbon releases — confirm whether a
# public accessor exists for the installed version.
train_results =  {
        "model_name": 'BART-base',
        # NOTE(review): this is the *eval* batch size; both sizes are 32 in the
        # training arguments, so the value is the same — confirm which one is meant.
        "batch_size": training_args.per_device_eval_batch_size,
        "epochs": training_args.num_train_epochs,
        'location': train_tracker._geo.country_iso_code,
        "energy_consumed_kwh": train_tracker._total_energy.kWh,
        "carbon_emissions_kgco2": train_emiss,
        'cpu_energy_kwh': train_tracker._total_cpu_energy.kWh,
        'gpu_energy_kwh': train_tracker._total_gpu_energy.kWh,
        'ram_energy_kwh': train_tracker._total_ram_energy.kWh,
        'hardware': train_tracker._hardware,
        "training_time": train_tracker._last_measured_time - train_tracker._start_time,
        # `accuracy` and `f1_score` are the values left in the namespace by
        # whichever evaluation cell ran last; `f1_score` must be the number,
        # not the sklearn function of the same name.
        'accuracy': accuracy,
        'f1_score': f1_score
        }

# One-row summary of the inference measurement. Every key except
# "model_name" carries an "inf_" prefix; "model_name" is the join key shared
# with `train_results`.
inf_results = {
    "model_name": "BART-base",
    "inf_energy_consumed_kwh": inf_tracker._total_energy.kWh,
    "inf_carbon_emissions_kgco2": inf_emission,  # value returned by inf_tracker.stop()
    'inf_cpu_energy_kwh': inf_tracker._total_cpu_energy.kWh,
    'inf_gpu_energy_kwh': inf_tracker._total_gpu_energy.kWh,
    'inf_ram_energy_kwh': inf_tracker._total_ram_energy.kWh,
    'inf_hardware': inf_tracker._hardware,
    'inf_location': inf_tracker._geo.country_iso_code,
}

In [None]:
from google.colab import drive
# Mount Google Drive so the summary can be written there.
drive.mount('/content/drive')

# Join the one-row training and inference summaries on the model name and
# save them as a single CSV row.
# NOTE(review): the frame is called `t5_results` although it is written to
# bart_base_results.csv.
train_frame = pd.DataFrame([train_results])
inference_frame = pd.DataFrame([inf_results])
t5_results = train_frame.merge(inference_frame, on='model_name')
t5_results.to_csv('/content/drive/MyDrive/Colab Notebooks/FrugalAI/bart_base_results.csv', index=False)

[codecarbon INFO @ 18:24:26] Energy consumed for RAM : 0.003582 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 18:24:26] Delta energy consumed for CPU with constant : 0.000177 kWh, power : 42.5 W
[codecarbon INFO @ 18:24:26] Energy consumed for All CPU : 0.007612 kWh
[codecarbon INFO @ 18:24:26] Energy consumed for all GPUs : 0.012220 kWh. Total GPU Power : 50.84896830905324 W
[codecarbon INFO @ 18:24:26] 0.023413 kWh of electricity used since the beginning.
[codecarbon INFO @ 18:24:41] Energy consumed for RAM : 0.003665 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 18:24:41] Delta energy consumed for CPU with constant : 0.000177 kWh, power : 42.5 W
[codecarbon INFO @ 18:24:41] Energy consumed for All CPU : 0.007789 kWh
[codecarbon INFO @ 18:24:41] Energy consumed for all GPUs : 0.012364 kWh. Total GPU Power : 34.669357463894926 W
[codecarbon INFO @ 18:24:41] 0.023818 kWh of electricity used since the beginning.
[codecarbon INFO @ 18:24:56] Energy consumed for RAM : 0.003748 kWh. RAM Power : 

Mounted at /content/drive


In [None]:
def compute_metrics(pred):
    """Compute accuracy and macro-F1 for a ``Trainer`` prediction output.

    This definition replaces the ``compute_metrics`` defined earlier in the
    notebook once the cell runs.

    Two label layouts are handled:

    * 1-D ``label_ids`` (sequence classification): labels and argmax
      predictions are class indices and are compared directly. They must not
      be sent through the tokenizer: ``batch_decode`` would treat them as
      token ids, and with ``skip_special_tokens=True`` every index that
      happens to be a special-token id decodes to the same empty string,
      which merges distinct classes into one.
    * 2-D ``label_ids`` (seq2seq): predictions and labels are token-id
      sequences; both are decoded, stripped and lower-cased before being
      compared as strings.

    Args:
        pred: object exposing ``predictions`` (the logits, or a tuple whose
            first element is the logits) and ``label_ids``.

    Returns:
        dict with the keys ``"accuracy"`` and ``"f1"`` (macro-averaged).
    """
    logits = pred.predictions
    if isinstance(logits, tuple):
        # Extra model outputs may follow the logits; only the logits are scored.
        logits = logits[0]
    pred_ids = np.asarray(logits).argmax(-1)
    label_ids = np.asarray(pred.label_ids)

    if label_ids.ndim == 1:
        y_true, y_pred = label_ids, pred_ids
    else:
        # Negative ids are loss-masking placeholders, not real tokens; map them
        # to the pad token so decoding succeeds and they are then skipped.
        label_ids = np.where(label_ids < 0, tokenizer.pad_token_id, label_ids)
        decoded_preds = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
        decoded_labels = tokenizer.batch_decode(label_ids, skip_special_tokens=True)
        y_pred = [p.strip().lower() for p in decoded_preds]
        y_true = [l.strip().lower() for l in decoded_labels]

    # Called through the module so that a cell rebinding the bare name
    # `f1_score` to a number cannot break this function.
    acc = sk_metrics.accuracy_score(y_true, y_pred)
    f1 = sk_metrics.f1_score(y_true, y_pred, average="macro")

    return {"accuracy": acc, "f1": f1}


# NOTE(review): `pred` is not assigned by any visible cell (the prediction
# cell binds `preds`) — confirm which prediction output is meant here.
compute_metrics(pred)

{'accuracy': 0.7136997538966365, 'f1': 0.1606993960208395}

In [None]:
# prompt: write a function to process the data in dataset so that it is list of dictionary with each item as {"text": "something", "label": 1}. Also add a way to sample in a balanced manner by label class. Use the built-in map function of dataset library if you can, check external documentations if necessary

import random


def process_data(dataset, sample_size_per_label=None):
    """Convert dataset rows to ``{"text", "label"}`` dicts, optionally sampled per label.

    Each row's ``quote`` is cut to its first 1024 characters and stored as
    ``text``; the row's ``label`` with its first two characters removed is
    stored as ``label``.

    Args:
        dataset: iterable of rows indexable by ``"quote"`` and ``"label"``.
        sample_size_per_label: when truthy, return at most this many randomly
            chosen items for every label; when ``None`` (or ``0``) return all
            items in their original order.

    Returns:
        list of dicts. With sampling, items are grouped by label in the order
        in which each label first appears.

    Note:
        The global ``random`` generator is re-seeded with 42 on every call, so
        repeated calls on the same input return the same sample.
    """
    random.seed(42)

    def process_example(example):
        return {"text": example["quote"][:1024], "label": example["label"][2:]}

    processed_dataset = list(map(process_example, dataset))

    if not sample_size_per_label:
        return processed_dataset

    # Group the items in one pass instead of rescanning the whole list once
    # per label. Dict insertion order keeps labels in first-seen order and
    # every group in dataset order, so the draws below are unchanged.
    items_by_label = {}
    for item in processed_dataset:
        items_by_label.setdefault(item["label"], []).append(item)

    sampled_data = []
    for label_items in items_by_label.values():
        # Never ask for more items than the label has.
        take = min(sample_size_per_label, len(label_items))
        sampled_data.extend(random.sample(label_items, take))

    return sampled_data


# Train split: keep a balanced sample of at most 4 examples per label
# (these are the examples later placed in the prompt).
processed_train_data = process_data(dataset['train'], sample_size_per_label=4)
# Test split: processed in full, without sampling.
processed_test_data = process_data(dataset['test'])



In [None]:
def build_prompt(examples, input_text):
    """Assemble the few-shot classification prompt for one statement.

    The prompt is the fixed instruction header, then one
    ``Statement: ... / Label: ...`` pair per item of ``examples`` (each item
    is read via its ``'text'`` and ``'label'`` keys), then ``input_text`` as a
    final statement whose ``Label:`` is left open for the model to complete.
    """
    instructions = "You are a climate misinformation detection assistant. Classify the following statements into one of these categories: 'not_bad', 'not_happening', 'proponents_biased', 'not_human', 'solutions_harmful_unnecessary', 'fossil_fuels_needed', 'science_unreliable', 'not_relevant'\n\n return only the label and nothing else \n\n"
    demonstrations = "".join(
        f"Statement: {example['text']}\nLabel: {example['label']}\n\n"
        for example in examples
    )
    query = f"\nStatement: {input_text}\nLabel:"
    return instructions + demonstrations + query

In [None]:
# prompt: clear GPU

import torch

# Release PyTorch's cached CUDA memory when a GPU is present.
# NOTE(review): `empty_cache` only frees cached blocks that are no longer in
# use; memory held by objects that are still referenced (for example a model
# loaded earlier) presumably stays allocated — confirm before relying on this
# to make room for a larger model.
if not torch.cuda.is_available():
    print("No GPU available.")
else:
    torch.cuda.empty_cache()
    print("GPU cache cleared.")


GPU cache cleared.


In [None]:
# Checkpoint ids considered for the prompt-based (few-shot) experiments.
# NOTE(review): no visible cell reads this list; the checkpoint to load is
# chosen through `model_id` in the model-loading cell.
decoder_models = [
    "mistralai/Mistral-7B-Instruct-v0.3",
    "Qwen/Qwen2.5-32B-Instruct",
    'mistralai/Mistral-Small-Instruct-2409',
    "meta-llama/Llama-3.2-3B-Instruct",
    "mistralai/Mistral-Small-24B-Base-2501"]

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

model_id = "Qwen/Qwen2.5-32B-Instruct" #'mistralai/Mistral-Small-Instruct-2409' #"meta-llama/Llama-3.2-3B-Instruct" # # "meta-llama/Llama-3.2-3B-Instruct" #"mistralai/Mistral-Small-24B-Base-2501"

# 4-bit NF4 quantization with double quantization and bfloat16 compute.
# Double quantization, the quant type and the compute dtype are options of
# the 4-bit path only (`bnb_4bit_*`); BitsAndBytesConfig has no `bnb_8bit_*`
# counterparts, so such keywords have no effect.
# Loading this checkpoint with `load_in_8bit=True` did not fit on the GPU
# here: `from_pretrained` raised "Some modules are dispatched on the CPU or
# the disk".
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
# device_map="auto" lets the loader decide where each module is placed.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16
)

tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/63.2k [00:00<?, ?B/s]

Fetching 17 files:   0%|          | 0/17 [00:00<?, ?it/s]

model-00005-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00008-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00002-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00004-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00001-of-00017.safetensors:   0%|          | 0.00/3.92G [00:00<?, ?B/s]

model-00003-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00007-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00006-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00009-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00010-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00011-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00012-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00013-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00014-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00015-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00016-of-00017.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

model-00017-of-00017.safetensors:   0%|          | 0.00/3.10G [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [None]:
from tqdm.notebook import tqdm
from codecarbon import EmissionsTracker

# Few-shot classification of the first 200 processed test items, with the
# energy/emissions of the whole loop measured by one tracker.
true_labels = []
predicted_labels = []

tracker = EmissionsTracker()
tracker.start()
try:
    for sample in tqdm(processed_test_data[:200]):
        prompt = build_prompt(processed_train_data, sample["text"])
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        prompt_length = inputs["input_ids"].shape[1]

        with torch.no_grad():
            # Greedy decoding. `temperature` / `top_p` only apply when
            # sampling, so they are not passed together with do_sample=False.
            outputs = model.generate(
                **inputs,
                max_new_tokens=10,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens: the returned sequence starts
        # with the prompt, and the prompt itself contains many "Label:"
        # markers, so parsing the full text can pick up the wrong segment.
        completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        prediction = completion.strip().split("\n")[0]

        true_labels.append(sample["label"])
        predicted_labels.append(prediction)
finally:
    # Stop the tracker even when the loop is interrupted, so it does not keep
    # running in the background.
    emissions = tracker.stop()

[codecarbon INFO @ 03:31:32] [setup] RAM Tracking...
[codecarbon INFO @ 03:31:32] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at /sys/class/powercap/intel-rapl/subsystem to measure CPU

[codecarbon INFO @ 03:31:33] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 03:31:33] [setup] GPU Tracking...
[codecarbon INFO @ 03:31:33] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 03:31:33] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 03:31:33] >>> Tracker's metadata:
[codecarbon INFO @ 03:31:33]   Platform system: Linux-6.1.123+-x86_64-with-glibc2.35
[codecarbon INFO @ 03:31:33]   Python version: 3.11.12
[codecarbon INFO @ 03:31:33]   CodeCarbon version: 3.0.0
[codecarbon INFO @ 03:31:33]   Available RAM : 52.960 GB
[codecarbon INF

  0%|          | 0/200 [00:00<?, ?it/s]



KeyboardInterrupt: 

In [None]:
# Distinct gold labels that occur in the processed test data.
labels = {sample['label'] for sample in processed_test_data}
list(labels)

['not_human',
 'science_unreliable',
 'fossil_fuels_needed',
 'not_relevant',
 'not_happening',
 'solutions_harmful_unnecessary',
 'not_bad',
 'proponents_biased']

In [None]:
# Distinct strings the model produced, to compare against the expected label set.
set(predicted_labels)

{'fossil_fuels_needed',
 'not_bad',
 'not_happening',
 'not_human',
 'not_relevant',
 'proponents_biased',
 'science_unreliable',
 'solutions_harmful_unnecessary'}

In [None]:
# import torch
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
# from tqdm.notebook import tqdm
# from codecarbon import EmissionsTracker

# # Load the tokenizer and model
# tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-mnli")
# model = AutoModelForSequenceClassification.from_pretrained("facebook/bart-large-mnli")
# model.eval()

# # Define the sequence and candidate labels
# sequence = "We need more fossil fuel "
# candidate_labels = list(labels)

# true_labels = []
# predicted_labels = []

# tracker = EmissionsTracker()
# tracker.start()

# # Prepare inputs for each label
# for sample in tqdm(processed_test_data):
#     sequence = sample["text"]
#     true_labels.append(sample["label"])

#     inputs = tokenizer(
#         [sequence] * len(candidate_labels),
#         [f"This example is {label}." for label in candidate_labels],
#         return_tensors='pt',
#         padding=True,
#         truncation=True
#     )

#     # Perform inference
#     with torch.no_grad():
#         outputs = model(**inputs)
#         logits = outputs.logits

#     # Extract entailment scores
#     entailment_logits = logits[:, 2]  # Index 2 corresponds to entailment
#     probs = torch.softmax(entailment_logits, dim=0)
#     label = candidate_labels[probs.argmax().item()]

#     predicted_labels.append(label)

# emission = tracker.stop()
# print(tracker.final_emissions)

# # # Display the results
# # for label, score in zip(candidate_labels, probs):
# #     print(f"{label}: {score:.4f}")

In [None]:
# Display the summary dict.
# NOTE(review): `results` is only assigned in a cell further down, so this
# cell fails when the notebook is run top to bottom on a fresh kernel.
results

{'model_name': 'Qwen/Qwen2.5-32B-Instruct',
 'batch_size': 1,
 'epochs': 'N/A',
 'location': 'SGP',
 'energy_consumed_kwh': 0.04142341707571259,
 'carbon_emissions_kgco2': 0.0195014405611552,
 'cpu_energy_kwh': 0.013099032626220825,
 'gpu_energy_kwh': 0.022160576895113995,
 'ram_energy_kwh': 0.006163807554377795,
 'hardware': [RAM(),
  CPU(Intel(R) Xeon(R) CPU @ 2.20GHz > 85W [generic]),
  GPU() (NVIDIA L4)],
 'training_time': 1109.9072978549998}

In [None]:
import re

# Extract the first label-like token (word characters containing an
# underscore) from every raw model output.
cleaned_pred_labels = []

for label in predicted_labels:
    match = re.search(r'\b\w+_\w+\b', label)

    # Always append exactly one entry per prediction. Skipping outputs without
    # a match would make this list shorter than `true_labels` and shift every
    # later prediction onto the wrong reference label.
    cleaned_pred_labels.append(match.group(0) if match else "unknown")

In [None]:
# `f1_score` is imported here as well: other cells rebind that name to a
# number, and re-importing restores the function before it is called.
from sklearn.metrics import accuracy_score, classification_report, f1_score

# Evaluate model performance
accuracy = accuracy_score(true_labels, cleaned_pred_labels)
print(f"Accuracy: {accuracy:.4f}")

report = classification_report(true_labels, cleaned_pred_labels)
print("Classification Report:")
print(report)

# Stored as `f1` (the name the results dict reads) instead of overwriting the
# `f1_score` function with its own return value.
f1 = f1_score(true_labels, cleaned_pred_labels, average="macro")
print(f"F1 Score: {f1:.4f}")

Accuracy: 0.6050
Classification Report:
                               precision    recall  f1-score   support

          fossil_fuels_needed       0.31      1.00      0.48        10
                      not_bad       0.67      0.32      0.43        19
                not_happening       0.83      0.76      0.79        25
                    not_human       0.26      0.35      0.30        17
                 not_relevant       0.77      0.68      0.72        44
            proponents_biased       0.93      0.45      0.61        31
           science_unreliable       0.56      0.78      0.65        32
solutions_harmful_unnecessary       0.79      0.50      0.61        22

                     accuracy                           0.60       200
                    macro avg       0.64      0.61      0.57       200
                 weighted avg       0.69      0.60      0.61       200



In [None]:
# One-row summary of the prompt-based run: model id, the tracker's
# energy/emission figures, and the evaluation scores.
# Expects `accuracy` and `f1` to have been set by the evaluation cell and
# `emissions` by the inference cell before this cell runs.
# NOTE(review): the tracker fields are read from underscore-prefixed (private)
# attributes, which may change between codecarbon releases — confirm whether a
# public accessor exists for the installed version.
results = {
        "model_name": model_id,
        "batch_size": 1,
        "epochs": 'N/A',
        'location': tracker._geo.country_iso_code,
        "energy_consumed_kwh": tracker._total_energy.kWh,
        "carbon_emissions_kgco2": emissions,
        'cpu_energy_kwh': tracker._total_cpu_energy.kWh,
        'gpu_energy_kwh': tracker._total_gpu_energy.kWh,
        'ram_energy_kwh': tracker._total_ram_energy.kWh,
        'hardware': tracker._hardware,
        # Key is "training_time" although no training happens in this run; the
        # value is last measurement time minus start time of the tracker.
        "training_time": tracker._last_measured_time - tracker._start_time,
        'accuracy': accuracy,
        'f1_score': f1,
    }