# Train and Finetune ML and BERT models

## Set up

In [None]:
!pip install -U datasets
!pip install codecarbon
!pip install contractions



In [None]:
# Mount Google Drive at /content/drive; the experiment loop further down
# writes its result CSVs underneath this mount point.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from datetime import datetime
import pandas as pd
import numpy as np
import torch
from torch import nn
from transformers import (AutoTokenizer,
                          AutoModelForSequenceClassification,
                          AutoModel,
                          BertTokenizer,
                          BertModel,
                          DistilBertModel,
                          RobertaModel,
                          DebertaModel,
                          DebertaV2Model)
from huggingface_hub import (
    PyTorchModelHubMixin,
    notebook_login,
    ModelCard,
    ModelCardData,
    EvalResult,
)
from datasets import DatasetDict, load_dataset
from torch.utils.data import Dataset, DataLoader

from codecarbon import (
    EmissionsTracker,
    track_emissions,
)



In [None]:
# Authenticate against the Hugging Face Hub from inside the notebook.
# NOTE(review): new_session=False presumably reuses an already stored token
# instead of prompting again - confirm against the huggingface_hub docs.
notebook_login(new_session=False)

## Create custom dataset class

In [None]:
# Defining the TextDataset class

class TextDataset(Dataset):
    """Tokenized text-classification dataset held fully in memory.

    All texts are tokenized once in the constructor (truncated to
    ``max_length`` and padded to a common length); ``__getitem__`` then only
    slices the pre-computed tensors.

    Args:
        texts: Iterable of raw strings. It is materialized into a plain list
            before tokenization, because tokenizers are only guaranteed to
            accept ``str`` / ``list[str]`` and not lazy column objects
            (e.g. what ``dataset["train"]["quote"]`` may return).
        labels: Iterable of labels, either integers or strings that start
            with the numeric class id (for example ``"3_some_class"``).
        tokenizer: Callable with the Hugging Face tokenizer call signature
            that returns a mapping of tensors.
        max_length: Maximum number of tokens kept per text.

    Raises:
        ValueError: If a label does not start with an integer class id.
    """

    def __init__(self, texts, labels, tokenizer, max_length=256):
        self.encodings = tokenizer(
            list(texts),
            truncation=True,
            padding=True,
            max_length=max_length,
            return_tensors="pt",
        )
        self.labels = torch.tensor([self._label_to_index(label) for label in labels])

    @staticmethod
    def _label_to_index(label):
        """Return the integer class id encoded at the start of ``label``."""
        text = str(label).strip()
        digits = []
        # Read the whole leading digit run, not just the first character, so
        # ids >= 10 and plain integer labels are parsed correctly.
        for char in text:
            if not char.isdecimal():
                break
            digits.append(char)
        if not digits:
            raise ValueError(f"Label {label!r} does not start with an integer class id")
        return int("".join(digits))

    def __getitem__(self, idx):
        """Return the encoded example ``idx`` as a dict that also holds ``"labels"``."""
        item = {key: val[idx] for key, val in self.encodings.items()}
        item["labels"] = self.labels[idx]
        return item

    def __len__(self) -> int:
        """Return the number of examples."""
        return len(self.labels)

## Define text preprocessing helper function

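This helper normalizes raw text for the classical NLP pipeline (TF-IDF + XGBoost): it expands contractions, replaces non-alphanumeric characters with spaces, lowercases the text, and lemmatizes each token.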

In [None]:
import nltk
from nltk.stem import WordNetLemmatizer
import contractions
import re

# Download the NLTK resources needed by preprocess() below
# (nltk.word_tokenize and WordNetLemmatizer).
nltk.download('punkt_tab')
nltk.download('wordnet')

def preprocess(X):
    """Normalize raw documents for the TF-IDF pipeline.

    Every document has its contractions expanded, each non-alphanumeric
    character replaced by a space, is lowercased, and is finally tokenized
    with NLTK and lemmatized token by token.

    Args:
        X: Iterable of raw text strings.

    Returns:
        list: One cleaned, space-joined string per input document.
    """
    wordnet = WordNetLemmatizer()

    def _clean(document):
        # Contractions are expanded first, before the apostrophes are stripped.
        text = contractions.fix(document)
        text = re.sub(r"[^a-zA-Z0-9]", " ", text).lower()
        tokens = nltk.word_tokenize(text)
        return " ".join(map(wordnet.lemmatize, tokens))

    return [_clean(document) for document in X]

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


## Model set up

In [None]:
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
class TransformerClassifier(nn.Module, PyTorchModelHubMixin):
    """Pretrained transformer encoder followed by dropout and a linear head.

    The backbone class is chosen from ``model_name``; supported families are
    DistilBERT, RoBERTa (including DistilRoBERTa), DeBERTa v1/v2/v3 and BERT.

    Args:
        num_labels: Number of output classes.
        model_name: Hugging Face checkpoint name used for the backbone.
        dropout_rate: Dropout probability applied to the pooled representation.

    Raises:
        ValueError: If ``model_name`` matches none of the supported families.
    """

    def __init__(self, num_labels=8,
                 model_name="bert-base-uncased",
                 dropout_rate=0.05):
        super().__init__()
        self.model_name = model_name

        # Load appropriate transformer backbone
        if model_name.startswith("distilbert"):
            self.backbone = DistilBertModel.from_pretrained(model_name)
        elif model_name.startswith(("distilroberta", "roberta")):
            # DistilRoBERTa checkpoints use the RoBERTa architecture; loading
            # them with DistilBertModel would not reuse the pretrained weights.
            self.backbone = RobertaModel.from_pretrained(model_name)
        elif "deberta-v2" in model_name or "deberta-v3" in model_name:
            self.backbone = DebertaV2Model.from_pretrained(model_name)
        elif "deberta" in model_name:
            # Remaining DeBERTa checkpoints are the v1 architecture.
            self.backbone = DebertaModel.from_pretrained(model_name)
        elif model_name.startswith("bert"):
            self.backbone = BertModel.from_pretrained(model_name)
        else:
            raise ValueError(f"Unsupported model: {model_name}")

        self.config = self.backbone.config
        self.config.num_labels = num_labels
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(self.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        """Return the classification logits for a batch of token ids.

        Args:
            input_ids: Token id tensor passed to the backbone.
            attention_mask: Attention mask tensor passed to the backbone.

        Returns:
            Output of the linear head (one score per label).
        """
        outputs = self.backbone(input_ids=input_ids, attention_mask=attention_mask)

        # Prefer the backbone's pooled output; backbones without a pooler
        # fall back to the hidden state of the first ([CLS]) token.
        if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
            pooled_output = outputs.pooler_output
        else:
            pooled_output = outputs.last_hidden_state[:, 0, :]  # [CLS] token

        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        return logits


class TransformerFreezeClassifier(nn.Module, PyTorchModelHubMixin):
    """
    A transformer-based classifier whose embeddings and first encoder layers
    are frozen.

    Supports DistilBERT, RoBERTa (including DistilRoBERTa), DeBERTa v1/v2/v3
    and BERT backbones.

    Args:
        num_labels: Number of output classes.
        model_name: Hugging Face checkpoint name used for the backbone.
        dropout_rate: Dropout probability applied to the pooled representation.
        freeze_layers: Number of leading encoder layers to freeze (the
            embeddings are always frozen).

    Raises:
        ValueError: If ``model_name`` matches none of the supported families.
    """
    def __init__(self, num_labels=8,
                 model_name="bert-base-uncased",
                 dropout_rate=0.05,
                 freeze_layers=10):
        super().__init__()
        self.model_name = model_name

        # Load the appropriate transformer backbone
        if model_name.startswith("distilbert"):
            self.backbone = DistilBertModel.from_pretrained(model_name)
        elif model_name.startswith(("distilroberta", "roberta")):
            # DistilRoBERTa checkpoints use the RoBERTa architecture.
            self.backbone = RobertaModel.from_pretrained(model_name)
        elif "deberta-v2" in model_name or "deberta-v3" in model_name:
            self.backbone = DebertaV2Model.from_pretrained(model_name)
        elif "deberta" in model_name:
            # Remaining DeBERTa checkpoints are the v1 architecture.
            self.backbone = DebertaModel.from_pretrained(model_name)
        elif model_name.startswith("bert"):
            self.backbone = BertModel.from_pretrained(model_name)
        else:
            raise ValueError(f"Unsupported model: {model_name}")

        self.config = self.backbone.config
        self.config.num_labels = num_labels

        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(self.config.hidden_size, num_labels)

        # Freeze layers as requested
        self.freeze_model_layers(freeze_layers)

    def freeze_model_layers(self, freeze_layers):
        """
        Freeze the embeddings and the first ``freeze_layers`` encoder layers.

        Args:
            freeze_layers: Number of leading encoder layers whose parameters
                get ``requires_grad = False``.

        Raises:
            ValueError: If the backbone class is not a known model family.
        """
        backbone_type = type(self.backbone).__name__.lower()

        # DistilBERT names its layer stack differently from the other families.
        if "distilbert" in backbone_type:
            encoder_prefix = "transformer.layer"
        elif any(tag in backbone_type for tag in ("deberta", "roberta", "bert")):
            encoder_prefix = "encoder.layer"
        else:
            raise ValueError("Unknown backbone type for freezing.")

        # The trailing dot is essential: without it the prefix of layer 1
        # ("encoder.layer.1") would also match layers 10, 11, ...
        frozen_prefixes = ("embeddings",) + tuple(
            f"{encoder_prefix}.{i}." for i in range(freeze_layers)
        )

        for name, param in self.backbone.named_parameters():
            if name.startswith(frozen_prefixes):
                param.requires_grad = False

    def forward(self, input_ids, attention_mask):
        """Return the classification logits for a batch of token ids.

        Args:
            input_ids: Token id tensor passed to the backbone.
            attention_mask: Attention mask tensor passed to the backbone.

        Returns:
            Output of the linear head (one score per label).
        """
        outputs = self.backbone(input_ids=input_ids, attention_mask=attention_mask)

        # Prefer the backbone's pooled output; backbones without a pooler
        # fall back to the hidden state of the first ([CLS]) token.
        if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
            pooled_output = outputs.pooler_output
        else:
            pooled_output = outputs.last_hidden_state[:, 0, :]  # Use [CLS] token

        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        return logits


class TfidfXGBClassifier:
    """Classical baseline: TF-IDF features fed into a multi-class XGBoost model.

    Args:
        num_labels: Number of target classes, forwarded to XGBoost as ``num_class``.
    """

    def __init__(self, num_labels=8):
        # Word 1- to 3-grams seen in at least 2 documents, capped at 2000 features.
        vectorizer = TfidfVectorizer(
            ngram_range=(1, 3),
            min_df=2,
            max_features=2000,
        )
        booster = XGBClassifier(
            num_class=num_labels,
            objective="multi:softmax",
            eval_metric='merror',
            random_state=42,
        )
        self.xgb_pipeline = Pipeline([
            ("tfidf", vectorizer),
            ("classifier", booster),
        ])

    def fit(self, X, y):
        """Fit the vectorizer and the classifier on texts ``X`` and labels ``y``; returns ``self``."""
        self.xgb_pipeline.fit(X, y)
        return self

    def predict(self, X):
        """Return the pipeline's predictions for the texts ``X``."""
        predictions = self.xgb_pipeline.predict(X)
        return predictions


class ConspiracyClassifier768(nn.Module, PyTorchModelHubMixin):
    """Five-layer MLP classifier over 768-dimensional input vectors.

    Four Linear -> BatchNorm1d -> ReLU stages (768 -> 512 -> 512 -> 256 -> 128)
    are followed by a final linear layer producing ``num_classes`` scores.
    Dropout (p=0.2) is applied once, after the third stage.

    Args:
        num_classes: Number of output classes.
    """

    def __init__(self, num_classes=8):
        super().__init__()
        # Attribute names and creation order are kept stable: they determine
        # the state_dict keys and the order in which weights are initialized.
        self.h1, self.bn1 = nn.Linear(768, 512), nn.BatchNorm1d(512)
        self.h2, self.bn2 = nn.Linear(512, 512), nn.BatchNorm1d(512)
        self.h3, self.bn3 = nn.Linear(512, 256), nn.BatchNorm1d(256)
        self.h4, self.bn4 = nn.Linear(256, 128), nn.BatchNorm1d(128)
        self.h5 = nn.Linear(128, num_classes)

        self.dropout = nn.Dropout(0.2)
        self.activation = nn.ReLU()

        self.initialize_weights()

    def forward(self, input_texts):
        """Map input vectors to class scores.

        Args:
            input_texts: Tensor whose last dimension is 768
                (presumably a ``(batch, 768)`` batch of text embeddings - TODO confirm).

        Returns:
            Output of the final linear layer (``num_classes`` scores per row).
        """
        stages = (
            (self.h1, self.bn1, False),
            (self.h2, self.bn2, False),
            (self.h3, self.bn3, True),   # dropout follows the third stage only
            (self.h4, self.bn4, False),
        )
        hidden = input_texts
        for linear, norm, use_dropout in stages:
            hidden = self.activation(norm(linear(hidden)))
            if use_dropout:
                hidden = self.dropout(hidden)
        return self.h5(hidden)

    def initialize_weights(self):
        """Kaiming-normal initialize every linear weight and zero every linear bias."""
        linear_layers = (module for module in self.modules() if isinstance(module, nn.Linear))
        for layer in linear_layers:
            nn.init.kaiming_normal_(layer.weight, nonlinearity="relu")
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)

### Train XGB

In [None]:
class TfidfXGBTrainer():
    """Trains and evaluates a TF-IDF + XGBoost model on the quote dataset.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        dataset: Mapping with ``"train"`` and ``"test"`` splits, each exposing
            a ``"quote"`` (text) and a ``"label"`` column.
    """
    def __init__(self, model, dataset):
        self.model = model
        self.dataset = dataset
        self.train_texts, self.train_labels, self.val_texts, self.val_labels = self.prepare_dataset_tfidf()

    @staticmethod
    def _label_to_index(label):
        """Return the integer class id encoded at the start of ``label``.

        Accepts integers as well as strings such as ``"3_some_class"``.

        Raises:
            ValueError: If ``label`` does not start with an integer class id.
        """
        text = str(label).strip()
        digits = []
        # Read the whole leading digit run, not just the first character, so
        # ids >= 10 and plain integer labels are parsed correctly.
        for char in text:
            if not char.isdecimal():
                break
            digits.append(char)
        if not digits:
            raise ValueError(f"Label {label!r} does not start with an integer class id")
        return int("".join(digits))

    def prepare_dataset_tfidf(self):
        """Extract texts and integer labels for both splits and preprocess the texts.

        Returns:
            tuple: (train texts, train labels, validation texts, validation
            labels); the texts have been passed through ``preprocess``.
        """
        print("Loading dataset...")
        # Load the dataset
        dataset = self.dataset

        # Split dataset
        train_texts = dataset["train"]["quote"]
        train_labels = [self._label_to_index(l) for l in dataset["train"]["label"]]
        val_texts = dataset["test"]["quote"]
        val_labels = [self._label_to_index(l) for l in dataset["test"]["label"]]

        train_texts_preprocessed = preprocess(train_texts)
        val_texts_preprocessed = preprocess(val_texts)

        print("Dataset loaded and texts preprocessed.")
        return train_texts_preprocessed, train_labels, val_texts_preprocessed, val_labels

    def train(self):
        """Fit the model on the preprocessed training split."""
        print("Training XGBoost model...")
        self.model.fit(self.train_texts, self.train_labels)
        print("XGBoost model trained.")


    def evaluate(self):
        """Score the model on the validation split.

        Returns:
            tuple: (accuracy, weighted F1 score).
        """
        # Imported here so the method does not rely on a later notebook cell
        # having already imported the metric functions.
        from sklearn.metrics import accuracy_score, f1_score

        print("Evaluating XGBoost model...")
        predictions = self.model.predict(self.val_texts)
        accuracy = accuracy_score(self.val_labels, predictions)
        f1 = f1_score(self.val_labels, predictions, average="weighted")
        print(f"Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}")
        return accuracy, f1


In [None]:
class BertTrainer():
    """Fine-tunes and evaluates a PyTorch text classifier on the quote dataset.

    Args:
        model: Classifier to train. Either a model called as
            ``model(input_ids=..., attention_mask=...)`` or a
            ``ConspiracyClassifier768`` that consumes pre-computed embeddings.
        tokenizer: Hugging Face checkpoint name used to load the tokenizer
            (and, for ``ConspiracyClassifier768``, the embedding encoder).
        dataset: Mapping with ``"train"`` and ``"test"`` splits, each exposing
            a ``"quote"`` (text) and a ``"label"`` column.
        batch_size: Batch size of both dataloaders.
        epochs: Number of training epochs.
        test: If True, ``train_model`` skips training and only evaluates once.
    """
    def __init__(self, model, tokenizer, dataset, batch_size=32, epochs=3, test=False):
        self.model = model
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer)
        # Only the embedding-based MLP needs a separate encoder. Loading one
        # for every model would keep a second, unused network on the device.
        if isinstance(model, ConspiracyClassifier768):
            self.embedding_model = AutoModel.from_pretrained(tokenizer)
        else:
            self.embedding_model = None
        self.dataset = dataset
        self.batch_size = batch_size
        self.epochs = epochs
        self.training_progress = []
        self.test = test

        self.train_dataloader, self.val_dataloader = self.prepare_dataset_bert()

        if torch.backends.mps.is_available():
            self.device = torch.device("mps")
            torch.mps.empty_cache()
        elif torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")

        self.model.to(self.device)
        if self.embedding_model is not None:
            self.embedding_model.to(self.device)
            # The encoder is only used for inference (see get_embeddings).
            self.embedding_model.eval()


    def prepare_dataset_bert(self):
        """Build the train and validation dataloaders.

        Returns:
            tuple: (train dataloader, shuffled; validation dataloader, unshuffled).
        """
        print("Loading dataset...")
        # Load the dataset
        dataset = self.dataset

        # Split dataset. The text columns are materialized as plain lists
        # because the tokenizer expects ``list[str]``.
        train_texts = list(dataset["train"]["quote"])
        train_labels = dataset["train"]["label"]
        val_texts = list(dataset["test"]["quote"])
        val_labels = dataset["test"]["label"]


        # Create the datasets
        train_dataset = TextDataset(train_texts, train_labels, self.tokenizer)
        val_dataset = TextDataset(val_texts, val_labels, self.tokenizer)

        # Create the dataloaders
        train_dataloader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
        val_dataloader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False)

        print("Dataset loaded and dataloaders created.")
        return train_dataloader, val_dataloader


    def custom_print_time(self, additional_text):
        """
        Print ``additional_text`` prefixed with the current local time.
        """
        time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"{time_str} {additional_text}")


    def _forward(self, model, input_ids, attention_mask):
        """Run ``model`` on a batch, going through the embedding encoder when required."""
        if isinstance(model, ConspiracyClassifier768):
            embeddings = self.get_embeddings(input_ids, attention_mask, self.embedding_model)
            return model(embeddings)
        return model(input_ids=input_ids, attention_mask=attention_mask)


    def model_metrics(self, model, dataloader):
        """Compute the mean per-batch loss and the accuracy of ``model`` on ``dataloader``.

        The model is switched to eval mode for the measurement and afterwards
        put back into whichever mode it was in before the call.

        Returns:
            tuple: (average of the per-batch cross-entropy losses, accuracy).
        """
        criterion = nn.CrossEntropyLoss()
        was_training = model.training
        model.eval()

        total_loss = 0
        total_correct = 0
        total_length = 0
        with torch.no_grad():
            for batch in dataloader:
                input_ids = batch["input_ids"].to(self.device)
                attention_mask = batch["attention_mask"].to(self.device)
                labels = batch["labels"].to(self.device)

                outputs = self._forward(model, input_ids, attention_mask)
                loss = criterion(outputs, labels)

                predicted = torch.argmax(outputs, 1)

                # Calculate accuracy
                total_correct += (predicted == labels).sum().item()
                total_length += labels.size(0)
                total_loss += loss.item()

        average_loss = total_loss / len(dataloader)
        average_acc = total_correct / total_length

        model.train(was_training)
        return average_loss, average_acc

    def eval_model_status(self, epoch):
        """Evaluate on both splits, print a one-line summary and return the metrics.

        Args:
            epoch: Zero-based epoch index (printed one-based).

        Returns:
            dict: ``epoch``, ``train_loss``, ``train_acc``, ``test_loss``, ``test_acc``.
        """
        train_loss, train_acc = self.model_metrics(self.model, self.train_dataloader)
        test_loss, test_acc = self.model_metrics(self.model, self.val_dataloader)
        loss_str = f"Loss: Train {train_loss:0.3f}, Test {test_loss:0.3f}"
        acc_str = f"Acc: Train {train_acc:0.3f}, Test {test_acc:0.3f}"
        self.custom_print_time(f"Epoch {epoch+1:2}/{self.epochs} done. {loss_str}; and {acc_str}")
        metrics = dict(
            epoch=epoch,
            train_loss=train_loss,
            train_acc=train_acc,
            test_loss=test_loss,
            test_acc=test_acc,
        )
        return metrics

    def get_embeddings(self, input_ids, attention_mask, embedding_model):
        """Return ``embedding_model``'s ``pooler_output`` for the batch, computed without gradients."""
        with torch.no_grad():
            embeddings = embedding_model(input_ids=input_ids, attention_mask=attention_mask).pooler_output
        return embeddings

    def train_model(self, lr=2e-5):
        """Train ``self.model`` with AdamW and cross-entropy loss.

        After every epoch the model is evaluated on both splits and the
        metrics are appended to ``self.training_progress``. In test mode no
        training happens; the model is evaluated once instead.

        Args:
            lr: Learning rate passed to AdamW.
        """
        # Define the optimizer
        optimizer = torch.optim.AdamW(self.model.parameters(), lr)
        criterion = nn.CrossEntropyLoss()

        self.model.train()

        print("Training the model...")
        # Training loop
        if not self.test:
            for epoch in range(self.epochs):
                self.custom_print_time(f"Epoch {epoch + 1}/{self.epochs}")
                for batch in self.train_dataloader:
                    input_ids = batch["input_ids"].to(self.device)
                    attention_mask = batch["attention_mask"].to(self.device)
                    labels = batch["labels"].to(self.device)

                    outputs = self._forward(self.model, input_ids, attention_mask)
                    loss = criterion(outputs, labels)

                    loss.backward()
                    optimizer.step()
                    optimizer.zero_grad()

                eval_metrics = self.eval_model_status(epoch)
                self.training_progress.append(eval_metrics)

        else:
            print("Test mode enabled, skipping training.")
            eval_metrics = self.eval_model_status(0)
            self.training_progress.append(eval_metrics)

        print("Training complete.")

## Model Config

In [None]:
# Configuration of models to be experimented with.
#
# Each entry maps a short model id to:
#   "name"      - display name stored in the results table; it must contain
#                 "tfidf" (any case) for the entry to be routed to the
#                 TF-IDF/XGBoost trainer instead of the BERT trainer,
#   "tokenizer" - checkpoint name used to load the tokenizer,
#   "model"     - an already instantiated model.
#
# The models are instantiated when this cell runs, so every uncommented entry
# loads its pretrained weights immediately. Uncomment an entry to include it
# in the experiment loop.

model_configs = {
    # "tfidf-xgboost": {
    #     "name": "TFIDF + XGBoost",
    #     "tokenizer": "tfidf",
    #     "model": TfidfXGBClassifier()
    # },
    # "bert-base-uncased": {
    #     "name": "BERT Base Uncased",
    #     "tokenizer": "bert-base-uncased",
    #     "model": TransformerClassifier(model_name="bert-base-uncased"),
    # },
    # "bert-base-uncased-freeze-10": {
    #     "name": "BERT Base Uncased with 10 Frozen Layers",
    #     "tokenizer": "bert-base-uncased",
    #     "model": TransformerFreezeClassifier(model_name="bert-base-uncased", freeze_layers=10),
    # },
    #     "bert-base-uncased-freeze-8": {
    #     "name": "BERT Base Uncased with 8 Frozen Layers",
    #     "tokenizer": "bert-base-uncased",
    #     "model": TransformerFreezeClassifier(model_name="bert-base-uncased", freeze_layers=8),
    # },
    # "distilbert-base-uncased": {
    #     "name": "DistilBERT",
    #     "tokenizer": "distilbert-base-uncased",
    #     "model": TransformerClassifier(model_name="distilbert-base-uncased"),
    # },
    # "roberta-base": {
    #     "name": "RoBERTa Base",
    #     "tokenizer": "roberta-base",
    #     "model": TransformerClassifier(model_name="roberta-base"),
    # },
    # "roberta-base-freeze": {
    #     "name": "RoBERTa Base with 8 frozen layers",
    #     "tokenizer": "roberta-base",
    #     "model": TransformerFreezeClassifier(model_name="roberta-base", freeze_layers=8),
    # },
    "roberta-base-freeze": {
        "name": "RoBERTa Base with 10 frozen layers",
        "tokenizer": "roberta-base",
        "model": TransformerFreezeClassifier(model_name="roberta-base", freeze_layers=10),
    },
    # "distilroberta-base": {
    #     "name": "DistilRoBERTa Base",
    #     "tokenizer": "distilroberta-base",
    #     "model": TransformerClassifier(model_name="distilroberta-base"),
    # },
    # 'deberta-v3-base': {
    #     "name": "DeBERTa v3 base",
    #     "tokenizer": "microsoft/deberta-v3-base",
    #     "model": TransformerClassifier(model_name="microsoft/deberta-v3-base"),
    # },
    # 'deberta-v3-base-freeze': {
    #     "name": "DeBERTa v3 base with 8 frozen layers",
    #     "tokenizer": "microsoft/deberta-v3-base",
    #     "model": TransformerFreezeClassifier(model_name="microsoft/deberta-v3-base", freeze_layers=8),
    # },
    'deberta-v3-base-freeze': {
        "name": "DeBERTa v3 base with 10 frozen layers",
        "tokenizer": "microsoft/deberta-v3-base",
        "model": TransformerFreezeClassifier(model_name="microsoft/deberta-v3-base", freeze_layers=10),
    },
    # "conspiracy-768": {
    #     "name": "custom MLP 768 model",
    #     "tokenizer": "sentence-transformers/all-distilroberta-v1",
    #     "model": ConspiracyClassifier768(),
    # },
}

# `models` is the name the experiment loop iterates over.
models = model_configs

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Measure carbon

## Measure training carbon

In [None]:
from codecarbon import EmissionsTracker
import time

In [None]:
from sklearn.metrics import accuracy_score, f1_score

def evaluate_model_performance(model, device, dataloader):
    """
    Score a trained classifier on every batch of a dataloader.

    Args:
        model: Trained PyTorch model, called as
            ``model(input_ids=..., attention_mask=...)``; the predicted class
            is the argmax of its output along dimension 1.
        device: Device the batch tensors are moved to before the forward pass.
        dataloader: DataLoader yielding dicts with the keys ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"``.

    Returns:
        tuple: (accuracy, weighted F1 score) over all batches.
    """
    model.eval()
    predicted_classes, true_classes = [], []

    with torch.no_grad():
        for batch in dataloader:
            token_ids, mask, targets = (
                batch[key].to(device)
                for key in ("input_ids", "attention_mask", "labels")
            )

            scores = model(input_ids=token_ids, attention_mask=mask)
            predicted_classes += list(scores.argmax(dim=1).cpu().numpy())
            true_classes += list(targets.cpu().numpy())

    accuracy = accuracy_score(true_classes, predicted_classes)
    weighted_f1 = f1_score(true_classes, predicted_classes, average="weighted")
    return accuracy, weighted_f1

In [None]:
from sklearn.metrics import accuracy_score, f1_score

def evaluate_model_performance_embed(model, embedding_model, device, dataloader):
    """
    Score a classifier that consumes embeddings produced by a separate encoder.

    Args:
        model: Trained PyTorch model called with the embeddings as its only
            argument; the predicted class is the argmax of its output along
            dimension 1.
        embedding_model: Encoder called as
            ``embedding_model(input_ids=..., attention_mask=...)`` whose
            ``pooler_output`` is used as the embedding.
        device: Device the batch tensors are moved to before the forward pass.
        dataloader: DataLoader yielding dicts with the keys ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"``.

    Returns:
        tuple: (accuracy, weighted F1 score) over all batches.
    """
    model.eval()
    predicted_classes, true_classes = [], []

    with torch.no_grad():
        for batch in dataloader:
            token_ids, mask, targets = (
                batch[key].to(device)
                for key in ("input_ids", "attention_mask", "labels")
            )

            encoded = embedding_model(input_ids=token_ids, attention_mask=mask)
            scores = model(encoded.pooler_output.to(device))

            predicted_classes += list(scores.argmax(dim=1).cpu().numpy())
            true_classes += list(targets.cpu().numpy())

    accuracy = accuracy_score(true_classes, predicted_classes)
    weighted_f1 = f1_score(true_classes, predicted_classes, average="weighted")
    return accuracy, weighted_f1

In [None]:
# Run models and measure carbon footprint
def train_eval_model_with_tracking(model_config, dataset, batch_size=32, epochs=3, test_mode=False, lr=2e-5):
    """
    Train and evaluate a model while tracking its carbon footprint.

    Args:
        model_config: Dict with the keys ``"name"`` (display name), ``"model"``
            (instantiated model) and ``"tokenizer"`` (checkpoint name). If the
            name contains "tfidf" (any case) the TF-IDF/XGBoost trainer is
            used, otherwise the BERT trainer.
        dataset: The dataset to be used for training and evaluation.
        batch_size: The batch size for training (BERT trainer only).
        epochs: The number of epochs for training (BERT trainer only).
        test_mode: If True, the BERT trainer skips training and only evaluates.
        lr: Learning rate for the BERT trainer.

    Returns:
        tuple: ``(model, results)`` where ``results`` is a dict with the model
        name, batch size, epochs, location, energy figures, carbon emissions,
        hardware, training time, accuracy and F1 score.
    """
    is_tfidf = 'tfidf' in model_config['name'].lower()

    if is_tfidf:
        training_run = TfidfXGBTrainer(
            model=model_config["model"],
            dataset=dataset,
        )
    else:
        training_run = BertTrainer(
            model=model_config["model"],
            tokenizer=model_config["tokenizer"],
            dataset=dataset,
            batch_size=batch_size,
            epochs=epochs,
            test=test_mode
        )

    # Only the training call is measured; trainer construction and the final
    # evaluation lie outside the tracked window.
    start_time = time.time()
    tracker = EmissionsTracker(project_name="FrugalAI_model_carbon",
                               measure_power_secs=10)
    tracker.start()
    try:
        if is_tfidf:
            training_run.train()
        else:
            training_run.train_model(lr=lr)
    finally:
        # Always stop the tracker, even when training raises, so that it does
        # not keep measuring in the background.
        end_time = time.time()
        emissions = tracker.stop()

    # Evaluate the model
    if is_tfidf:
        accuracy, f1 = training_run.evaluate()
        # These settings do not apply to the XGBoost pipeline.
        batch_size = 'N/A'
        epochs = 'N/A'
    elif isinstance(training_run.model, ConspiracyClassifier768):
        accuracy, f1 = evaluate_model_performance_embed(
            training_run.model, training_run.embedding_model, training_run.device, training_run.val_dataloader
        )
    else:
        accuracy, f1 = evaluate_model_performance(
            training_run.model, training_run.device, training_run.val_dataloader
        )

    # NOTE(review): the underscore-prefixed tracker attributes are private to
    # codecarbon and may change between releases.
    results = {
        "model_name": model_config['name'],
        "batch_size": batch_size,
        "epochs": epochs,
        'location': tracker._geo.country_iso_code,
        "energy_consumed_kwh": tracker._total_energy.kWh,
        "carbon_emissions_kgco2": emissions,
        'cpu_energy_kwh': tracker._total_cpu_energy.kWh,
        'gpu_energy_kwh': tracker._total_gpu_energy.kWh,
        'ram_energy_kwh': tracker._total_ram_energy.kWh,
        'hardware': tracker._hardware,
        "training_time": end_time - start_time,
        'accuracy': accuracy,
        'f1_score': f1,
    }

    return training_run.model, results

In [None]:
# trained_models = {}
# results_list = []

# dataset = load_dataset("quotaclimat/frugalaichallenge-text-train")

# for model_name, model_config in models.items():
#     print(f"Training {model_name}")

#     model, results = train_eval_model_with_tracking(
#         model_config=model_config,
#         dataset=dataset,
#         batch_size=32,
#         epochs=3,
#     )

#     trained_models[model_name] = model
#     results_list.append(results)
#     print(f"Results for {model_name}: {results}")

In [None]:
import pandas as pd
import os
from datetime import datetime
from tqdm.notebook import tqdm

# Accumulators filled by the loop below: the trained model objects keyed by
# their `models` key, and one metrics dict per trained model.
trained_models, results_list = {}, []

dataset = load_dataset("quotaclimat/frugalaichallenge-text-train")

# Every run writes into its own timestamped folder on Google Drive.
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
output_dir = f"/content/drive/MyDrive/Colab Notebooks/FrugalAI/results/train_results_{timestamp}"
os.makedirs(output_dir, exist_ok=True)

# Running CSV: one row is appended as soon as a model finishes, so partial
# results survive an interrupted session.
all_results_filename = os.path.join(output_dir, "running_results.csv")

# The header row is only written when the file does not exist yet.
csv_file_exists = os.path.exists(all_results_filename)

for model_name, model_config in tqdm(models.items()):
    print(f"Training {model_name}")

    model, results = train_eval_model_with_tracking(
        model_config=model_config, dataset=dataset, batch_size=32, epochs=5, lr=2e-5
    )
    trained_models[model_name] = model
    results_list.append(results)
    print(f"Results for {model_name}: {results}")

    # Single-row frame for this model, appended to the running CSV.
    results_df = pd.DataFrame([results])
    results_df.to_csv(
        all_results_filename,
        mode='a',
        header=not csv_file_exists,
        index=False,
    )
    csv_file_exists = True  # every later append must skip the header
    print(f"Results for {model_name} appended to {all_results_filename}")

# Once every model is done, write the complete table to a second CSV.
all_results_df = pd.DataFrame(results_list)
all_results_filename = os.path.join(output_dir, f"{timestamp}_final_results.csv")
all_results_df.to_csv(all_results_filename, index=False)
print(f"All results saved to {all_results_filename}")

  0%|          | 0/2 [00:00<?, ?it/s]

Training roberta-base-freeze


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading dataset...
Dataset loaded and dataloaders created.


[codecarbon INFO @ 13:12:08] [setup] RAM Tracking...
[codecarbon INFO @ 13:12:08] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at /sys/class/powercap/intel-rapl/subsystem to measure CPU

[codecarbon INFO @ 13:12:09] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 13:12:09] [setup] GPU Tracking...
[codecarbon INFO @ 13:12:09] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 13:12:09] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 13:12:09] >>> Tracker's metadata:
[codecarbon INFO @ 13:12:09]   Platform system: Linux-6.1.123+-x86_64-with-glibc2.35
[codecarbon INFO @ 13:12:09]   Python version: 3.11.12
[codecarbon INFO @ 13:12:09]   CodeCarbon version: 3.0.1
[codecarbon INFO @ 13:12:09]   Available RAM : 52.960 GB
[codecarbon INF

Training the model...
2025-05-16 13:12:09 Epoch 1/5


[codecarbon INFO @ 13:12:19] Energy consumed for RAM : 0.000056 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:12:19] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:12:19] Energy consumed for All CPU : 0.000118 kWh
[codecarbon INFO @ 13:12:19] Energy consumed for all GPUs : 0.000188 kWh. Total GPU Power : 67.75860648294241 W
[codecarbon INFO @ 13:12:19] 0.000362 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:12:29] Energy consumed for RAM : 0.000111 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:12:29] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:12:29] Energy consumed for All CPU : 0.000236 kWh
[codecarbon INFO @ 13:12:29] Energy consumed for all GPUs : 0.000388 kWh. Total GPU Power : 71.95427449268223 W
[codecarbon INFO @ 13:12:29] 0.000735 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:12:39] Energy consumed for RAM : 0.000167 kWh. RAM Power : 2

2025-05-16 13:13:14 Epoch  1/5 done. Loss: Train 1.967, Test 1.997; and Acc: Train 0.269, Test 0.252
2025-05-16 13:13:14 Epoch 2/5


[codecarbon INFO @ 13:13:19] Energy consumed for RAM : 0.000389 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:13:19] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:13:19] Energy consumed for All CPU : 0.000826 kWh
[codecarbon INFO @ 13:13:19] Energy consumed for all GPUs : 0.001389 kWh. Total GPU Power : 71.9391045042819 W
[codecarbon INFO @ 13:13:19] 0.002604 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:13:29] Energy consumed for RAM : 0.000444 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:13:29] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:13:29] Energy consumed for All CPU : 0.000944 kWh
[codecarbon INFO @ 13:13:29] Energy consumed for all GPUs : 0.001589 kWh. Total GPU Power : 71.94025017409693 W
[codecarbon INFO @ 13:13:29] 0.002977 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:13:29] 0.017518 g.CO2eq/s mean an estimation of 552.460523764

2025-05-16 13:14:21 Epoch  2/5 done. Loss: Train 1.943, Test 1.982; and Acc: Train 0.269, Test 0.252
2025-05-16 13:14:21 Epoch 3/5


[codecarbon INFO @ 13:14:29] Energy consumed for RAM : 0.000777 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:14:29] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:14:29] Energy consumed for All CPU : 0.001652 kWh
[codecarbon INFO @ 13:14:29] Energy consumed for all GPUs : 0.002788 kWh. Total GPU Power : 71.97397949009832 W
[codecarbon INFO @ 13:14:29] 0.005218 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:14:39] Energy consumed for RAM : 0.000833 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:14:39] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:14:39] Energy consumed for All CPU : 0.001770 kWh
[codecarbon INFO @ 13:14:39] Energy consumed for all GPUs : 0.002988 kWh. Total GPU Power : 71.96602851019304 W
[codecarbon INFO @ 13:14:39] 0.005591 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:14:49] Energy consumed for RAM : 0.000888 kWh. RAM Power : 2

2025-05-16 13:15:28 Epoch  3/5 done. Loss: Train 1.929, Test 1.970; and Acc: Train 0.269, Test 0.252
2025-05-16 13:15:28 Epoch 4/5


[codecarbon INFO @ 13:15:29] Energy consumed for RAM : 0.001111 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:15:29] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:15:29] Energy consumed for All CPU : 0.002360 kWh
[codecarbon INFO @ 13:15:29] Energy consumed for all GPUs : 0.003987 kWh. Total GPU Power : 71.93062708313823 W
[codecarbon INFO @ 13:15:29] 0.007457 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:15:39] Energy consumed for RAM : 0.001166 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:15:39] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:15:39] Energy consumed for All CPU : 0.002478 kWh
[codecarbon INFO @ 13:15:39] Energy consumed for all GPUs : 0.004186 kWh. Total GPU Power : 71.96207848515341 W
[codecarbon INFO @ 13:15:39] 0.007831 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:15:49] Energy consumed for RAM : 0.001222 kWh. RAM Power : 2

2025-05-16 13:16:35 Epoch  4/5 done. Loss: Train 1.916, Test 1.959; and Acc: Train 0.269, Test 0.252
2025-05-16 13:16:35 Epoch 5/5


[codecarbon INFO @ 13:16:39] Energy consumed for RAM : 0.001499 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:16:39] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:16:39] Energy consumed for All CPU : 0.003186 kWh
[codecarbon INFO @ 13:16:39] Energy consumed for all GPUs : 0.005385 kWh. Total GPU Power : 71.95186664829232 W
[codecarbon INFO @ 13:16:39] 0.010071 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:16:49] Energy consumed for RAM : 0.001555 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:16:49] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:16:49] Energy consumed for All CPU : 0.003304 kWh
[codecarbon INFO @ 13:16:49] Energy consumed for all GPUs : 0.005585 kWh. Total GPU Power : 71.98227861519253 W
[codecarbon INFO @ 13:16:49] 0.010444 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:16:59] Energy consumed for RAM : 0.001610 kWh. RAM Power : 2

2025-05-16 13:17:43 Epoch  5/5 done. Loss: Train 1.903, Test 1.947; and Acc: Train 0.269, Test 0.252
Training complete.
Results for roberta-base-freeze: {'model_name': 'RoBERTa Base with 8 frozen layers', 'batch_size': 32, 'epochs': 5, 'location': 'SGP', 'energy_consumed_kwh': 0.012443777693952045, 'carbon_emissions_kgco2': 0.005858318994091826, 'cpu_energy_kwh': 0.003935813959234715, 'gpu_energy_kwh': 0.006656020047034, 'ram_energy_kwh': 0.0018519436876833302, 'hardware': [RAM(), CPU(Intel(R) Xeon(R) CPU @ 2.20GHz > 85W [generic]), GPU() (NVIDIA L4)], 'training_time': 335.02156925201416, 'accuracy': 0.25184577522559476, 'f1_score': 0.10133244167006238}
Results for roberta-base-freeze appended to /content/drive/MyDrive/Colab Notebooks/FrugalAI/results/train_results_20250516131155/running_results.csv
Training deberta-v3-base-freeze


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



Loading dataset...
Dataset loaded and dataloaders created.


[codecarbon INFO @ 13:17:56] [setup] RAM Tracking...
[codecarbon INFO @ 13:17:56] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at /sys/class/powercap/intel-rapl/subsystem to measure CPU

[codecarbon INFO @ 13:17:57] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 13:17:57] [setup] GPU Tracking...
[codecarbon INFO @ 13:17:57] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 13:17:57] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 13:17:57] >>> Tracker's metadata:
[codecarbon INFO @ 13:17:57]   Platform system: Linux-6.1.123+-x86_64-with-glibc2.35
[codecarbon INFO @ 13:17:57]   Python version: 3.11.12
[codecarbon INFO @ 13:17:57]   CodeCarbon version: 3.0.1
[codecarbon INFO @ 13:17:57]   Available RAM : 52.960 GB
[codecarbon INF

Training the model...
2025-05-16 13:17:57 Epoch 1/5


[codecarbon INFO @ 13:18:07] Energy consumed for RAM : 0.000056 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:18:07] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:18:07] Energy consumed for All CPU : 0.000118 kWh
[codecarbon INFO @ 13:18:07] Energy consumed for all GPUs : 0.000193 kWh. Total GPU Power : 69.27360374781422 W
[codecarbon INFO @ 13:18:07] 0.000366 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:18:17] Energy consumed for RAM : 0.000111 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:18:17] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:18:17] Energy consumed for All CPU : 0.000236 kWh
[codecarbon INFO @ 13:18:17] Energy consumed for all GPUs : 0.000392 kWh. Total GPU Power : 71.93355308880794 W
[codecarbon INFO @ 13:18:17] 0.000739 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:18:27] Energy consumed for RAM : 0.000167 kWh. RAM Power : 2

2025-05-16 13:20:54 Epoch  1/5 done. Loss: Train 2.061, Test 2.082; and Acc: Train 0.132, Test 0.114
2025-05-16 13:20:54 Epoch 2/5


[codecarbon INFO @ 13:20:57] Energy consumed for RAM : 0.001000 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:20:57] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:20:57] Energy consumed for All CPU : 0.002124 kWh
[codecarbon INFO @ 13:20:57] Energy consumed for all GPUs : 0.003585 kWh. Total GPU Power : 71.94096555930652 W
[codecarbon INFO @ 13:20:57] 0.006709 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:21:07] Energy consumed for RAM : 0.001055 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:21:07] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:21:07] Energy consumed for All CPU : 0.002242 kWh
[codecarbon INFO @ 13:21:07] Energy consumed for all GPUs : 0.003785 kWh. Total GPU Power : 71.86250673357338 W
[codecarbon INFO @ 13:21:07] 0.007082 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:21:17] Energy consumed for RAM : 0.001111 kWh. RAM Power : 2

2025-05-16 13:23:51 Epoch  2/5 done. Loss: Train 1.988, Test 2.022; and Acc: Train 0.268, Test 0.251
2025-05-16 13:23:51 Epoch 3/5


[codecarbon INFO @ 13:23:57] Energy consumed for RAM : 0.001999 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:23:57] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:23:57] Energy consumed for All CPU : 0.004248 kWh
[codecarbon INFO @ 13:23:57] Energy consumed for all GPUs : 0.007178 kWh. Total GPU Power : 71.81265447200411 W
[codecarbon INFO @ 13:23:57] 0.013426 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:24:07] Energy consumed for RAM : 0.002054 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:24:07] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:24:07] Energy consumed for All CPU : 0.004366 kWh
[codecarbon INFO @ 13:24:07] Energy consumed for all GPUs : 0.007378 kWh. Total GPU Power : 71.83676625797332 W
[codecarbon INFO @ 13:24:07] 0.013799 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:24:17] Energy consumed for RAM : 0.002110 kWh. RAM Power : 2

2025-05-16 13:26:47 Epoch  3/5 done. Loss: Train 1.966, Test 2.007; and Acc: Train 0.269, Test 0.252
2025-05-16 13:26:47 Epoch 4/5


[codecarbon INFO @ 13:26:57] Energy consumed for RAM : 0.002998 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:26:57] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:26:57] Energy consumed for All CPU : 0.006372 kWh
[codecarbon INFO @ 13:26:57] Energy consumed for all GPUs : 0.010771 kWh. Total GPU Power : 71.92320152628398 W
[codecarbon INFO @ 13:26:57] 0.020141 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:27:07] Energy consumed for RAM : 0.003054 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:27:07] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:27:07] Energy consumed for All CPU : 0.006490 kWh
[codecarbon INFO @ 13:27:07] Energy consumed for all GPUs : 0.010972 kWh. Total GPU Power : 72.48072689938952 W
[codecarbon INFO @ 13:27:07] 0.020516 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:27:17] Energy consumed for RAM : 0.003109 kWh. RAM Power : 2

2025-05-16 13:29:43 Epoch  4/5 done. Loss: Train 1.962, Test 2.008; and Acc: Train 0.269, Test 0.252
2025-05-16 13:29:43 Epoch 5/5


[codecarbon INFO @ 13:29:47] Energy consumed for RAM : 0.003942 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:29:47] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:29:47] Energy consumed for All CPU : 0.008378 kWh
[codecarbon INFO @ 13:29:47] Energy consumed for all GPUs : 0.014165 kWh. Total GPU Power : 71.87463944273982 W
[codecarbon INFO @ 13:29:47] 0.026486 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:29:57] Energy consumed for RAM : 0.003998 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 13:29:57] Delta energy consumed for CPU with constant : 0.000118 kWh, power : 42.5 W
[codecarbon INFO @ 13:29:57] Energy consumed for All CPU : 0.008496 kWh
[codecarbon INFO @ 13:29:57] Energy consumed for all GPUs : 0.014364 kWh. Total GPU Power : 71.82402240888322 W
[codecarbon INFO @ 13:29:57] 0.026859 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:29:57] 0.017572 g.CO2eq/s mean an estimation of 554.14182378

2025-05-16 13:32:40 Epoch  5/5 done. Loss: Train 1.960, Test 2.009; and Acc: Train 0.269, Test 0.252
Training complete.
Results for deberta-v3-base-freeze: {'model_name': 'DeBERTa v3 base with 8 frozen layers', 'batch_size': 32, 'epochs': 5, 'location': 'SGP', 'energy_consumed_kwh': 0.032937857461862084, 'carbon_emissions_kgco2': 0.015506583349467818, 'cpu_energy_kwh': 0.01041841919782986, 'gpu_energy_kwh': 0.01761721409376, 'ram_energy_kwh': 0.004902224170272229, 'hardware': [RAM(), CPU(Intel(R) Xeon(R) CPU @ 2.20GHz > 85W [generic]), GPU() (NVIDIA L4)], 'training_time': 884.3596179485321, 'accuracy': 0.25184577522559476, 'f1_score': 0.10133244167006238}
Results for deberta-v3-base-freeze appended to /content/drive/MyDrive/Colab Notebooks/FrugalAI/results/train_results_20250516131155/running_results.csv
All results saved to /content/drive/MyDrive/Colab Notebooks/FrugalAI/results/train_results_20250516131155/20250516131155_final_results.csv


### Saving models and results

In [None]:
import pickle
import os
from datetime import datetime

def _is_picklable(value):
    """Return True when ``pickle.dumps(value)`` succeeds."""
    try:
        pickle.dumps(value)
    except (pickle.PicklingError, TypeError, AttributeError):
        return False
    return True


def _picklable_result(result):
    """Return a copy of ``result`` with unpicklable values replaced by ``str(value)``."""
    if not isinstance(result, dict):
        return result if _is_picklable(result) else str(result)
    return {
        key: (value if _is_picklable(value) else str(value))
        for key, value in result.items()
    }


def _write_pickle(obj, path):
    """Pickle ``obj`` to ``path`` without leaving a truncated file on failure."""
    # Serialise before opening the file: if pickling fails, nothing is written.
    payload = pickle.dumps(obj)
    with open(path, "wb") as file:
        file.write(payload)


def pickle_models(models, results_list, directory=None):
    """Pickle trained models and their results to disk.

    Each model is written to ``<directory>/<model_name>.pkl`` and the results
    list to ``<directory>/results.pkl``. A failure on one item is reported with
    ``print`` and does not prevent the remaining items from being saved.

    Result values that cannot be pickled (for example hardware descriptor
    objects holding thread locks) are stored as their ``str()`` form, so the
    numeric metrics are still saved. ``results_list`` itself is not modified.

    Args:
        models: A dictionary of trained models, keyed by model name.
        results_list: A list of dictionaries containing model results.
        directory: Folder to write into. When omitted, the notebook-level
            ``output_dir`` variable is used, so it must already be defined.
    """
    if directory is None:
        directory = output_dir  # notebook global set by the calling cell

    for model_name, trained_model in models.items():
        filename = os.path.join(directory, f"{model_name}.pkl")
        try:
            _write_pickle(trained_model, filename)
            print(f"Model '{model_name}' pickled successfully to '{filename}'")
        except Exception as e:
            # Best effort: report the failure and carry on with the next model.
            print(f"Error pickling model '{model_name}': {e}")

    # Save results to a file
    results_filename = os.path.join(directory, "results.pkl")
    try:
        _write_pickle([_picklable_result(r) for r in results_list], results_filename)
        print(f"Results pickled successfully to '{results_filename}'")
    except Exception as e:
        print(f"Error pickling results: {e}")


def load_pickled_models(directory):
    """Unpickle every model file found in ``directory``.

    Every file whose name ends in ``.pkl`` is loaded, except ``results.pkl``.
    A file that cannot be opened or unpickled is reported with ``print`` and
    skipped, so one bad file does not abort the others.

    NOTE: ``pickle.load`` can execute arbitrary code, so only point this at
    files you created yourself. Unpickling also needs the classes of the stored
    objects to be defined in the current session before this is called.

    Args:
        directory: The directory containing the pickled models.

    Returns:
        A dictionary mapping each file name (without ``.pkl``) to the loaded
        object.
    """
    suffix = ".pkl"
    restored = {}
    for entry in os.listdir(directory):
        # Guard clause: ignore non-pickle files and the results pickle.
        if not entry.endswith(suffix) or entry == "results.pkl":
            continue
        path = os.path.join(directory, entry)
        try:
            with open(path, "rb") as handle:
                name = entry[:-len(suffix)]  # strip the extension
                restored[name] = pickle.load(handle)
                print(f"Model '{name}' loaded successfully from '{path}'")
        except Exception as e:
            print(f"Error loading model from '{path}': {e}")
    return restored

In [None]:
# Create a directory to store the pickled models and results if it doesn't exist.
# NOTE: this rebinds the notebook-level `timestamp` and `output_dir` that the
# training cell assigned earlier; pickle_models() is called without a target
# folder and picks up `output_dir` from the notebook namespace.
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
output_dir = f"/content/drive/MyDrive/Colab Notebooks/FrugalAI/saved_models/pickled_models_{timestamp}"
os.makedirs(output_dir, exist_ok=True)

pickle_models(trained_models, results_list)

Model 'roberta-base-freeze' pickled successfully to '/content/drive/MyDrive/Colab Notebooks/FrugalAI/saved_models/pickled_models_20250516135021/roberta-base-freeze.pkl'
Model 'deberta-v3-base-freeze' pickled successfully to '/content/drive/MyDrive/Colab Notebooks/FrugalAI/saved_models/pickled_models_20250516135021/deberta-v3-base-freeze.pkl'
Error pickling results: cannot pickle '_thread.RLock' object


## Measure inference consumption

In [None]:
# Reload models pickled by an earlier run. The path is hard-coded to one
# specific run folder and has to be edited by hand.
# NOTE(review): unpickling needs the model classes stored in those files to be
# defined in this session first - confirm before relying on this cell.
models_directory = "/content/drive/MyDrive/Colab Notebooks/FrugalAI/saved_models/pickled_models_20250429025429" #  Replace with the actual directory

loaded_models = load_pickled_models(models_directory)


Error loading model from '/content/drive/MyDrive/Colab Notebooks/FrugalAI/saved_models/pickled_models_20250429025429/bert-base-uncased-freeze-10.pkl': Can't get attribute 'BertFreezeClassifier' on <module '__main__'>
Error loading model from '/content/drive/MyDrive/Colab Notebooks/FrugalAI/saved_models/pickled_models_20250429025429/bert-base-uncased-freeze-8.pkl': Can't get attribute 'BertFreezeClassifier' on <module '__main__'>


In [None]:
# Use the models trained in this session; this rebinds `loaded_models`,
# replacing whatever the reload cell above produced.
loaded_models = trained_models

# Assign the trained model to the model_config, so the inference cells can
# read it back as model_config['model'].
for name, model in loaded_models.items():
    models[name]['model'] = model

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def measure_inference_carbon(model_config, device, dataset, n_samples=1000):
    """Measure energy use and emissions of batched inference on the test split.

    The first ``n_samples`` quotes of ``dataset['test']`` are tokenized and run
    through the model in batches of 32 while a codecarbon ``EmissionsTracker``
    is active. Tokenization and dataloader construction happen inside the
    tracked region; loading the tokenizer and models does not.

    Args:
        model_config: Dict with the keys ``'model'`` (the trained torch
            module), ``'tokenizer'`` (checkpoint name passed to
            ``from_pretrained``) and ``'name'`` (label stored in the result).
        device: Torch device the model and the batches are moved to.
        dataset: Mapping with a ``'test'`` split exposing ``'quote'`` and
            ``'label'`` columns.
        n_samples: Number of leading test samples to run.

    Returns:
        Dict with ``model_name`` plus ``inf_``-prefixed entries: total, CPU,
        GPU and RAM energy, the value returned by ``tracker.stop()``, and the
        hardware list and country code recorded by the tracker.
    """
    model = model_config['model']

    model.to(device)
    # Inference mode: disables dropout and similar train-only behaviour so the
    # measured forward passes match real inference.
    model.eval()

    tokenizer = AutoTokenizer.from_pretrained(model_config['tokenizer'])

    # Only the embedding-based classifier needs a separate backbone; loading it
    # for every model wastes time and accelerator memory.
    uses_embeddings = isinstance(model, ConspiracyClassifier768)
    embedding_model = None
    if uses_embeddings:
        embedding_model = AutoModel.from_pretrained(model_config['tokenizer']).to(device)
        embedding_model.eval()

    tracker = EmissionsTracker(project_name="FrugalAI_inference_carbon", measure_power_secs=10)
    tracker.start()
    try:
        inf_dataset = TextDataset(dataset['test']['quote'][:n_samples], dataset['test']['label'][:n_samples], tokenizer)
        inf_dataloader = DataLoader(inf_dataset, batch_size=32)

        with torch.no_grad():
            for batch in inf_dataloader:
                input_ids = batch["input_ids"].to(device)
                attention_mask = batch["attention_mask"].to(device)

                if uses_embeddings:
                    embeddings = embedding_model(input_ids=input_ids, attention_mask=attention_mask).pooler_output
                    outputs = model(embeddings)
                else:
                    outputs = model(input_ids=input_ids, attention_mask=attention_mask)

                # The predictions are not used, but computing them and copying
                # them to the host keeps that cost inside the measurement.
                torch.argmax(outputs, dim=1).cpu().numpy()
    finally:
        # Always stop the tracker, even when inference raises, so it does not
        # keep measuring in the background.
        emissions = tracker.stop()

    # NOTE(review): the values below come from private tracker attributes and
    # may change between codecarbon versions.
    results = {
        "model_name": model_config['name'],
        "inf_energy_consumed_kwh": tracker._total_energy.kWh,
        "inf_carbon_emissions_kgco2": emissions,
        'inf_cpu_energy_kwh': tracker._total_cpu_energy.kWh,
        'inf_gpu_energy_kwh': tracker._total_gpu_energy.kWh,
        'inf_ram_energy_kwh': tracker._total_ram_energy.kWh,
        'inf_hardware': tracker._hardware,
        'inf_location': tracker._geo.country_iso_code,
    }
    return results


In [None]:
dataset = load_dataset("quotaclimat/frugalaichallenge-text-train")

# One inference-measurement dict per model, collected for the merge further down.
inf_energy_results_list = []
for model_config in models.values():
    # Entries whose name mentions "tfidf" are skipped here;
    # measure_inference_carbon_tfidf handles them.
    if 'tfidf' in model_config['name'].lower():
        continue
    inf_energy_results = measure_inference_carbon(model_config, device, dataset, n_samples=1000)
    print(inf_energy_results)
    inf_energy_results_list.append(inf_energy_results)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[codecarbon INFO @ 13:50:45] [setup] RAM Tracking...
[codecarbon INFO @ 13:50:45] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at /sys/class/powercap/intel-rapl/subsystem to measure CPU

[codecarbon INFO @ 13:50:46] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 13:50:46] [setup] GPU Tracking...
[codecarbon INFO @ 13:50:46] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 13:50:46] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 13:50:46] >>> Tracker's meta

{'model_name': 'RoBERTa Base with 8 frozen layers', 'inf_energy_consumed_kwh': 0.0002118903419699449, 'inf_carbon_emissions_kgco2': 9.975437086363657e-05, 'inf_cpu_energy_kwh': 6.742011543194275e-05, 'inf_gpu_energy_kwh': 0.00011274897908800097, 'inf_ram_energy_kwh': 3.17212474500012e-05, 'inf_hardware': [RAM(), CPU(Intel(R) Xeon(R) CPU @ 2.20GHz > 85W [generic]), GPU() (NVIDIA L4)], 'inf_location': 'SGP'}


[codecarbon INFO @ 13:50:55] [setup] RAM Tracking...
[codecarbon INFO @ 13:50:55] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at /sys/class/powercap/intel-rapl/subsystem to measure CPU

[codecarbon INFO @ 13:50:56] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 13:50:56] [setup] GPU Tracking...
[codecarbon INFO @ 13:50:56] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 13:50:56] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 13:50:56] >>> Tracker's metadata:
[codecarbon INFO @ 13:50:56]   Platform system: Linux-6.1.123+-x86_64-with-glibc2.35
[codecarbon INFO @ 13:50:56]   Python version: 3.11.12
[codecarbon INFO @ 13:50:56]   CodeCarbon version: 3.0.1
[codecarbon INFO @ 13:50:56]   Available RAM : 52.960 GB
[codecarbon INF

{'model_name': 'DeBERTa v3 base with 8 frozen layers', 'inf_energy_consumed_kwh': 0.0003832693277842747, 'inf_carbon_emissions_kgco2': 0.0001804366839422642, 'inf_cpu_energy_kwh': 0.00012218553378471692, 'inf_gpu_energy_kwh': 0.00020359682954400327, 'inf_ram_energy_kwh': 5.74869644555545e-05, 'inf_hardware': [RAM(), CPU(Intel(R) Xeon(R) CPU @ 2.20GHz > 85W [generic]), GPU() (NVIDIA L4)], 'inf_location': 'SGP'}


In [None]:
# prompt: Evaluate the carbon intensity of the TF-IDF XGBoost model and produce the results in the same structure as measure_inference_carbon

def measure_inference_carbon_tfidf(model_config, dataset, n_samples=200):
    """Measure energy use and emissions of ``model.predict`` on the test split.

    Runs ``model_config['model'].predict`` on the first ``n_samples`` quotes of
    ``dataset['test']`` while a codecarbon ``EmissionsTracker`` is active, and
    returns the measurements under the same keys as
    ``measure_inference_carbon``.

    Args:
        model_config: Dict with the keys ``'model'`` (an object exposing
            ``predict``) and ``'name'`` (label stored in the result).
        dataset: Mapping with a ``'test'`` split exposing a ``'quote'`` column.
        n_samples: Number of leading test samples to predict on.

    Returns:
        Dict with ``model_name`` plus ``inf_``-prefixed entries: total, CPU,
        GPU and RAM energy, the value returned by ``tracker.stop()``, and the
        hardware list and country code recorded by the tracker.
    """
    model = model_config['model']
    tracker = EmissionsTracker(project_name="FrugalAI_inference_carbon", measure_power_secs=10)
    tracker.start()
    try:
        # Only the cost of predicting is of interest; the predictions are discarded.
        model.predict(dataset['test']['quote'][:n_samples])
    finally:
        # Always stop the tracker, even when predict() raises, so it does not
        # keep measuring in the background.
        emissions = tracker.stop()

    # NOTE(review): the values below come from private tracker attributes and
    # may change between codecarbon versions.
    results = {
        "model_name": model_config['name'],
        "inf_energy_consumed_kwh": tracker._total_energy.kWh,
        "inf_carbon_emissions_kgco2": emissions,
        'inf_cpu_energy_kwh': tracker._total_cpu_energy.kWh,
        'inf_gpu_energy_kwh': tracker._total_gpu_energy.kWh,
        'inf_ram_energy_kwh': tracker._total_ram_energy.kWh,
        'inf_hardware': tracker._hardware,
        'inf_location': tracker._geo.country_iso_code,
    }
    return results

# `models` does not always hold the TF-IDF pipeline (the training run above
# only iterated over the transformer entries), so look it up defensively
# instead of failing with a KeyError.
tfidf_config = models.get('tfidf-xgboost')
if tfidf_config is None:
    print("No 'tfidf-xgboost' entry in `models`; skipping TF-IDF inference measurement.")
else:
    inf_energy_results_tfidf = measure_inference_carbon_tfidf(tfidf_config, dataset, n_samples=1000)
    print(inf_energy_results_tfidf)
    inf_energy_results_list.append(inf_energy_results_tfidf)


[codecarbon INFO @ 20:54:32] [setup] RAM Tracking...
[codecarbon INFO @ 20:54:32] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at /sys/class/powercap/intel-rapl/subsystem to measure CPU

[codecarbon INFO @ 20:54:33] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 20:54:33] [setup] GPU Tracking...
[codecarbon INFO @ 20:54:33] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 20:54:33] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 20:54:33] >>> Tracker's metadata:
[codecarbon INFO @ 20:54:33]   Platform system: Linux-6.1.123+-x86_64-with-glibc2.35
[codecarbon INFO @ 20:54:33]   Python version: 3.11.12
[codecarbon INFO @ 20:54:33]   CodeCarbon version: 3.0.1
[codecarbon INFO @ 20:54:33]   Available RAM : 52.960 GB
[codecarbon INF

{'model_name': 'TFIDF + XGBoost', 'inf_energy_consumed_kwh': 2.721015781491085e-06, 'inf_carbon_emissions_kgco2': 1.2810079726577175e-06, 'inf_cpu_energy_kwh': 1.2034305430650724e-06, 'inf_gpu_energy_kwh': 9.57778543975163e-07, 'inf_ram_energy_kwh': 5.598066944508497e-07, 'inf_hardware': [RAM(), CPU(Intel(R) Xeon(R) CPU @ 2.20GHz > 85W [generic]), GPU() (NVIDIA L4)], 'inf_location': 'SGP'}


In [None]:
# Tabulate the collected measurements: one row per result dict, for inference
# and for training respectively.
inf_energy_df = pd.DataFrame(inf_energy_results_list)
train_energy_df = pd.DataFrame(results_list)

# Disabled export snippets kept from earlier runs (incomplete statements):
#.to_csv("/content/drive/MyDrive/Colab Notebooks/FrugalAI/0605_freeze_inference_results.csv")
# .to_csv("/content/drive/MyDrive/Colab Notebooks/FrugalAI/0506_freeze_results.csv")

In [None]:
# Join training and inference metrics on the model name. This is pandas'
# default inner join, so a model present in only one of the tables is dropped.
merged_df = train_energy_df.merge(inf_energy_df, on='model_name')
merged_df

Unnamed: 0,model_name,batch_size,epochs,location,energy_consumed_kwh,carbon_emissions_kgco2,cpu_energy_kwh,gpu_energy_kwh,ram_energy_kwh,hardware,training_time,accuracy,f1_score,inf_energy_consumed_kwh,inf_carbon_emissions_kgco2,inf_cpu_energy_kwh,inf_gpu_energy_kwh,inf_ram_energy_kwh,inf_hardware,inf_location
0,RoBERTa Base with 8 frozen layers,32,5,SGP,0.012444,0.005858,0.003936,0.006656,0.001852,"[RAM(), CPU(Intel(R) Xeon(R) CPU @ 2.20GHz > 8...",335.021569,0.251846,0.101332,0.000212,0.0001,6.7e-05,0.000113,3.2e-05,"[RAM(), CPU(Intel(R) Xeon(R) CPU @ 2.20GHz > 8...",SGP
1,DeBERTa v3 base with 8 frozen layers,32,5,SGP,0.032938,0.015507,0.010418,0.017617,0.004902,"[RAM(), CPU(Intel(R) Xeon(R) CPU @ 2.20GHz > 8...",884.359618,0.251846,0.101332,0.000383,0.00018,0.000122,0.000204,5.7e-05,"[RAM(), CPU(Intel(R) Xeon(R) CPU @ 2.20GHz > 8...",SGP


In [None]:
# Rebuild the combined training + inference table (inner join on model name)
# so this cell can run without the preview cell.
merged_df = train_energy_df.merge(inf_energy_df, on='model_name')

# Destination on Google Drive for the combined results.
file_path = "/content/drive/MyDrive/Colab Notebooks/FrugalAI/results/0516_merged_results.csv"

# index=False keeps the row numbers out of the CSV.
merged_df.to_csv(file_path, index=False)
print(f"DataFrame saved to {file_path}")


DataFrame saved to /content/drive/MyDrive/Colab Notebooks/FrugalAI/results/0516_merged_results.csv
