In [None]:
import os
import random
import functools
import csv
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
import evaluate
import re
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, confusion_matrix, classification_report, balanced_accuracy_score, accuracy_score

from scipy.stats import pearsonr
from datasets import Dataset, DatasetDict
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)

df1 = pd.read_csv('filtered_labeled.csv')
df2 = pd.read_csv('synthetic_data.csv')
df = pd.concat([df1, df2], ignore_index=True)


def clean_text(text):
    cleaned_text = re.sub(r'(\n[a-zA-Z])', '', text)
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
    return cleaned_text

df['text'] = df['text'].apply(clean_text)
df['Institutional_Form'] = df['Institutional_Form'].astype('category')
df['Institutional_Form_category'] = df['Institutional_Form'].cat.codes
category_map = {code: category for code, category in enumerate(df['Institutional_Form'].cat.categories)}

df_train, df_val = train_test_split(df, train_size=0.8, test_size=0.2, random_state=42)


def generate_features_with_prompt(df):
    # Define the instruction
    instruction = (
        "You are a contract classification assistant. Your task is to classify the contract text "
        "into one of the predefined categories. Here are the criteria for each category:\n"
        "- Joint Operations: Partnership arrangements to jointly produce services with one or more organizations.\n"
        "- New Joint Entities: Two or more organizations creating a separate new entity to manage or govern a shared asset or service.\n"
        "- Resource Sharing: Sharing of information, personnel, equipment, etc., between governments or community organizations to provide services.\n"
        "- Service Contracts: Agreements with outside entities, public or private, for provision or support services.\n"
        "Analyze the given text carefully and respond with the appropriate category."
    )
    df['Institutional_Form'] = df['Institutional_Form'].astype(str)
    # Generate the `input` column by combining instruction, text, and category
    df['input'] = (
        instruction
        + "\n\nContract Text: "
        + df['text']
        + "\n\nInstitutional Form Category: "
        + df['Institutional_Form']
    )

    # Add the numerical category column if not already present
    if 'Institutional_Form_category' not in df.columns:
        df['Institutional_Form'] = df['Institutional_Form'].astype('category')
        df['Institutional_Form_category'] = df['Institutional_Form'].cat.codes

    return df

generate_features_with_prompt(df_train)
generate_features_with_prompt(df_val)

dataset_train = Dataset.from_pandas(df_train.reset_index(drop=True))
dataset_val = Dataset.from_pandas(df_val.reset_index(drop=True))

dataset = DatasetDict({
    'train': dataset_train,
    'val': dataset_val,
})

#Since our classes are not balanced let's calculate class weights based on inverse value counts
#Convert to pytorch tensor since we will need it
df_train.Institutional_Form_category.value_counts(normalize=True)
class_weights=(1/df_train.Institutional_Form.value_counts(normalize=True).sort_index()).tolist()
class_weights=torch.tensor(class_weights)
class_weights=class_weights/class_weights.sum()

model_name = "meta-llama/Llama-3.1-8B"

#Quantization Config (for QLORA)
quantization_config = BitsAndBytesConfig(
    load_in_4bit = True, # enable 4-bit quantization
    bnb_4bit_quant_type = 'nf4', # information theoretically optimal dtype for normally distributed weights
    bnb_4bit_use_double_quant = True, # quantize quantized weights //insert xzibit meme
    bnb_4bit_compute_dtype = torch.bfloat16 # optimized fp format for ML
)

#
lora_config = LoraConfig(
    r = 16, # the dimension of the low-rank matrices
    lora_alpha = 8, # scaling factor for LoRA activations vs pre-trained weight activations
    target_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj'],
    lora_dropout = 0.05, # dropout probability of the LoRA layers
    bias = 'none', # wether to train bias weights, set to 'none' for attention layers
    task_type = 'SEQ_CLS'
)

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    num_labels=len(category_map)
)

#prepare_model_for_kbit_training() function to preprocess the quantized model for training.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.pad_token = tokenizer.eos_token

#Must use .cache = False as below or it crashes from my experience
model.config.pad_token_id = tokenizer.pad_token_id
model.config.use_cache = False
model.config.pretraining_tp = 1

def llama_preprocessing_function(examples):
    return tokenizer(examples['input'], truncation=True, max_length=512)

tokenized_datasets = dataset.map(llama_preprocessing_function, batched=True)
tokenized_datasets = tokenized_datasets.rename_column("Institutional_Form_category", "label")
tokenized_datasets.set_format("torch")

#Purpose: Automatically pads text data to the longest sequence in a batch
collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred

    try:
        # it's a classification task, take the argmax
        predictions_processed = np.argmax(predictions, axis=1)

        # Calculate Pearson correlation
        pearson, _ = pearsonr(predictions_processed, labels)

        return {'pearson': pearson}
    except Exception as e:
        print(f"Error in compute_metrics: {e}")
        return {'pearson': None}
    
#We will have a custom loss function that deals with the class weights and have class weights as additional argument in constructor
class CustomTrainer(Trainer):
    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        # Ensure label_weights is a tensor
        if class_weights is not None:
            self.class_weights = torch.tensor(class_weights, dtype=torch.float32).to(self.args.device)
        else:
            self.class_weights = None

    def compute_loss(self, model, inputs, num_items_in_batch=None, return_outputs=False):
        # Extract labels and convert them to long type for cross_entropy
        labels = inputs.pop("labels").long()

        # Forward pass
        outputs = model(**inputs)

        # Extract logits assuming they are directly outputted by the model
        logits = outputs.get('logits')

        # Compute custom loss with class weights for imbalanced data handling
        if self.class_weights is not None:
            loss = F.cross_entropy(logits, labels, weight=self.class_weights)
        else:
            loss = F.cross_entropy(logits, labels)

        return (loss, outputs) if return_outputs else loss
    

def make_predictions(model, df):
    

    sentences = df.input.tolist()

      # Define the batch size
    batch_size = 32  # You can adjust this based on your system's memory capacity

      # Initialize an empty list to store the model outputs
    all_outputs = []

      # Process the sentences in batches
    for i in range(0, len(sentences), batch_size):
          # Get the batch of sentences
        batch_sentences = sentences[i:i + batch_size]

          # Tokenize the batch
        inputs = tokenizer(batch_sentences, return_tensors="pt", padding=True, truncation=True, max_length=512)

          # Move tensors to the device where the model is (e.g., GPU or CPU)
        inputs = {k: v.to('cuda' if torch.cuda.is_available() else 'cpu') for k, v in inputs.items()}

          # Perform inference and store the logits
        with torch.no_grad():
            outputs = model(**inputs)
            all_outputs.append(outputs['logits'])

    final_outputs = torch.cat(all_outputs, dim=0)
    df['predictions']=final_outputs.argmax(axis=1).cpu().numpy()
    #df['predictions']=df['predictions'].apply(lambda l:category_map[l])

    return df


    
def get_performance_metrics(df):
    y_test = df.Institutional_Form_category
    y_pred = df.predictions
    print(f"comparing test {y_test} and pred {y_pred}")

    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    print("Balanced Accuracy Score:", balanced_accuracy_score(y_test, y_pred))
    print("Accuracy Score:", accuracy_score(y_test, y_pred))
    
# Define hyperparameter grid
learning_rates = [1e-3]
weight_decays = [0.01, 0.1, 0.2]

# Keep track of results
results = []

for lr in learning_rates:
    for wd in weight_decays:
        print(f"\nTraining with learning_rate={lr}, weight_decay={wd}\n")

        # Update training arguments
        training_args = TrainingArguments(
            output_dir=f'sequence_classification_lr{lr}_wd{wd}',
            learning_rate=lr,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=16,
            num_train_epochs=3,
            weight_decay=wd,
            evaluation_strategy='epoch',
            save_strategy='epoch',
            load_best_model_at_end=True,
            logging_dir=f'logs_lr{lr}_wd{wd}',
            logging_steps=10
        )

        # Initialize trainer
        trainer = CustomTrainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_datasets['train'],
            eval_dataset=tokenized_datasets['val'],
            tokenizer=tokenizer,
            data_collator=collate_fn,
            compute_metrics=compute_metrics,
            class_weights=class_weights,
        )

        # Train the model
        train_result = trainer.train()

        # Record training loss and evaluation metrics
        eval_metrics = trainer.evaluate()
        train_loss = train_result.training_loss

        # Make predictions and compute performance metrics
        df_val = make_predictions(model, df_val)
        performance_metrics = get_performance_metrics(df_val)

        # Append the metrics to results
        results.append({
            "learning_rate": lr,
            "weight_decay": wd,
            "train_loss": train_loss,
            "eval_metrics": eval_metrics,
            "performance_metrics": performance_metrics
        })

        # Print results for the current model
        print(f"Results for learning_rate={lr}, weight_decay={wd}")
        print(f"Train Loss: {train_loss}")
        print(f"Evaluation Metrics: {eval_metrics}")
        print(f"Performance Metrics: {performance_metrics}")

# Summarize all results
print("\nSummary of all results:")
for res in results:
    print(res)


  from .autonotebook import tqdm as notebook_tqdm
`low_cpu_mem_usage` was None, now default to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 4/4 [00:20<00:00,  5.18s/it]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.1-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1592/1592 [00:02<00:00, 698.60 examples/s]
Map: 100%|██████████| 398/398 [00:00<00:00, 758.55 examples/s]
  super().__init__(*args, **kwargs)
Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.



Training with learning_rate=0.001, weight_decay=0.01



  self.class_weights = torch.tensor(class_weights, dtype=torch.float32).to(self.args.device)
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmghasemizade97[0m ([33mmghasemizade97-university-of-vermont[0m). Use [1m`wandb login --relogin`[0m to force relogin


  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Pearson
1,1.8013,1.192398,0.3194
2,0.5021,0.720931,0.489979
3,0.1197,0.663513,0.61644


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


  super().__init__(*args, **kwargs)
Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


comparing test 887     2
1670    3
414     1
1080    0
1102    2
       ..
1753    0
907     3
261     0
1403    3
1543    1
Name: Institutional_Form_category, Length: 398, dtype: int8 and pred 887     2
1670    3
414     3
1080    0
1102    2
       ..
1753    0
907     3
261     0
1403    0
1543    1
Name: predictions, Length: 398, dtype: int64
Confusion Matrix:
[[ 61   1   6  19]
 [  3  55   1   3]
 [  2   1  51  10]
 [ 21   7   4 153]]

Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.70      0.70        87
           1       0.86      0.89      0.87        62
           2       0.82      0.80      0.81        64
           3       0.83      0.83      0.83       185

    accuracy                           0.80       398
   macro avg       0.80      0.80      0.80       398
weighted avg       0.80      0.80      0.80       398

Balanced Accuracy Score: 0.8030370566269829
Accuracy Score: 0.8040201005025126
Results for learni

  self.class_weights = torch.tensor(class_weights, dtype=torch.float32).to(self.args.device)
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Pearson
1,0.3964,0.903241,0.540523
2,0.0933,0.777591,0.613224
3,0.0299,0.726971,0.600256


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


  super().__init__(*args, **kwargs)
Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


comparing test 887     2
1670    3
414     1
1080    0
1102    2
       ..
1753    0
907     3
261     0
1403    3
1543    1
Name: Institutional_Form_category, Length: 398, dtype: int8 and pred 887     2
1670    3
414     3
1080    0
1102    2
       ..
1753    0
907     3
261     0
1403    3
1543    1
Name: predictions, Length: 398, dtype: int64
Confusion Matrix:
[[ 58   1   5  23]
 [  2  56   1   3]
 [  1   0  57   6]
 [ 21   4   6 154]]

Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.67      0.69        87
           1       0.92      0.90      0.91        62
           2       0.83      0.89      0.86        64
           3       0.83      0.83      0.83       185

    accuracy                           0.82       398
   macro avg       0.82      0.82      0.82       398
weighted avg       0.82      0.82      0.82       398

Balanced Accuracy Score: 0.823237476387678
Accuracy Score: 0.8165829145728644
Results for learnin

  self.class_weights = torch.tensor(class_weights, dtype=torch.float32).to(self.args.device)
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss


In [3]:
import os
import random
import functools
import csv
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
import evaluate
import re
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, confusion_matrix, classification_report, balanced_accuracy_score, accuracy_score

from scipy.stats import pearsonr
from datasets import Dataset, DatasetDict
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)

df1 = pd.read_csv('filtered_labeled.csv')
df2 = pd.read_csv('synthetic_data.csv')
df = pd.concat([df1, df2], ignore_index=True)


def clean_text(text):
    cleaned_text = re.sub(r'(\n[a-zA-Z])', '', text)
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
    return cleaned_text

df['text'] = df['text'].apply(clean_text)
df['Institutional_Form'] = df['Institutional_Form'].astype('category')
df['Institutional_Form_category'] = df['Institutional_Form'].cat.codes
category_map = {code: category for code, category in enumerate(df['Institutional_Form'].cat.categories)}

df_train, df_val = train_test_split(df, train_size=0.8, test_size=0.2, random_state=42)


def generate_features_with_prompt(df):
    # Define the instruction
    instruction = (
        "You are a contract classification assistant. Your task is to classify the contract text "
        "into one of the predefined categories. Here are the criteria for each category:\n"
        "- Joint Operations: Partnership arrangements to jointly produce services with one or more organizations.\n"
        "- New Joint Entities: Two or more organizations creating a separate new entity to manage or govern a shared asset or service.\n"
        "- Resource Sharing: Sharing of information, personnel, equipment, etc., between governments or community organizations to provide services.\n"
        "- Service Contracts: Agreements with outside entities, public or private, for provision or support services.\n"
        "Analyze the given text carefully and respond with the appropriate category."
    )
    df['Institutional_Form'] = df['Institutional_Form'].astype(str)
    # Generate the `input` column by combining instruction, text, and category
    df['input'] = (
        instruction
        + "\n\nContract Text: "
        + df['text']
        + "\n\nInstitutional Form Category: "
        + df['Institutional_Form']
    )

    # Add the numerical category column if not already present
    if 'Institutional_Form_category' not in df.columns:
        df['Institutional_Form'] = df['Institutional_Form'].astype('category')
        df['Institutional_Form_category'] = df['Institutional_Form'].cat.codes

    return df

generate_features_with_prompt(df_train)
generate_features_with_prompt(df_val)

dataset_train = Dataset.from_pandas(df_train.reset_index(drop=True))
dataset_val = Dataset.from_pandas(df_val.reset_index(drop=True))

dataset = DatasetDict({
    'train': dataset_train,
    'val': dataset_val,
})

#Since our classes are not balanced let's calculate class weights based on inverse value counts
#Convert to pytorch tensor since we will need it
df_train.Institutional_Form_category.value_counts(normalize=True)
class_weights=(1/df_train.Institutional_Form.value_counts(normalize=True).sort_index()).tolist()
class_weights=torch.tensor(class_weights)
class_weights=class_weights/class_weights.sum()

model_name = "meta-llama/Llama-3.1-8B"

#Quantization Config (for QLORA)
quantization_config = BitsAndBytesConfig(
    load_in_4bit = True, # enable 4-bit quantization
    bnb_4bit_quant_type = 'nf4', # information theoretically optimal dtype for normally distributed weights
    bnb_4bit_use_double_quant = True, # quantize quantized weights //insert xzibit meme
    bnb_4bit_compute_dtype = torch.bfloat16 # optimized fp format for ML
)

#
lora_config = LoraConfig(
    r = 16, # the dimension of the low-rank matrices
    lora_alpha = 8, # scaling factor for LoRA activations vs pre-trained weight activations
    target_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj'],
    lora_dropout = 0.05, # dropout probability of the LoRA layers
    bias = 'none', # wether to train bias weights, set to 'none' for attention layers
    task_type = 'SEQ_CLS'
)

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    num_labels=len(category_map)
)

#prepare_model_for_kbit_training() function to preprocess the quantized model for training.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.pad_token = tokenizer.eos_token

#Must use .cache = False as below or it crashes from my experience
model.config.pad_token_id = tokenizer.pad_token_id
model.config.use_cache = False
model.config.pretraining_tp = 1

def llama_preprocessing_function(examples):
    return tokenizer(examples['input'], truncation=True, max_length=512)

tokenized_datasets = dataset.map(llama_preprocessing_function, batched=True)
tokenized_datasets = tokenized_datasets.rename_column("Institutional_Form_category", "label")
tokenized_datasets.set_format("torch")

#Purpose: Automatically pads text data to the longest sequence in a batch
collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred

    try:
        # it's a classification task, take the argmax
        predictions_processed = np.argmax(predictions, axis=1)

        # Calculate Pearson correlation
        pearson, _ = pearsonr(predictions_processed, labels)

        return {'pearson': pearson}
    except Exception as e:
        print(f"Error in compute_metrics: {e}")
        return {'pearson': None}
    
#We will have a custom loss function that deals with the class weights and have class weights as additional argument in constructor
class CustomTrainer(Trainer):
    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        # Ensure label_weights is a tensor
        if class_weights is not None:
            self.class_weights = torch.tensor(class_weights, dtype=torch.float32).to(self.args.device)
        else:
            self.class_weights = None

    def compute_loss(self, model, inputs, num_items_in_batch=None, return_outputs=False):
        # Extract labels and convert them to long type for cross_entropy
        labels = inputs.pop("labels").long()

        # Forward pass
        outputs = model(**inputs)

        # Extract logits assuming they are directly outputted by the model
        logits = outputs.get('logits')

        # Compute custom loss with class weights for imbalanced data handling
        if self.class_weights is not None:
            loss = F.cross_entropy(logits, labels, weight=self.class_weights)
        else:
            loss = F.cross_entropy(logits, labels)

        return (loss, outputs) if return_outputs else loss
    

def make_predictions(model, df):
    

    sentences = df.input.tolist()

      # Define the batch size
    batch_size = 32  # You can adjust this based on your system's memory capacity

      # Initialize an empty list to store the model outputs
    all_outputs = []

      # Process the sentences in batches
    for i in range(0, len(sentences), batch_size):
          # Get the batch of sentences
        batch_sentences = sentences[i:i + batch_size]

          # Tokenize the batch
        inputs = tokenizer(batch_sentences, return_tensors="pt", padding=True, truncation=True, max_length=512)

          # Move tensors to the device where the model is (e.g., GPU or CPU)
        inputs = {k: v.to('cuda' if torch.cuda.is_available() else 'cpu') for k, v in inputs.items()}

          # Perform inference and store the logits
        with torch.no_grad():
            outputs = model(**inputs)
            all_outputs.append(outputs['logits'])

    final_outputs = torch.cat(all_outputs, dim=0)
    df['predictions']=final_outputs.argmax(axis=1).cpu().numpy()
    #df['predictions']=df['predictions'].apply(lambda l:category_map[l])

    return df

def get_performance_metrics(df):
    y_test = df.Institutional_Form_category
    y_pred = df.predictions
    print(f"comparing test {y_test} and pred {y_pred}")

    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    print("Balanced Accuracy Score:", balanced_accuracy_score(y_test, y_pred))
    print("Accuracy Score:", accuracy_score(y_test, y_pred))
    
def get_performance_metrics(df):
    y_test = df.Institutional_Form_category
    y_pred = df.predictions
    print(f"comparing test {y_test} and pred {y_pred}")

    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    print("Balanced Accuracy Score:", balanced_accuracy_score(y_test, y_pred))
    print("Accuracy Score:", accuracy_score(y_test, y_pred))
    
# Define hyperparameter grid
learning_rates = [1e-3]
weight_decays = [0.1, 0.2]

# Keep track of results
results = []

for lr in learning_rates:
    for wd in weight_decays:
        print(f"\nTraining with learning_rate={lr}, weight_decay={wd}\n")

        # Update training arguments
        training_args = TrainingArguments(
            output_dir=f'sequence_classification_lr{lr}_wd{wd}',
            learning_rate=lr,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=16,
            num_train_epochs=3,
            weight_decay=wd,
            evaluation_strategy='epoch',
            save_strategy='epoch',
            load_best_model_at_end=True,
            logging_dir=f'logs_lr{lr}_wd{wd}',
            logging_steps=10
        )

        # Initialize trainer
        trainer = CustomTrainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_datasets['train'],
            eval_dataset=tokenized_datasets['val'],
            tokenizer=tokenizer,
            data_collator=collate_fn,
            compute_metrics=compute_metrics,
            class_weights=class_weights,
        )

        # Train the model
        train_result = trainer.train()

        # Record training loss and evaluation metrics
        eval_metrics = trainer.evaluate()
        train_loss = train_result.training_loss

        # Make predictions and compute performance metrics
        df_val = make_predictions(model, df_val)
        performance_metrics = get_performance_metrics(df_val)

        # Append the metrics to results
        results.append({
            "learning_rate": lr,
            "weight_decay": wd,
            "train_loss": train_loss,
            "eval_metrics": eval_metrics,
            "performance_metrics": performance_metrics
        })

        # Print results for the current model
        print(f"Results for learning_rate={lr}, weight_decay={wd}")
        print(f"Train Loss: {train_loss}")
        print(f"Evaluation Metrics: {eval_metrics}")
        print(f"Performance Metrics: {performance_metrics}")

# Summarize all results
print("\nSummary of all results:")
for res in results:
    print(res)


`low_cpu_mem_usage` was None, now default to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00,  1.34s/it]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.1-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1592/1592 [00:02<00:00, 749.40 examples/s]
Map: 100%|██████████| 398/398 [00:00<00:00, 792.97 examples/s]
  super().__init__(*args, **kwargs)
Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
  self.class_weights = torch.tensor(class_weights, dtype=torch.float32).to(self.args.device)



Training with learning_rate=0.001, weight_decay=0.1



  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 