In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from datasets import Dataset

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under /kaggle/input.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Load the competition training set; the CSV's 'id' column becomes the DataFrame index.
data = pd.read_csv('/kaggle/input/kaggle-llm-science-exam/train.csv', index_col='id')
# Input columns: the question text plus the five answer options.
features = ['prompt', 'A', 'B', 'C', 'D', 'E']
y = data['answer']
X = data[features]

# The Hugging Face dataset is built from the full frame, not from the split below.
train_dataset = Dataset.from_pandas(data)
# NOTE(review): train_X / val_X / train_y / val_y are not referenced by any later
# visible cell, and the Trainer is given the full dataset for both training and
# evaluation -- confirm whether a held-out validation set was intended.
train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)

In [None]:
from transformers import AutoTokenizer
# Name of the pretrained checkpoint; the same variable is used later to load the model.
model_dir = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_dir)

In [None]:
# Answer letters and their integer class ids, plus lookup tables in both directions.
options = 'ABCDE'
indices = list(range(5))

# letter -> class id (e.g. 'A' -> 0)
option_index = dict(zip(options, indices))
# class id -> letter (e.g. 0 -> 'A')
index_option = dict(zip(indices, options))

def preprocess(examples):
    """Tokenize one question as five (prompt, option) sentence pairs.

    Args:
        examples: A single record with the keys 'prompt', 'A'..'E' and 'answer'.

    Returns:
        The tokenizer output for the five pairs, with an added 'label' entry
        holding the integer id of the correct option (looked up in `option_index`).
    """
    # The prompt is repeated once per answer option so each option is paired with it.
    questions = [examples['prompt']] * 5
    choices = [examples[letter] for letter in options]
    encoded = tokenizer(questions, choices, truncation=True)
    encoded['label'] = option_index[examples['answer']]
    return encoded

# batched=False: preprocess() receives one record at a time. The raw text columns are
# dropped so only the tokenizer outputs and the integer 'label' remain from them.
tokenized_train_ds = train_dataset.map(preprocess, batched=False, remove_columns=['prompt', 'A', 'B', 'C', 'D', 'E', 'answer'])

In [None]:
# Sanity check: decode the five tokenized prompt/option pairs of the first example back to text.
[tokenizer.decode(tokenized_train_ds[0]["input_ids"][i]) for i in range(5)]

In [None]:
# The following data collator (adapted from https://huggingface.co/docs/transformers/tasks/multiple_choice)
# dynamically pads our questions at batch time, so we don't have to pad every question to the length
# of our longest question.
from dataclasses import dataclass
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
from typing import Optional, Union
import torch

@dataclass
class DataCollatorForMultipleChoice:
    """Collate multiple-choice examples into padded tensors.

    Each incoming feature holds one list entry per answer choice for every
    tokenizer field, plus an integer label under 'label' or 'labels'. The
    choices are flattened, padded together by the tokenizer, and reshaped to
    (batch_size, num_choices, -1).
    """
    tokenizer: PreTrainedTokenizerBase
    # The three options below are forwarded unchanged to `tokenizer.pad`.
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None

    def __call__(self, features):
        """Build one batch from a list of per-example feature dicts.

        Args:
            features: Non-empty list of dicts; all examples are expected to have
                the same number of choices as the first one.

        Returns:
            Dict of tensors shaped (batch_size, num_choices, -1), plus an int64
            'labels' tensor with one entry per example.
        """
        label_name = "label" if 'label' in features[0].keys() else 'labels'
        # Read the labels instead of popping them, so the caller's dicts are
        # left untouched and the same features can be collated again.
        labels = [feature[label_name] for feature in features]
        batch_size = len(features)
        num_choices = len(features[0]['input_ids'])
        # Flatten to one dict per (example, choice) in a single pass, in
        # example-major order, skipping the label (it has no per-choice entry).
        flattened_features = [
            {k: v[i] for k, v in feature.items() if k != label_name}
            for feature in features
            for i in range(num_choices)
        ]

        batch = self.tokenizer.pad(
            flattened_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors='pt',
        )
        # Restore the (example, choice) structure after padding.
        batch = {k: v.view(batch_size, num_choices, -1) for k, v in batch.items()}
        batch['labels'] = torch.tensor(labels, dtype=torch.int64)
        return batch

In [None]:
from transformers import AutoModelForMultipleChoice, TrainingArguments, Trainer
# Loads weights from `model_dir`, which is 'bert-base-uncased' at this point. A later
# cell rebinds `model_dir` to the training output directory, so re-running this cell
# after that one would try to load from the output directory instead.
model = AutoModelForMultipleChoice.from_pretrained(model_dir)

In [None]:
from transformers import TrainerCallback

class BestModelTracker(TrainerCallback):
    """Callback that remembers the evaluation with the lowest `eval_loss` seen so far."""

    def __init__(self):
        # Nothing recorded yet: any finite eval loss beats +inf.
        self.best_model_path = None
        self.best_epoch = 0
        self.best_loss = float('inf')

    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        """Record loss, epoch and checkpoint path when `metrics` reports a new lowest eval loss."""
        if not metrics:
            return

        # A missing 'eval_loss' is treated as +inf, which never improves on the best.
        current_loss = metrics.get("eval_loss", float('inf'))
        if not current_loss < self.best_loss:
            return

        self.best_loss = metrics["eval_loss"]
        self.best_epoch = state.epoch
        # Path is derived from the output directory and the current global step;
        # NOTE(review): presumably matches the Trainer's checkpoint naming -- confirm.
        self.best_model_path = f'{args.output_dir}/checkpoint-{state.global_step}'
        print(f"New best model found at epoch {self.best_epoch} with eval loss: {self.best_loss}")

In [None]:
# Single tracker instance; it is handed to the Trainer through its `callbacks` argument.
best_model_tracker = BestModelTracker()

In [None]:
# NOTE(review): this rebinds `model_dir`, which earlier cells use as the pretrained
# checkpoint name; re-running those cells after this one would pick up this value.
model_dir = 'ft_bert-base'
# Evaluate and save once per epoch, and reload the best checkpoint when training ends.
training_args = TrainingArguments(
    output_dir=model_dir,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    learning_rate=5e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=15,
    weight_decay=0.01,
    report_to='none',
    logging_steps=50,
)

In [None]:
!pip install evaluate

In [None]:
import evaluate

# NOTE(review): this metric object is not used by compute_metrics below, which relies
# on scikit-learn's accuracy_score / f1_score instead -- confirm whether it is needed.
accuracy = evaluate.load("accuracy")

from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    """Compute accuracy and weighted F1 from a prediction object.

    Args:
        pred: Object exposing `label_ids` (true class ids) and `predictions`
            (per-class scores; the arg-max over the last axis is the predicted class).

    Returns:
        Dict with the keys 'accuracy' and 'f1'.
    """
    predicted_classes = pred.predictions.argmax(-1)
    true_classes = pred.label_ids
    weighted_f1 = f1_score(true_classes, predicted_classes, average="weighted")
    return dict(
        accuracy=accuracy_score(true_classes, predicted_classes),
        f1=weighted_f1,
    )

In [None]:
# NOTE(review): eval_dataset is the same dataset as train_dataset, so the per-epoch
# evaluation metrics (and the "best" checkpoint chosen from them) are measured on the
# training data rather than on held-out examples -- confirm this is intended.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_ds,
    eval_dataset=tokenized_train_ds,
    tokenizer=tokenizer,
    data_collator=DataCollatorForMultipleChoice(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
    callbacks=[best_model_tracker]
)

In [None]:
!pip install psutil GPUtil

In [None]:
import threading
import psutil
import GPUtil
import time
import matplotlib.pyplot as plt

# Function to monitor CPU and GPU usage
def monitor_resources(stop_event, interval=1):
    """Sample CPU and GPU utilisation until `stop_event` is set, then plot and summarise it.

    Args:
        stop_event: `threading.Event` used to end sampling; the loop exits as
            soon as it is set.
        interval: Seconds to wait between samples.

    Side effects:
        Shows a matplotlib figure of usage against elapsed seconds and prints
        the elapsed time of the last sample, the peak CPU usage and the peak
        GPU usage. If the event is set before the first sample is taken, a
        short notice is printed instead.
    """
    cpu_usage = []
    gpu_usage = []
    elapsed = []

    # The first cpu_percent(interval=None) call only establishes a baseline and
    # returns a meaningless 0.0, so make that call here and discard the result.
    psutil.cpu_percent(interval=None)
    start = time.time()

    # Event.wait returns True as soon as the event is set, so shutdown is not
    # delayed by a full sleep interval; each sample covers the preceding wait.
    while not stop_event.wait(interval):
        elapsed.append(time.time() - start)
        cpu_usage.append(psutil.cpu_percent(interval=None))

        gpus = GPUtil.getGPUs()
        gpu_usage.append(gpus[0].load * 100 if gpus else 0)  # Only the first GPU is sampled

    if not elapsed:
        # Nothing to plot; max() and [-1] below would raise on empty lists.
        print("No resource samples were collected.")
        return

    # Plot against seconds since monitoring started, matching the axis label.
    plt.figure(figsize=(10, 6))
    plt.plot(elapsed, cpu_usage, label='CPU Usage (%)')
    plt.plot(elapsed, gpu_usage, label='GPU Usage (%)')
    plt.xlabel('Time (s)')
    plt.ylabel('Usage (%)')
    plt.title('CPU and GPU Usage Over Time')
    plt.legend()
    plt.grid(True)
    plt.show()

    print(elapsed[-1])
    print(max(cpu_usage))
    print(max(gpu_usage))

# Monitoring setup
stop_event = threading.Event()
monitor_thread = threading.Thread(target=monitor_resources, args=(stop_event,))

# Start monitoring
monitor_thread.start()

try:
    # Start training
    trainer.train()
finally:
    # Stop monitoring even if training raises or is interrupted; otherwise the
    # non-daemon monitor thread would keep sampling in the background.
    stop_event.set()
    monitor_thread.join()

# At this point, the monitoring thread has completed
print("Training and monitoring completed.")

In [None]:
# Predict on the same tokenized dataset that was passed to the Trainer for training.
predictions = trainer.predict(tokenized_train_ds)

In [None]:
import numpy as np
def predictions_to_map_output(predictions):
    """Turn per-option scores into space-separated top-3 answer letters.

    Args:
        predictions: 2-D array of scores with one row per question and one
            column per answer option.

    Returns:
        1-D array of strings, one per row, holding the letters of the three
        highest-scoring options in descending score order (e.g. 'B E C').
    """
    # Negating the scores makes argsort rank from highest to lowest.
    best_three = np.argsort(-predictions)[:, :3]
    letters = np.vectorize(index_option.get)(best_three)
    return np.apply_along_axis(' '.join, 1, letters)

In [None]:
def map_at_3(predictions, true_answers):
    """Compute mean average precision at 3 (MAP@3).

    Args:
        predictions: Object whose `predictions` attribute holds the per-option
            scores that are passed to `predictions_to_map_output`.
        true_answers: Indexable collection of integer labels (positions in `options`).

    Returns:
        Mean over all questions of 1/rank of the correct letter among the top
        three guesses, counting 0 when the correct letter is not among them.
    """
    ranked_letters = predictions_to_map_output(predictions.predictions)

    scores = []
    for position in range(len(true_answers)):
        correct_letter = options[true_answers[position]]
        guesses = ranked_letters[position].split(" ")
        try:
            rank = guesses.index(correct_letter)
        except ValueError:
            # Correct answer is not among the top three guesses.
            scores.append(0)
        else:
            scores.append(1 / (rank + 1))

    return np.mean(scores)

# Integer labels (positions in `options`) that preprocess() stored under 'label'.
# The score is computed on the same dataset the model was trained on.
true_answers = tokenized_train_ds['label']
map_3_score = map_at_3(predictions, true_answers)
print(f"MAP@3 Score: {map_3_score}")