<a href="https://colab.research.google.com/github/rimbarbar/LL-LLM-Project/blob/main/LL_LLM_Project_Resub_Optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

4: Optimization

In [None]:
# Imports
!pip install bayesian-optimization
import torch
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification
from datasets import load_from_disk
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from bayes_opt import BayesianOptimization



In [None]:
# Load the pre-processed dataset from local disk. Per the original note it is
# already subset to 500 train + 500 test examples (presumably written by an
# earlier notebook in this project — confirm the path exists before running).
# The "train" and "test" splits are indexed by the training cells below.
dataset = load_from_disk("./embeddings_imdb")

In [None]:
# Define metrics
def compute_metrics(eval_pred):
    """Compute binary-classification metrics for one evaluation pass.

    Args:
        eval_pred: A pair ``(logits, labels)``. The predicted class for each
            example is the argmax over the last axis of ``logits``.

    Returns:
        dict: ``{"accuracy", "precision", "recall", "f1"}`` computed with
        ``average='binary'``.
    """
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim=-1)
    # zero_division=0 yields the same values as sklearn's default ("warn",
    # which also substitutes 0) but without emitting an UndefinedMetricWarning
    # whenever a trial collapses to predicting a single class.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='binary', zero_division=0
    )
    acc = accuracy_score(labels, predictions)
    return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}

# Optimization function
def optimize_model(learning_rate, batch_size, weight_decay):
    """Objective function for the Bayesian search.

    Loads a fresh copy of the model from ``./fine_tuned_bert``, trains it for
    3 epochs on ``dataset["train"]`` with the given hyperparameters, and
    evaluates it on ``dataset["test"]``.

    Args:
        learning_rate: Learning rate passed to ``TrainingArguments``.
        batch_size: Per-device train/eval batch size. The optimizer proposes
            floats, so the value is truncated with ``int()``.
        weight_decay: Weight decay passed to ``TrainingArguments``.

    Returns:
        The ``eval_accuracy`` reported by ``trainer.evaluate()`` after training.
    """
    import gc  # local import: keeps this cell independent of the imports cell

    batch_size = int(batch_size)  # Convert to integer
    model = AutoModelForSequenceClassification.from_pretrained("./fine_tuned_bert")

    training_args = TrainingArguments(
        output_dir="./optimized_results",
        evaluation_strategy="epoch",
        # Trial checkpoints are never reloaded, so do not write one per epoch
        # for every trial; the final training run saves its own checkpoints.
        save_strategy="no",
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=3,
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        logging_dir="./optimized_logs",
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset["train"],
        eval_dataset=dataset["test"],
        compute_metrics=compute_metrics
    )

    try:
        trainer.train()
        eval_results = trainer.evaluate()
        return eval_results["eval_accuracy"]
    finally:
        # Release this trial's model/optimizer state before the next trial
        # loads another copy, so memory does not accumulate across trials.
        del trainer, model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

In [None]:
# Search space: (lower, upper) bounds for each argument of optimize_model.
# batch_size is searched as a float and truncated to an int inside the objective.
pbounds = dict(
    learning_rate=(1e-5, 5e-5),
    batch_size=(4, 16),
    weight_decay=(0.001, 0.1),
)

# Bayesian optimization, kept deliberately short for speed: 2 initial points
# plus 2 further iterations (n_iter was reduced from 3 to 2).
optimizer = BayesianOptimization(
    f=optimize_model,
    pbounds=pbounds,
    random_state=1,
)
optimizer.maximize(init_points=2, n_iter=2)

|   iter    |  target   | batch_... | learni... | weight... |
-------------------------------------------------------------




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.687809,0.51,0.50108,0.943089,0.654443
2,No log,0.668073,0.582,0.544794,0.914634,0.682853
3,No log,0.552562,0.732,0.737288,0.707317,0.721992


| [39m1        [39m | [39m0.732    [39m | [39m9.004    [39m | [39m3.881e-05[39m | [39m0.001011 [39m |




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.68499,0.546,0.524804,0.817073,0.63911
2,No log,0.737421,0.546,0.521445,0.939024,0.670537
3,No log,0.6268,0.658,0.627986,0.747967,0.682746


| [39m2        [39m | [39m0.658    [39m | [39m7.628    [39m | [39m1.587e-05[39m | [39m0.01014  [39m |




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.688287,0.526,1.0,0.036585,0.070588
2,No log,0.75945,0.66,0.596447,0.955285,0.734375
3,No log,0.379062,0.848,0.863248,0.821138,0.841667


| [35m3        [39m | [35m0.848    [39m | [35m8.51     [39m | [35m4.16e-05 [39m | [35m0.07767  [39m |




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.69292,0.51,1.0,0.004065,0.008097
2,No log,0.693135,0.508,0.0,0.0,0.0
3,No log,0.69346,0.492,0.492,1.0,0.659517


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


| [39m4        [39m | [39m0.492    [39m | [39m10.39    [39m | [39m4.15e-05 [39m | [39m0.01832  [39m |


In [None]:
# Retrain once using the best hyperparameters found by the search
best_params = optimizer.max["params"]
print("Best parameters:", best_params)

# The search treats batch size as continuous; truncate it once, exactly as
# optimize_model does, and reuse it for both train and eval.
best_batch_size = int(best_params["batch_size"])

model = AutoModelForSequenceClassification.from_pretrained("./fine_tuned_bert")

final_run_config = dict(
    output_dir="./optimized_results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=best_batch_size,
    per_device_eval_batch_size=best_batch_size,
    num_train_epochs=3,
    learning_rate=best_params["learning_rate"],
    weight_decay=best_params["weight_decay"],
    logging_dir="./optimized_logs",
)
training_args = TrainingArguments(**final_run_config)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    compute_metrics=compute_metrics,
)

# Fit, then report metrics on the test split
trainer.train()
results = trainer.evaluate()
print("Final evaluation results:", results)

Best parameters: {'batch_size': np.float64(8.510084569172001), 'learning_rate': np.float64(4.159514140104103e-05), 'weight_decay': np.float64(0.07767024330743347)}




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.688287,0.526,1.0,0.036585,0.070588
2,No log,0.75945,0.66,0.596447,0.955285,0.734375
3,No log,0.379062,0.848,0.863248,0.821138,0.841667


Final evaluation results: {'eval_loss': 0.37906205654144287, 'eval_accuracy': 0.848, 'eval_precision': 0.8632478632478633, 'eval_recall': 0.8211382113821138, 'eval_f1': 0.8416666666666667, 'eval_runtime': 14.181, 'eval_samples_per_second': 35.258, 'eval_steps_per_second': 4.443, 'epoch': 3.0}
