In [4]:
import sys
sys.path.append('../')

import optuna
import torch
import pytorch_lightning as pl
from src.model.stsb import RNN
from src.data.stsb_custom_dataset import GLUESTSBDataset
from src.utils.stsb_loss_logger import LossLogger
from src.data.stsb_embedding import GloveEmbeddings



In [None]:
# Value forwarded to the datasets as `tx` (presumably the fixed number of
# tokens per sentence — confirm against GLUESTSBDataset).
TX = 30

# GloVe embeddings loaded from the local 6B/50d text file; the same object is
# shared by both datasets and by the model built in `objective`.
embed_model = GloveEmbeddings(path='../data/embeddings/glove6b50d/glove.6B.50d.txt')
embed_matrix = embed_model.get_embedding_matrix()

train_dataset = GLUESTSBDataset(data_path='../data/sts_b/train.tsv',
                            embed_model=embed_model,
                            tx=TX)
# Validation must use the held-out dev split. Pointing this at train.tsv
# (as before) makes the tuned metric a training-set score, so Optuna would
# select hyperparameters that overfit rather than generalise.
val_dataset = GLUESTSBDataset(data_path='../data/sts_b/dev.tsv',
                            embed_model=embed_model,
                            tx=TX)

train_loader = train_dataset.get_data_loader()
val_loader = val_dataset.get_data_loader()

In [None]:
def objective(trial):
    """Optuna objective: train one RNN configuration and score it.

    Samples `hidden_size` (int in [32, 256]) and `lr` (log-uniform float in
    [3e-5, 3e-2]) from `trial`, trains an `RNN` for 30 epochs on the
    notebook-level `train_loader` / `val_loader`, and returns the value stored
    under ``"pearson_corr"`` in the trainer's callback metrics.

    Args:
        trial: The Optuna trial used to draw hyperparameter suggestions.

    Returns:
        The ``"pearson_corr"`` callback metric after fitting, as a Python
        scalar (presumably the validation Pearson correlation logged by the
        model — confirm against `RNN`).
    """
    # Only these two values are searched. The optimizer ('adam'), the number
    # of layers (1) and the sequence length `tx` are held fixed; tuning `tx`
    # would require rebuilding the datasets and loaders inside this function.
    sampled = {
        'hidden_size': trial.suggest_int('hidden_size', 32, 256),
        'lr': trial.suggest_float('lr', 3e-5, 3e-2, log=True),
    }

    network = RNN(
        input_size=50,
        num_layers=1,
        embed_model=embed_model,
        optimizer_name='adam',
        **sampled,
    )

    # A fresh trainer (and loss-logging callback) per trial so that no state
    # carries over between configurations.
    trainer = pl.Trainer(
        max_epochs=30,
        callbacks=[LossLogger()],
        accelerator="auto",
        devices="auto",
    )
    trainer.fit(network, train_loader, val_loader)

    return trainer.callback_metrics["pearson_corr"].item()


# Optuna study: maximise the value returned by `objective` over 20 trials.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

# Best trial
print("Best trial:")
trial = study.best_trial

# Objective value achieved by the best trial.
print(f"  Value: {trial.value}")
# Only report hyperparameters that `objective` actually suggests.
# `num_layers` is fixed inside `objective` rather than searched, so it is not
# a key of `trial.params`; indexing it raised KeyError after the study ran.
print(f"  Hidden Size: {trial.params['hidden_size']}")
print(f"  Learning Rate: {trial.params['lr']}")
# print(f"  Optimizer: {trial.params['optimizer_name']}")
# print(f"  TX: {trial.params['tx']}")