In [None]:
# Single run of PAIN_Main_MAVEN_ERE.py (IPython shell escape) with fixed
# hyperparameters and one epoch, reading data and priors from Google Drive.
# NOTE(review): presumably a quick smoke test of the training script - confirm.
!python /content/drive/MyDrive/causality-guided-Transformer/PAIN_Main_MAVEN_ERE.py \
 -data /content/drive/MyDrive/causality-guided-Transformer/data/MAVEN_ERE/  \
 -prior /content/drive/MyDrive/causality-guided-Transformer/prior/MAVEN_ERE/sparse/ \
 -epoch 1\
 -batch_size 16\
 -d_model 512 \
 -d_inner 256 \
 -d_k 256 \
 -d_v 256 \
 -n_head 4 \
 -n_layers 4 \
 -dropout 0.1 \
 -lr 1e-4 \
 -num_samples 1 \
 -event_interest 7 \
 -threshold 0.3

In [None]:
from google.colab import drive
import shutil
import os
import pandas as pd
import optuna
import subprocess

# Mount Google Drive so that results outlive the Colab runtime.
drive.mount('/content/drive')

# Results are appended to a local CSV and mirrored into this Drive folder.
drive_folder = '/content/drive/MyDrive/PAIN_results/'
os.makedirs(drive_folder, exist_ok=True)
csv_file = 'PAIN_fine_tuning_loglikelihood_results.csv'
drive_csv_path = os.path.join(drive_folder, csv_file)

if not os.path.exists(csv_file):
    if os.path.exists(drive_csv_path):
        # A fresh runtime has no local CSV, but Drive may still hold the
        # results of an earlier session. Restore them instead of replacing
        # the Drive copy with an empty file.
        shutil.copy(drive_csv_path, csv_file)
    else:
        # First run ever: create a header-only CSV and mirror it to Drive.
        columns = [
            'epoch', 'batch_size', 'd_model', 'd_inner', 'd_k', 'd_v', 'n_head',
            'n_layers', 'dropout', 'lr', 'num_samples', 'event_interest',
            'threshold', 'val_loglikelihood'
        ]
        df = pd.DataFrame(columns=columns)
        df.to_csv(csv_file, index=False)
        shutil.copy(csv_file, drive_csv_path)

def save_to_drive():
    """Mirror the local results CSV to its Google Drive path and report it.

    Reads the module-level ``csv_file`` (source) and ``drive_csv_path``
    (destination); any existing file at the destination is overwritten.
    """
    shutil.copy(src=csv_file, dst=drive_csv_path)
    message = f"Updated {csv_file} saved to Google Drive at {drive_csv_path}."
    print(message)

def _parse_loglikelihood(line):
    """Extract the number following the last ``loglikelihood:`` in *line*.

    The value is the text between the last ``loglikelihood:`` marker and the
    next comma (or end of line). Returns ``None`` when the marker is absent
    or the text after it is not a valid float.
    """
    if 'loglikelihood:' not in line:
        return None
    text = line.split('loglikelihood:')[-1].split(',')[0].strip()
    try:
        return float(text)
    except ValueError:
        return None


def fine_tune_model(epoch, batch_size, d_model, d_inner, d_k, d_v, n_head,
                    n_layers, dropout, lr, num_samples, event_interest, threshold):
    """Run the training script once and return its validation log-likelihood.

    Each argument is converted with ``str()`` and forwarded to
    ``PAIN_Main_MAVEN_ERE.py`` as the command-line flag of the same name.
    The script's stdout is scanned for lines containing ``(Validation)``;
    the value from the last such line that parses is returned.

    Returns:
        The last validation log-likelihood found in the output as a float,
        or ``None`` if the output contains no parsable validation value.

    Raises:
        subprocess.CalledProcessError: if the training script exits with a
            non-zero status. Its captured stderr is printed first.
    """
    print(f"Running fine-tuning with parameters: "
          f"epoch={epoch}, batch_size={batch_size}, d_model={d_model}, "
          f"d_inner={d_inner}, d_k={d_k}, d_v={d_v}, n_head={n_head}, n_layers={n_layers}, "
          f"dropout={dropout}, lr={lr}, num_samples={num_samples}, "
          f"event_interest={event_interest}, threshold={threshold}")

    command = [
        'python', '/content/drive/MyDrive/causality-guided-Transformer/PAIN_Main_MAVEN_ERE.py',
        '-data', '/content/drive/MyDrive/causality-guided-Transformer/data/MAVEN_ERE/',
        '-prior', '/content/drive/MyDrive/causality-guided-Transformer/prior/MAVEN_ERE/sparse/',
        '-epoch', str(epoch),
        '-batch_size', str(batch_size),
        '-d_model', str(d_model),
        '-d_inner', str(d_inner),
        '-d_k', str(d_k),
        '-d_v', str(d_v),
        '-n_head', str(n_head),
        '-n_layers', str(n_layers),
        '-dropout', str(dropout),
        '-lr', str(lr),
        '-num_samples', str(num_samples),
        '-event_interest', str(event_interest),
        '-threshold', str(threshold)
    ]

    print(f"Running command: {' '.join(command)}")
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as err:
        # stderr is captured rather than streamed, so without this the
        # training script's own traceback would never be shown.
        print(f"Training script failed with exit code {err.returncode}.")
        if err.stderr:
            print(err.stderr)
        raise

    val_loglikelihood = None
    for line in result.stdout.splitlines():
        # Same precedence as the original if/elif chain: a line tagged
        # (Training) is never read as a validation line.
        if '(Training)' in line or '(Validation)' not in line:
            continue
        parsed = _parse_loglikelihood(line)
        if parsed is not None:
            # Later lines overwrite earlier ones, so the last reported
            # validation value wins.
            val_loglikelihood = parsed

    if val_loglikelihood is None:
        print("Warning: no '(Validation)' loglikelihood found in the training output.")

    return val_loglikelihood

def objective(trial):
    """Optuna objective: sample hyperparameters, train once, log the result.

    Samples one configuration from the search space, runs
    ``fine_tune_model`` with it, appends the configuration and the
    resulting validation log-likelihood as one row to ``csv_file``, and
    mirrors the CSV to Google Drive via ``save_to_drive``.

    Args:
        trial: the Optuna trial used to sample the hyperparameters.

    Returns:
        The value returned by ``fine_tune_model`` (validation
        log-likelihood, or ``None`` if none was found).
    """
    # Keys match both fine_tune_model's parameter names and the CSV column
    # order, so this one dict drives the call and the logged row.
    params = {
        'epoch': trial.suggest_categorical('epoch', [5, 10, 20]),
        'batch_size': trial.suggest_categorical('batch_size', [16, 32, 64]),
        'd_model': trial.suggest_categorical('d_model', [64, 128, 256]),
        'd_inner': trial.suggest_categorical('d_inner', [128, 256, 512]),
        'd_k': trial.suggest_categorical('d_k', [32, 64, 128]),
        'd_v': trial.suggest_categorical('d_v', [32, 64, 128]),
        'n_head': trial.suggest_categorical('n_head', [2, 4, 8]),
        'n_layers': trial.suggest_categorical('n_layers', [2, 4, 6]),
        'dropout': trial.suggest_float('dropout', 0.1, 0.5),
        # suggest_loguniform is deprecated; suggest_float(log=True) samples
        # from the same log-uniform range.
        'lr': trial.suggest_float('lr', 1e-5, 5e-4, log=True),
        'num_samples': trial.suggest_categorical('num_samples', [1, 5, 10]),
        'event_interest': trial.suggest_categorical('event_interest', [7]),
        'threshold': trial.suggest_float('threshold', 0.2, 0.5),
    }

    val_loglikelihood = fine_tune_model(**params)

    # The row is appended without a header, so the key order above must
    # stay aligned with the header written when the CSV was created.
    trial_result = {**params, 'val_loglikelihood': val_loglikelihood}
    trial_df = pd.DataFrame([trial_result])
    trial_df.to_csv(csv_file, mode='a', header=False, index=False)

    save_to_drive()

    return val_loglikelihood

# Maximise the validation log-likelihood over 50 sampled configurations.
study = optuna.create_study(direction='maximize')

# catch=: a configuration whose training run exits non-zero is recorded as a
# failed trial instead of aborting the remaining trials of the search.
study.optimize(objective, n_trials=50, catch=(subprocess.CalledProcessError,))

try:
    # best_params / best_value raise ValueError when no trial completed.
    best_params, best_value = study.best_params, study.best_value
except ValueError:
    print("No trial completed successfully; no best hyperparameters to report.")
else:
    print("Best hyperparameters:", best_params)
    print("Best validation loglikelihood:", best_value)
