In [3]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m31.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.3.0


In [4]:
!pip install tqdm plotly torch

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [12]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
import json
import optuna
import math
import os

os.makedirs('/content/models', exist_ok=True)
os.makedirs('/content/results', exist_ok=True)
class TimeSeries(Dataset):
    def __init__(self, X: np.ndarray, y: np.ndarray, scale_y: bool = True):
        assert X.shape[0] == y.shape[0], "Mismatched number of samples"

        self.n, self.t, self.f = X.shape

        self.x_scaler = StandardScaler()
        X_flat= X.reshape(-1, self.f)
        self.X_scaled = self.x_scaler.fit_transform(X_flat).reshape(self.n, self.t, self.f)

        self.scale_y = scale_y
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        if scale_y:
            self.y_scaler =StandardScaler()
            self.y_scaled = self.y_scaler.fit_transform(y)
        else:
            self.y_scaled = y

    def __len__(self):
        return self.n

    def __getitem__(self, idx):
        return (
            torch.tensor(self.X_scaled[idx], dtype=torch.float32),
            torch.tensor(self.y_scaled[idx], dtype=torch.float32).view(-1)
        )

    def inverse_transform_y(self, y_tensor: torch.Tensor) -> np.ndarray:
        if self.scale_y:
            return self.y_scaler.inverse_transform(y_tensor.detach().cpu().numpy())
        else:
            return y_tensor.detach().cpu().numpy()
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_seq_length=100):
        super(PositionalEncoding, self).__init__()

        pe =torch.zeros(max_seq_length, d_model)
        position = torch.arange(0, max_seq_length, dtype=torch.float).unsqueeze(1)
        div_term= torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))

        pe[:, 0::2]= torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        return x + self.pe[:, :x.size(1)]
class TimeSeriesTransformer(nn.Module):
    def __init__(self, input_size, d_model, nhead, num_layers, dim_feedforward, output_size, dropout=0.1):
        super(TimeSeriesTransformer, self).__init__()
        if d_model % nhead != 0:
            new_d_model = (d_model // nhead) * nhead
            print(f"Warning: d_model ({d_model}) is not divisible by nhead ({nhead}). Adjusting to {new_d_model}.")
            d_model = new_d_model

        self.input_projection = nn.Linear(input_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model)

        encoder_layer =nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True
        )

        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.output_projection = nn.Linear(d_model, output_size)

    def forward(self, x):
        x = self.input_projection(x)
        x = self.positional_encoding(x)
        output = self.transformer_encoder(x)
        output = output[:, -1, :]

        return self.output_projection(output)

def train(model, criterion, optimizer, train_dataloader, device):
    model.train()
    epoch_train_loss = 0.0
    for batch_X, batch_y in train_dataloader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs= model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        epoch_train_loss += loss.item()

    return epoch_train_loss / len(train_dataloader)

def validate(model, val_dataloader, device):
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for batch_X, batch_y in val_dataloader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            outputs = model(batch_X)
            targets.append(batch_y)
            preds.append(outputs)

    preds = torch.cat(preds, dim=0).cpu()
    targets = torch.cat(targets, dim=0).cpu()

    mse = mean_squared_error(targets.numpy(), preds.numpy())
    r2 = r2_score(targets.numpy(), preds.numpy())
    return mse, r2

def transformer_objective(trial, features, targets, index, device):
    d_model= trial.suggest_categorical("d_model", [64, 128, 192, 256])
    nhead = trial.suggest_categorical("nhead", [2, 4, 8])
    num_layers= trial.suggest_int("num_layers", 2, 6)
    dim_feedforward = trial.suggest_int("dim_feedforward", 64, 256, step=64)
    dropout = trial.suggest_float("dropout", 0.0, 0.5)
    learning_rate= trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])
    num_epochs= trial.suggest_int("num_epochs", 25, 100, step=5)
    if d_model % nhead != 0:
        d_model = (d_model // nhead) * nhead
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.2, shuffle=True, stratify=index[:, 1]
    )

    train_data = TimeSeries(X_train, y_train)
    test_data= TimeSeries(X_test, y_test)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
    model = TimeSeriesTransformer(
        input_size=features.shape[2],
        d_model=d_model,
        nhead=nhead,
        num_layers=num_layers,
        dim_feedforward=dim_feedforward,
        output_size=1,
        dropout=dropout
    ).to(device)

    optimizer= optim.Adam(model.parameters(), lr=learning_rate)
    criterion= nn.MSELoss()

    epoch_mse = []
    for epoch in range(num_epochs):
        train(model, criterion, optimizer, train_loader, device)
        mse, _ = validate(model, test_loader, device)
        epoch_mse.append(mse)

    return np.mean(epoch_mse)

def run_transformer_experiment(model_type='lung', n_trials=30):

    features= np.load(f'/content/{model_type}_features.npy')
    targets= np.load(f'/content/{model_type}_target.npy')
    index = np.load(f'/content/{model_type}_index.npy')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    study = optuna.create_study(direction="minimize")
    study.optimize(lambda trial: transformer_objective(trial, features, targets, index, device), n_trials=n_trials)

    print("Best trial:")
    print(study.best_trial)

    with open(f'/content/models/{model_type}_transformer_params.json', 'w') as file:
        json.dump(study.best_trial.params, file)
    best_params = study.best_trial.params
    X_train, X_test, y_train, y_test, _, index_test = train_test_split(
        features, targets, index, test_size=0.2, shuffle=True, stratify=index[:, 1]
    )

    train_data = TimeSeries(X_train, y_train)
    test_data= TimeSeries(X_test, y_test)
    train_loader =DataLoader(train_data, batch_size=best_params['batch_size'], shuffle=True)
    test_loader =DataLoader(test_data, batch_size=best_params['batch_size'], shuffle=False)

    d_model = best_params['d_model']
    nhead = best_params['nhead']

    if d_model % nhead != 0:
        adjusted_d_model = (d_model // nhead) * nhead
        print(f"Warning: Adjusting d_model from {d_model} to {adjusted_d_model} to ensure divisibility by nhead={nhead}")
        d_model =adjusted_d_model

    model = TimeSeriesTransformer(
        input_size=features.shape[2],
        d_model=d_model,
        nhead=nhead,
        num_layers=best_params['num_layers'],
        dim_feedforward=best_params['dim_feedforward'],
        output_size=1,
        dropout=best_params['dropout']
    ).to(device)

    optimizer =torch.optim.Adam(model.parameters(), lr=best_params['learning_rate'])
    criterion= nn.MSELoss()

    epoch_mse = []
    epoch_r2= []
    epoch_train_loss = []
    for epoch in tqdm(range(best_params['num_epochs'])):
        epoch_train_loss.append(train(model, criterion, optimizer, train_loader, device))
        mse, r2 = validate(model, test_loader, device)
        epoch_mse.append(mse)
        epoch_r2.append(r2)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=list(range(1, len(epoch_mse) + 1)),
            y=epoch_mse,
            mode='lines',
            name='MSE'
        )
    )
    fig.update_layout(
        title=f'MSE vs Epochs ({model_type} - Transformer)',
        xaxis_title='Epoch',
        yaxis_title='MSE'
    )
    fig.show()

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=list(range(1, len(epoch_r2) + 1)),
            y=epoch_r2,
            mode='lines',
            name='R²'
        )
    )
    fig.update_layout(
        title=f'R² vs Epochs ({model_type} - Transformer)',
        xaxis_title='Epoch',
        yaxis_title='R²'
    )
    fig.show()

    model.eval()
    predictions = []
    targets = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch = X_batch.to(device)
            outputs= model(X_batch).cpu()
            predictions.append(outputs)
            targets.append(y_batch)

    y_pred_tensor =torch.cat(predictions)
    y_true_tensor = torch.cat(targets)

    y_pred = test_data.inverse_transform_y(y_pred_tensor)
    y_true = test_data.inverse_transform_y(y_true_tensor)

    df = pd.DataFrame(index_test, columns=["state", "year"])
    df['year'] = df['year'].astype(int)
    df["y_true"] = y_true.flatten()
    df["y_pred"] = y_pred.flatten()

    yg = df.sort_values('year').groupby('year')[['y_true', 'y_pred']].mean().reset_index()

    fig= go.Figure()
    fig.add_scatter(x=yg["year"], y=yg["y_pred"], mode="markers+lines", name="Transformer Pred", marker=dict(symbol="x"))
    fig.add_scatter(x=yg["year"], y=yg["y_true"], mode="markers+lines", name="True")

    r2 = r2_score(yg["y_true"], yg["y_pred"])
    corr = np.corrcoef(yg["y_true"], yg["y_pred"])[0, 1]
    fig.update_layout(
        title=f"Transformer Model Performance: R² = {r2:.3f}, Correlation = {corr:.3f}"
    )
    fig.show()

    print(f"\n===== {model_type.upper()} CANCER TRANSFORMER MODEL =====")
    print(f"R² = {r2:.6f}")
    print(f"MSE = {mean_squared_error(yg['y_true'], yg['y_pred']):.6f}")

    torch.save(model, f"/content/models/{model_type}_transformer_model.pth")
    df.to_csv(f'/content/results/{model_type}_transformer_results.csv', index=False)

    return model, df

if __name__ == "__main__":
    print("Running Transformer model for lung cancer data...")
    lung_model, _ = run_transformer_experiment(model_type='lung', n_trials=30)

    print("\nRunning Transformer model for bladder cancer data...")
    bladder_model, _ = run_transformer_experiment(model_type='bladder', n_trials=30)

[I 2025-04-25 04:28:55,022] A new study created in memory with name: no-name-e9e30c03-60bd-4100-bffd-5979a818b591


Running Transformer model for lung cancer data...
Using device: cuda


[I 2025-04-25 04:29:09,667] Trial 0 finished with value: 0.9883556050913674 and parameters: {'d_model': 64, 'nhead': 4, 'num_layers': 2, 'dim_feedforward': 128, 'dropout': 0.15309987324066315, 'learning_rate': 0.00045519829041461404, 'batch_size': 32, 'num_epochs': 70}. Best is trial 0 with value: 0.9883556050913674.
[I 2025-04-25 04:30:06,586] Trial 1 finished with value: 0.8704286399640535 and parameters: {'d_model': 64, 'nhead': 2, 'num_layers': 4, 'dim_feedforward': 192, 'dropout': 0.3752047275557606, 'learning_rate': 0.00024871494401130254, 'batch_size': 16, 'num_epochs': 95}. Best is trial 1 with value: 0.8704286399640535.
[I 2025-04-25 04:30:43,852] Trial 2 finished with value: 1.0058929330424258 and parameters: {'d_model': 192, 'nhead': 2, 'num_layers': 5, 'dim_feedforward': 192, 'dropout': 0.2189375329510826, 'learning_rate': 0.007028193587014256, 'batch_size': 32, 'num_epochs': 95}. Best is trial 1 with value: 0.8704286399640535.
[I 2025-04-25 04:30:53,983] Trial 3 finished w

Best trial:
FrozenTrial(number=4, state=1, values=[0.6129135769147139], datetime_start=datetime.datetime(2025, 4, 25, 4, 30, 53, 984629), datetime_complete=datetime.datetime(2025, 4, 25, 4, 31, 48, 376735), params={'d_model': 64, 'nhead': 8, 'num_layers': 6, 'dim_feedforward': 256, 'dropout': 0.4787611893281539, 'learning_rate': 0.00017857110870649423, 'batch_size': 16, 'num_epochs': 65}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'d_model': CategoricalDistribution(choices=(64, 128, 192, 256)), 'nhead': CategoricalDistribution(choices=(2, 4, 8)), 'num_layers': IntDistribution(high=6, log=False, low=2, step=1), 'dim_feedforward': IntDistribution(high=256, log=False, low=64, step=64), 'dropout': FloatDistribution(high=0.5, log=False, low=0.0, step=None), 'learning_rate': FloatDistribution(high=0.01, log=True, low=0.0001, step=None), 'batch_size': CategoricalDistribution(choices=(16, 32, 64)), 'num_epochs': IntDistribution(high=100, log=False, low=25, step=5)},

100%|██████████| 65/65 [00:54<00:00,  1.19it/s]


[I 2025-04-25 04:43:34,141] A new study created in memory with name: no-name-5851e5fd-22f8-4bba-acd2-7815fbbbc5d9



===== LUNG CANCER TRANSFORMER MODEL =====
R² = -0.162882
MSE = 0.000000

Running Transformer model for bladder cancer data...
Using device: cuda


[I 2025-04-25 04:43:46,339] Trial 0 finished with value: 0.5439476615190506 and parameters: {'d_model': 128, 'nhead': 2, 'num_layers': 5, 'dim_feedforward': 64, 'dropout': 0.4695488873787147, 'learning_rate': 0.00011627865197669387, 'batch_size': 64, 'num_epochs': 50}. Best is trial 0 with value: 0.5439476615190506.
[I 2025-04-25 04:44:26,207] Trial 1 finished with value: 0.848176338672638 and parameters: {'d_model': 256, 'nhead': 4, 'num_layers': 2, 'dim_feedforward': 192, 'dropout': 0.4183877732380892, 'learning_rate': 0.0009996819167669552, 'batch_size': 16, 'num_epochs': 100}. Best is trial 0 with value: 0.5439476615190506.
[I 2025-04-25 04:44:39,545] Trial 2 finished with value: 0.757267107963562 and parameters: {'d_model': 192, 'nhead': 8, 'num_layers': 3, 'dim_feedforward': 64, 'dropout': 0.25067577271743513, 'learning_rate': 0.0005132738806171636, 'batch_size': 16, 'num_epochs': 25}. Best is trial 0 with value: 0.5439476615190506.
[I 2025-04-25 04:44:51,494] Trial 3 finished wi

Best trial:
FrozenTrial(number=0, state=1, values=[0.5439476615190506], datetime_start=datetime.datetime(2025, 4, 25, 4, 43, 34, 141930), datetime_complete=datetime.datetime(2025, 4, 25, 4, 43, 46, 338879), params={'d_model': 128, 'nhead': 2, 'num_layers': 5, 'dim_feedforward': 64, 'dropout': 0.4695488873787147, 'learning_rate': 0.00011627865197669387, 'batch_size': 64, 'num_epochs': 50}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'d_model': CategoricalDistribution(choices=(64, 128, 192, 256)), 'nhead': CategoricalDistribution(choices=(2, 4, 8)), 'num_layers': IntDistribution(high=6, log=False, low=2, step=1), 'dim_feedforward': IntDistribution(high=256, log=False, low=64, step=64), 'dropout': FloatDistribution(high=0.5, log=False, low=0.0, step=None), 'learning_rate': FloatDistribution(high=0.01, log=True, low=0.0001, step=None), 'batch_size': CategoricalDistribution(choices=(16, 32, 64)), 'num_epochs': IntDistribution(high=100, log=False, low=25, step=5)},

100%|██████████| 50/50 [00:12<00:00,  4.13it/s]



===== BLADDER CANCER TRANSFORMER MODEL =====
R² = 0.540713
MSE = 0.000000


In [14]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
import os

def calculate_transformer_metrics():
    """Calculate and print MSE and R² for transformer models only."""
    results = {}
    transformer_lung_path = '/content/results/lung_transformer_results.csv'

    if os.path.exists(transformer_lung_path):
        transformer_lung = pd.read_csv(transformer_lung_path)
        tf_lung_r2 = r2_score(transformer_lung['y_true'], transformer_lung['y_pred'])
        tf_lung_mse = mean_squared_error(transformer_lung['y_true'], transformer_lung['y_pred'])

        results['lung'] = {'R²': tf_lung_r2, 'MSE': tf_lung_mse}

        print("\n===== LUNG CANCER TRANSFORMER MODEL =====")
        print(f"R² = {tf_lung_r2:.6f}")
        print(f"MSE = {tf_lung_mse:.6f}")
    else:
        print("Lung cancer transformer results not found")
    transformer_bladder_path = '/content/results/bladder_transformer_results.csv'

    if os.path.exists(transformer_bladder_path):
        transformer_bladder = pd.read_csv(transformer_bladder_path)
        tf_bladder_r2 = r2_score(transformer_bladder['y_true'], transformer_bladder['y_pred'])
        tf_bladder_mse = mean_squared_error(transformer_bladder['y_true'], transformer_bladder['y_pred'])

        results['bladder'] = {'R²': tf_bladder_r2, 'MSE': tf_bladder_mse}
        print("\n===== BLADDER CANCER TRANSFORMER MODEL =====")
        print(f"R² = {tf_bladder_r2:.6f}")
        print(f"MSE = {tf_bladder_mse:.6f}")
    else:
        print("Bladder cancer transformer results not found")

    return results
if __name__ == "__main__":
    calculate_transformer_metrics()


===== LUNG CANCER TRANSFORMER MODEL =====
R² = 0.294330
MSE = 0.000001

===== BLADDER CANCER TRANSFORMER MODEL =====
R² = 0.341352
MSE = 0.000000
