In [1]:
from sklearn.model_selection import StratifiedKFold
import optuna
from torch.utils.data import DataLoader, Subset
import sys
sys.path.insert(1, '../src/')
from config import raw_data_path, univariate_data_path, processed_data_path
from preprocessing_modules import create_time_windows_with_labels
from CNN_classifier_model import CNNClassifier, train_model
import os
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.utils.class_weight import compute_class_weight

from collections import Counter
import torch
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

train_dir = os.path.join(univariate_data_path, "target_univariate_train.npy")
train_data = np.load(train_dir, allow_pickle=True)
# train_data = pd.DataFrame(train_data)

train_windows = create_time_windows_with_labels(train_data)



# Example dataset with windows and labels
train_windows, train_labels = create_time_windows_with_labels(train_data)

# Count label distribution
train_label_counts = Counter(train_labels)

# Print the counts
print("Train label distribution:")
print(f"  Term (0): {train_label_counts[0]}")
print(f"  Preterm (1): {train_label_counts[1]}")



# Convert to tensors
train_windows_tensor = torch.tensor(train_windows, dtype=torch.float32)
train_labels_tensor = torch.tensor(train_labels, dtype=torch.float32)
train_dataset = TensorDataset(train_windows_tensor, train_labels_tensor)
print(train_windows_tensor.shape)  
print(train_labels_tensor.shape)  



Train label distribution:
  Term (0): 319
  Preterm (1): 752
torch.Size([1071, 12000])
torch.Size([1071])


In [3]:
from sklearn.metrics import average_precision_score
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import StratifiedKFold

def objective(trial):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Hyperparameter suggestions
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
    num_layers = trial.suggest_int("num_layers", 1, 4)
    base_channels = trial.suggest_categorical("base_channels", [8, 16, 32])

    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])
    epochs = 5 

    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    fold_ap_scores = []

    for train_idx, val_idx in kfold.split(train_windows_tensor, train_labels_tensor):
        # Create data loaders
        train_subset = Subset(train_dataset, train_idx)
        val_subset = Subset(train_dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

        # Instantiate model
        model = CNNClassifier(input_length=12000, num_layers=num_layers, base_channels=base_channels)

        # Train model
        trained_model = train_model(model, train_loader, val_loader, epochs=epochs, lr=lr, device=device)

        # Evaluate on validation set using AP
        y_true_all = []
        y_probs_all = []
        trained_model.eval()
        with torch.no_grad():
            for x_batch, y_batch in val_loader:
                x_batch = x_batch.unsqueeze(1).to("cuda")
                y_batch = y_batch.to("cuda")

                outputs = trained_model(x_batch)
                probs = torch.sigmoid(outputs).squeeze().cpu().numpy()

                y_true_all.extend(y_batch.cpu().numpy())
                y_probs_all.extend(probs)

        ap = average_precision_score(y_true_all, y_probs_all)
        fold_ap_scores.append(ap)

    return np.mean(fold_ap_scores)


In [4]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=25)

print("Best trial:")
print(study.best_trial)


[I 2025-04-25 16:15:43,576] A new study created in memory with name: no-name-80512cc2-e650-4dd5-a95a-baba1d356583
  lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)


In [None]:
path = os.path.join(models_path, "cnn_baseline_optuna.pkl")
joblib.dump(study, path)
print(f"Saved Optuna study to {path}")