In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

possible_n_vals = [10, 12, 14, 16]
possible_e_vals = [1, 2, 3, 4, 5]


def run_poly_logistic_regression(n, e):

    X = np.load('Datasets/kryptonite-%s-X.npy'%(n))
    y = np.load('Datasets/kryptonite-%s-y.npy'%(n))

    # Shuffle and split the data
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.6, random_state=42)  # 60% training
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)  # 20% validation, 20% test
    print(X_train.shape)
    # Create polynomial features (set degree as desired)
    degree = e
    poly = PolynomialFeatures(degree)
    X_train_poly = poly.fit_transform(X_train)
    X_val_poly = poly.transform(X_val)
    X_test_poly = poly.transform(X_test)
    print(X_train_poly.shape)
    features = X_train_poly.shape[-1]
    print("Created features")

    # Initialize and fit logistic regression
    logreg = LogisticRegression(max_iter=100, solver='sag', C=0.85)
    logreg.fit(X_train_poly, y_train)
    print("Fit Model")

    # Evaluate on the validation set
    y_val_pred = logreg.predict(X_val_poly)
    val_accuracy = accuracy_score(y_val, y_val_pred)
    print(f"Validation Accuracy: {val_accuracy:.4f}")

    # Evaluate on the test set
    y_test_pred = logreg.predict(X_test_poly)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    print(f"Test Accuracy: {test_accuracy:.4f}")
    return test_accuracy, features
        

In [1]:
!pip install torch

Collecting torch
  Downloading torch-2.9.0-cp311-none-macosx_11_0_arm64.whl.metadata (30 kB)
Collecting filelock (from torch)
  Downloading filelock-3.20.0-py3-none-any.whl.metadata (2.1 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx>=2.5.1 (from torch)
  Using cached networkx-3.5-py3-none-any.whl.metadata (6.3 kB)
Collecting jinja2 (from torch)
  Downloading jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting fsspec>=0.8.5 (from torch)
  Downloading fsspec-2025.9.0-py3-none-any.whl.metadata (10 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch)
  Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Collecting MarkupSafe>=2.0 (from jinja2->torch)
  Downloading markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl.metadata (2.7 kB)
Downloading torch-2.9.0-cp311-none-macosx_11_0_arm64.whl (74.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.5/74.5 MB[0m [31m10.7 MB/s

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleNN(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x)


In [None]:
from tqdm import tqdm

acc_by_n = []
feat_by_n = []
for n in tqdm(possible_n_vals):
    single_n = []
    single_feat = []
    for e in tqdm(possible_e_vals):
        acc, feat = run_poly_logistic_regression(n, e)
        single_n.append(acc)
        single_feat.append(feat)
    acc_by_n.append(single_n)
    feat_by_n.append(single_feat)

print(acc_by_n)


In [12]:
# ============================================================
# PyTorch + Optuna: Binary classification optimizing accuracy
# ============================================================

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification
import optuna

# ------------------------------------------------------------
# 1. (Example) Generate or load your dataset
# ------------------------------------------------------------
# Replace this with your actual dataset
n = 20

X = np.load('Datasets/kryptonite-%s-X.npy'%(n))
y = np.load('Datasets/kryptonite-%s-y.npy'%(n))

# First split: train 60%, temp 40%
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, stratify=y, random_state=42
)
# Second split: val 20%, test 20%
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
)

print(f"Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")

# ------------------------------------------------------------
# 2. Preprocessing
# ------------------------------------------------------------
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)


# ------------------------------------------------------------
# 3. Define flexible MLP model
# ------------------------------------------------------------
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dims, dropout_rate, activation_fn):
        super().__init__()
        layers = []
        prev_dim = input_dim
        for hdim in hidden_dims:
            layers.append(nn.Linear(prev_dim, hdim))
            layers.append(activation_fn())
            layers.append(nn.Dropout(dropout_rate))
            prev_dim = hdim
        layers.append(nn.Linear(prev_dim, 1))
        self.net = nn.Sequential(*layers)
        
    def forward(self, x):
        return torch.sigmoid(self.net(x))


# ------------------------------------------------------------
# 4. Training + Validation function
# ------------------------------------------------------------
def train_and_evaluate(model, optimizer, criterion,
                       X_train, y_train, X_val, y_val,
                       epochs=50, batch_size=64):
    n = len(X_train)
    for epoch in range(epochs):
        model.train()
        idx = torch.randperm(n)
        X_train, y_train = X_train[idx], y_train[idx]
        
        for i in range(0, n, batch_size):
            xb = X_train[i:i+batch_size]
            yb = y_train[i:i+batch_size]
            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()

    # Validation phase
    model.eval()
    with torch.no_grad():
        preds_val = model(X_val)
    preds_bin = (preds_val > 0.5).float()
    acc = accuracy_score(y_val, preds_bin)
    return acc


# ------------------------------------------------------------
# 5. Optuna Objective Function (optimize validation accuracy)
# ------------------------------------------------------------
def objective(trial):
    n_layers = trial.suggest_int("n_layers", 1, 5)
    hidden_dim = trial.suggest_categorical("hidden_dim", [128, 256, 512, 768, 1024])
    dropout_rate = trial.suggest_float("dropout_rate", 0.0, 0.5)
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)
    activation_name = trial.suggest_categorical("activation", ["ReLU", "Tanh", "GELU"])
    
    activation_fn = {"ReLU": nn.ReLU, "Tanh": nn.Tanh, "GELU": nn.GELU}[activation_name]
    hidden_dims = [hidden_dim] * n_layers
    
    model = MLP(input_dim=X_train.shape[1],
                hidden_dims=hidden_dims,
                dropout_rate=dropout_rate,
                activation_fn=activation_fn)
    
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    acc = train_and_evaluate(model, optimizer, criterion, X_train, y_train, X_val, y_val)
    return acc


# ------------------------------------------------------------
# 6. Run Optuna optimization
# ------------------------------------------------------------
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30)

best_trial = study.best_trial
print("\nBest Validation Results")
print("------------------------")
print(f"Validation Accuracy: {best_trial.value:.4f}")
for k, v in best_trial.params.items():
    print(f"  {k}: {v}")


# ------------------------------------------------------------
# 7. Evaluate on Test Set using Best Params
# ------------------------------------------------------------
def train_full_and_test(params):
    n_layers = params["n_layers"]
    hidden_dim = params["hidden_dim"]
    dropout_rate = params["dropout_rate"]
    lr = params["lr"]
    weight_decay = params["weight_decay"]
    activation_fn = {"ReLU": nn.ReLU, "Tanh": nn.Tanh, "GELU": nn.GELU}[params["activation"]]
    
    model = MLP(input_dim=X_train.shape[1],
                hidden_dims=[hidden_dim] * n_layers,
                dropout_rate=dropout_rate,
                activation_fn=activation_fn)
    
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    # Train on train + val (80%) for better generalization
    X_combined = torch.cat([X_train, X_val], dim=0)
    y_combined = torch.cat([y_train, y_val], dim=0)
    
    _ = train_and_evaluate(model, optimizer, criterion, X_combined, y_combined, X_test, y_test)
    
    model.eval()
    with torch.no_grad():
        preds_test = model(X_test)
    preds_bin = (preds_test > 0.5).float()
    test_acc = accuracy_score(y_test, preds_bin)
    return test_acc

test_acc = train_full_and_test(best_trial.params)
print("\nFinal Test Accuracy (using best params): {:.4f}".format(test_acc))

[I 2025-10-28 19:12:30,988] A new study created in memory with name: no-name-a69ea27a-81d9-4af3-94ad-69d6d615db5f


Train: 24000, Val: 8000, Test: 8000


[I 2025-10-28 19:12:55,146] Trial 0 finished with value: 0.499125 and parameters: {'n_layers': 3, 'hidden_dim': 256, 'dropout_rate': 0.43038441293451746, 'lr': 0.00013432816576831912, 'weight_decay': 8.829359118759904e-05, 'activation': 'GELU'}. Best is trial 0 with value: 0.499125.
[I 2025-10-28 19:13:34,318] Trial 1 finished with value: 0.505125 and parameters: {'n_layers': 5, 'hidden_dim': 256, 'dropout_rate': 0.017527353437521986, 'lr': 0.0002612559722668915, 'weight_decay': 0.0005395501137190133, 'activation': 'GELU'}. Best is trial 1 with value: 0.505125.
[I 2025-10-28 19:15:35,786] Trial 2 finished with value: 0.94925 and parameters: {'n_layers': 3, 'hidden_dim': 1024, 'dropout_rate': 0.042366486743149634, 'lr': 0.0007384888819631559, 'weight_decay': 6.772946771940114e-06, 'activation': 'Tanh'}. Best is trial 2 with value: 0.94925.
[I 2025-10-28 19:15:46,468] Trial 3 finished with value: 0.649 and parameters: {'n_layers': 1, 'hidden_dim': 768, 'dropout_rate': 0.14245629527355536


Best Validation Results
------------------------
Validation Accuracy: 0.9493
  n_layers: 3
  hidden_dim: 1024
  dropout_rate: 0.042366486743149634
  lr: 0.0007384888819631559
  weight_decay: 6.772946771940114e-06
  activation: Tanh

Final Test Accuracy (using best params): 0.9621


### For n = 10
Validation Accuracy: 0.9627

  n_layers: 1
  hidden_dim: 256
  dropout_rate: 0.4027371651972238
  lr: 0.008265555263166885
  weight_decay: 9.054101276144208e-06
  activation: ReLU

Final Test Accuracy (using best params): 0.9615


### For n = 12
Validation Accuracy: 0.9606
  n_layers: 2
  hidden_dim: 256
  dropout_rate: 0.29257082699124565
  lr: 0.001426579019898136
  weight_decay: 3.7383700968473337e-06
  activation: Tanh

Final Test Accuracy (using best params): 0.9421

### For n = 14
Validation Accuracy: 0.9646

  n_layers: 2
  hidden_dim: 256
  dropout_rate: 0.18930988641398677
  lr: 0.0005284962214684265
  weight_decay: 1.8689152926603513e-05
  activation: ReLU

Final Test Accuracy (using best params): 0.9634

### For n = 16
Validation Accuracy: 0.9619

  n_layers: 3
  hidden_dim: 512
  dropout_rate: 0.22305400020665447
  lr: 0.0003932872869573933
  weight_decay: 5.556403819066895e-06
  activation: Tanh

Final Test Accuracy (using best params): 0.9470

### For n = 18
Validation Accuracy: 0.9211

  n_layers: 1
  hidden_dim: 1024
  dropout_rate: 0.007703409696213498
  lr: 0.000777485254347479
  weight_decay: 0.00013896378432597418
  activation: ReLU

Final Test Accuracy (using best params): 0.9471

### For n = 20
Validation Accuracy: 0.9493

  n_layers: 3
  hidden_dim: 1024
  dropout_rate: 0.042366486743149634
  lr: 0.0007384888819631559
  weight_decay: 6.772946771940114e-06
  activation: Tanh

Final Test Accuracy (using best params): 0.9621


In [13]:
# ============================================================
# Tree-based models (RF, GB, XGB) + Optuna hyperparameter tuning
# Optimizing validation accuracy with 60/20/20 split
# ============================================================

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import optuna

# Optional: XGBoost (comment out if not installed)
try:
    from xgboost import XGBClassifier
    HAS_XGB = True
except ImportError:
    HAS_XGB = False


# ------------------------------------------------------------
# 1. Create or load your dataset
# ------------------------------------------------------------
n = 10

X = np.load('Datasets/kryptonite-%s-X.npy'%(n))
y = np.load('Datasets/kryptonite-%s-y.npy'%(n))

# 60/20/20 split
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, stratify=y, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
)

print(f"Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")

# ------------------------------------------------------------
# 2. Scale numeric features
# ------------------------------------------------------------
# Tree models are generally scale-invariant, but scaling helps consistency.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)


# ------------------------------------------------------------
# 3. Optuna Objective Function
# ------------------------------------------------------------
def objective(trial):
    model_type = trial.suggest_categorical("model_type", ["RandomForest", "GradientBoosting"] + (["XGBoost"] if HAS_XGB else []))

    if model_type == "RandomForest":
        n_estimators = trial.suggest_int("n_estimators", 100, 500)
        max_depth = trial.suggest_int("max_depth", 3, 20)
        min_samples_split = trial.suggest_int("min_samples_split", 2, 10)
        min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 5)
        max_features = trial.suggest_categorical("max_features", ["sqrt", "log2", None])
        model = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            max_features=max_features,
            random_state=42,
            n_jobs=-1
        )

    elif model_type == "GradientBoosting":
        n_estimators = trial.suggest_int("n_estimators", 100, 500)
        learning_rate = trial.suggest_float("learning_rate", 0.01, 0.3)
        max_depth = trial.suggest_int("max_depth", 3, 10)
        subsample = trial.suggest_float("subsample", 0.6, 1.0)
        model = GradientBoostingClassifier(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            max_depth=max_depth,
            subsample=subsample,
            random_state=42
        )

    elif model_type == "XGBoost" and HAS_XGB:
        n_estimators = trial.suggest_int("n_estimators", 100, 500)
        learning_rate = trial.suggest_float("learning_rate", 0.01, 0.3)
        max_depth = trial.suggest_int("max_depth", 3, 10)
        subsample = trial.suggest_float("subsample", 0.6, 1.0)
        colsample_bytree = trial.suggest_float("colsample_bytree", 0.6, 1.0)
        reg_lambda = trial.suggest_float("reg_lambda", 1e-3, 10, log=True)
        reg_alpha = trial.suggest_float("reg_alpha", 1e-3, 10, log=True)
        model = XGBClassifier(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            max_depth=max_depth,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            reg_lambda=reg_lambda,
            reg_alpha=reg_alpha,
            random_state=42,
            n_jobs=-1,
            use_label_encoder=False,
            eval_metric="logloss"
        )

    else:
        raise ValueError("Unsupported model type")

    # Train and validate
    model.fit(X_train, y_train)
    preds_val = model.predict(X_val)
    acc = accuracy_score(y_val, preds_val)
    return acc


# ------------------------------------------------------------
# 4. Run Optuna Study
# ------------------------------------------------------------
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

best_trial = study.best_trial
print("\nBest Validation Results")
print("------------------------")
print(f"Validation Accuracy: {best_trial.value:.4f}")
for k, v in best_trial.params.items():
    print(f"  {k}: {v}")


# ------------------------------------------------------------
# 5. Retrain Best Model on Train + Val, Evaluate on Test Set
# ------------------------------------------------------------
def train_full_and_test(params):
    model_type = params["model_type"]

    if model_type == "RandomForest":
        model = RandomForestClassifier(
            n_estimators=params["n_estimators"],
            max_depth=params["max_depth"],
            min_samples_split=params["min_samples_split"],
            min_samples_leaf=params["min_samples_leaf"],
            max_features=params["max_features"],
            random_state=42,
            n_jobs=-1
        )

    elif model_type == "GradientBoosting":
        model = GradientBoostingClassifier(
            n_estimators=params["n_estimators"],
            learning_rate=params["learning_rate"],
            max_depth=params["max_depth"],
            subsample=params["subsample"],
            random_state=42
        )

    elif model_type == "XGBoost" and HAS_XGB:
        model = XGBClassifier(
            n_estimators=params["n_estimators"],
            learning_rate=params["learning_rate"],
            max_depth=params["max_depth"],
            subsample=params["subsample"],
            colsample_bytree=params["colsample_bytree"],
            reg_lambda=params["reg_lambda"],
            reg_alpha=params["reg_alpha"],
            random_state=42,
            n_jobs=-1,
            use_label_encoder=False,
            eval_metric="logloss"
        )
    else:
        raise ValueError("Unsupported model type")

    # Retrain on 80% (train + val)
    X_combined = np.vstack([X_train, X_val])
    y_combined = np.concatenate([y_train, y_val])
    model.fit(X_combined, y_combined)

    preds_test = model.predict(X_test)
    test_acc = accuracy_score(y_test, preds_test)
    return test_acc


test_acc = train_full_and_test(best_trial.params)
print("\nFinal Test Accuracy (using best params): {:.4f}".format(test_acc))


[I 2025-10-29 09:58:41,786] A new study created in memory with name: no-name-5035d61e-a35c-4f8d-a631-40fca4b32917


Train: 12000, Val: 4000, Test: 4000


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
[I 2025-10-29 09:58:43,451] Trial 0 finished with value: 0.5165 and parameters: {'model_type': 'XGBoost', 'n_estimators': 417, 'learning_rate': 0.053670510861832735, 'max_depth': 10, 'subsample': 0.8066624939979578, 'colsample_bytree': 0.9388283744981742, 'reg_lambda': 0.15489223417538414, 'reg_alpha': 7.556749926701239}. Best is trial 0 with value: 0.5165.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
[I 2025-10-29 09:58:43,912] Trial 1 finished with value: 0.4995 and parameters: {'model_type': 'XGBoost', 'n_estimators': 298, 'learning_rate': 0.0859816760167959, 'max_depth': 5, 'subsample': 0.7700559163878782, 'colsample_bytree': 0.8736236104091871, 'reg_lambda': 2.300218330034362, 'reg_alpha': 0.024489472491352228}. Best is trial 0 with value: 0.5165.
[I 2025-10-29 09:58:49,494] Trial 2 finished with value: 0.4975 and parameters: {'model_type


Best Validation Results
------------------------
Validation Accuracy: 0.5393
  model_type: XGBoost
  n_estimators: 376
  learning_rate: 0.22644178044161917
  max_depth: 10
  subsample: 0.836599052260243
  colsample_bytree: 0.960831543984434
  reg_lambda: 0.1302571317546935
  reg_alpha: 4.644859724356098

Final Test Accuracy (using best params): 0.5387


### n = 10
Validation Accuracy: 0.9483

  kernel: rbf
  C: 65.70854569546646
  gamma: auto

Final Test Accuracy (using best params): 0.9505

### n = 12

In [None]:
# ============================================================
# Logistic Regression (PyTorch) + Multiple Optimizers
# Comparing SGD, SGD+Momentum, Adam, RMSprop, LBFGS
# with 60/20/20 train/val/test split
# ============================================================

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# ------------------------------------------------------------
# 1. (Example) Create or load your dataset
# ------------------------------------------------------------
n = 10

X = np.load('Datasets/kryptonite-%s-X.npy'%(n))
y = np.load('Datasets/kryptonite-%s-y.npy'%(n))

# 60 / 20 / 20 split
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, stratify=y, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
)

print(f"Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")

# ------------------------------------------------------------
# 2. Preprocessing
# ------------------------------------------------------------
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# ------------------------------------------------------------
# 3. Logistic Regression Model
# ------------------------------------------------------------
class LogisticRegressionModel(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.linear = nn.Linear(input_dim, 1)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))


# ------------------------------------------------------------
# 4. Training & Evaluation Function
# ------------------------------------------------------------
def train_model(optimizer_name, model, X_train, y_train, X_val, y_val,
                lr=1e-3, weight_decay=0.0, momentum=0.9, epochs=50, batch_size=64):
    criterion = nn.BCELoss()

    # Initialize optimizer
    if optimizer_name == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)
    elif optimizer_name == "SGD+Momentum":
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    elif optimizer_name == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    elif optimizer_name == "RMSprop":
        optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)
    elif optimizer_name == "LBFGS":
        optimizer = optim.LBFGS(model.parameters(), lr=lr, max_iter=20)
    else:
        raise ValueError("Unknown optimizer")

    n = len(X_train)
    for epoch in range(epochs):
        model.train()
        idx = torch.randperm(n)
        X_train, y_train = X_train[idx], y_train[idx]

        if optimizer_name == "LBFGS":
            # LBFGS requires closure
            def closure():
                optimizer.zero_grad()
                preds = model(X_train)
                loss = criterion(preds, y_train)
                loss.backward()
                return loss
            optimizer.step(closure)
        else:
            for i in range(0, n, batch_size):
                xb = X_train[i:i+batch_size]
                yb = y_train[i:i+batch_size]
                optimizer.zero_grad()
                preds = model(xb)
                loss = criterion(preds, yb)
                loss.backward()
                optimizer.step()

    # Validation accuracy
    model.eval()
    with torch.no_grad():
        preds_val = model(X_val)
    preds_bin = (preds_val > 0.5).float()
    acc = accuracy_score(y_val, preds_bin)
    return acc


# ------------------------------------------------------------
# 5. Compare Optimizers
# ------------------------------------------------------------
optimizers = ["SGD", "SGD+Momentum", "Adam", "RMSprop", "LBFGS"]
results = {}

for opt in optimizers:
    model = LogisticRegressionModel(X_train.shape[1])
    acc = train_model(opt, model, X_train, y_train, X_val, y_val,
                      lr=1e-3 if opt != "LBFGS" else 1.0,
                      weight_decay=1e-4)
    results[opt] = acc
    print(f"{opt:12s} → Validation Accuracy: {acc:.4f}")

# ------------------------------------------------------------
# 6. Select Best Optimizer & Test
# ------------------------------------------------------------
best_opt = max(results, key=results.get)
print("\nBest Optimizer:", best_opt)

# Retrain best model on Train + Val, Evaluate on Test
model_best = LogisticRegressionModel(X_train.shape[1])
X_combined = torch.cat([X_train, X_val], dim=0)
y_combined = torch.cat([y_train, y_val], dim=0)

_ = train_model(best_opt, model_best, X_combined, y_combined, X_test, y_test,
                lr=1e-3 if best_opt != "LBFGS" else 1.0, weight_decay=1e-4)

model_best.eval()
with torch.no_grad():
    preds_test = model_best(X_test)
preds_bin = (preds_test > 0.5).float()
test_acc = accuracy_score(y_test, preds_bin)

print(f"\nFinal Test Accuracy (using {best_opt}): {test_acc:.4f}")

# ------------------------------------------------------------
# 7. Summary
# ------------------------------------------------------------
print("\n=== Optimizer Comparison Summary ===")
for opt, acc in results.items():
    print(f"{opt:12s} → Val Accuracy: {acc:.4f}")
print(f"\nBest Optimizer: {best_opt} | Test Accuracy: {test_acc:.4f}")


Train: 12000, Val: 4000, Test: 4000
SGD          → Validation Accuracy: 0.5002
SGD+Momentum → Validation Accuracy: 0.4968
Adam         → Validation Accuracy: 0.4968
RMSprop      → Validation Accuracy: 0.4955
LBFGS        → Validation Accuracy: 0.4950

Best Optimizer: SGD

Final Test Accuracy (using SGD): 0.5180

=== Optimizer Comparison Summary ===
SGD          → Val Accuracy: 0.5002
SGD+Momentum → Val Accuracy: 0.4968
Adam         → Val Accuracy: 0.4968
RMSprop      → Val Accuracy: 0.4955
LBFGS        → Val Accuracy: 0.4950

Best Optimizer: SGD | Test Accuracy: 0.5180


In [None]:
# ============================================================
# Polynomial Logistic Regression with Optuna tuning
# Optimizes validation accuracy (60/20/20 split)
# ============================================================

import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import optuna

# ------------------------------------------------------------
# 1. Create or load your dataset
# ------------------------------------------------------------
n = 10

X = np.load('Datasets/kryptonite-%s-X.npy'%(n))
y = np.load('Datasets/kryptonite-%s-y.npy'%(n))

# 60/20/20 split
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, stratify=y, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
)

print(f"Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")

# ------------------------------------------------------------
# 2. Define the Optuna objective
# ------------------------------------------------------------
def objective(trial):
    # Hyperparameters to tune
    degree = trial.suggest_int("degree", 1, 4)
    C = trial.suggest_float("C", 1e-4, 1e3, log=True)
    penalty = trial.suggest_categorical("penalty", ["l2", "l1"])
    solver = "saga" if penalty == "l1" else "lbfgs"

    # Build pipeline: Polynomial → Standardize → Logistic Regression
    model = Pipeline([
        ("poly", PolynomialFeatures(degree=degree, include_bias=False)),
        ("scaler", StandardScaler()),
        ("logreg", LogisticRegression(
            penalty=penalty,
            C=C,
            solver=solver,
            max_iter=5000,
            random_state=42))
    ])

    model.fit(X_train, y_train)
    preds_val = model.predict(X_val)
    acc = accuracy_score(y_val, preds_val)
    return acc

# ------------------------------------------------------------
# 3. Run Optuna optimization
# ------------------------------------------------------------
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=40)

best_trial = study.best_trial
print("\nBest Validation Results")
print("------------------------")
print(f"Validation Accuracy: {best_trial.value:.4f}")
for k, v in best_trial.params.items():
    print(f"  {k}: {v}")

# ------------------------------------------------------------
# 4. Retrain with best params on (Train + Val), test on Test Set
# ------------------------------------------------------------
def train_full_and_test(params):
    degree = params["degree"]
    C = params["C"]
    penalty = params["penalty"]
    solver = "saga" if penalty == "l1" else "lbfgs"

    model = Pipeline([
        ("poly", PolynomialFeatures(degree=degree, include_bias=False)),
        ("scaler", StandardScaler()),
        ("logreg", LogisticRegression(
            penalty=penalty,
            C=C,
            solver=solver,
            max_iter=5000,
            random_state=42))
    ])

    # Train on 80% (train + val)
    X_combined = np.vstack([X_train, X_val])
    y_combined = np.concatenate([y_train, y_val])
    model.fit(X_combined, y_combined)

    preds_test = model.predict(X_test)
    test_acc = accuracy_score(y_test, preds_test)
    return test_acc

test_acc = train_full_and_test(best_trial.params)
print("\nFinal Test Accuracy (using best params): {:.4f}".format(test_acc))


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-10-29 15:43:26,642] A new study created in memory with name: no-name-a383fb3c-8a4e-4291-aef8-d7704d232aaf
  C = trial.suggest_loguniform("C", 1e-4, 1e3)


Train: 12000, Val: 4000, Test: 4000


[W 2025-10-29 15:43:50,030] Trial 0 failed with parameters: {'degree': 3, 'C': 2.658848704188497, 'penalty': 'l1'} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/opt/homebrew/lib/python3.11/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/ty/8nqfhpkn0kg6cq1bbjyvmlgr0000gn/T/ipykernel_97654/3157641241.py", line 55, in objective
    model.fit(X_train, y_train)
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/pipeline.py", line 663, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwarg

KeyboardInterrupt: 