In [1]:
# Incremental Tuning with PyTorch Dataset on GPU


## Cell 1: Initial Setup


# Import Libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import roc_auc_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import optuna
import logging

# Setting up the logger
logging.basicConfig(level=logging.INFO, filename='pytorch_initial_tuning.log', filemode='w',
                    format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()

# Create directories for storing graphs
os.makedirs('graphs_pytorch_incremental', exist_ok=True)

# Check for GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
logger.info(f'Using device: {device}')

# Load Data
train_path = "train_pytorch_processed.csv"
test_path = "test_pytorch_processed.csv"

logger.info("Loading datasets...")
train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)
logger.info("Datasets loaded successfully.")
logger.info(f"Train dataset shape: {train_df.shape}")
logger.info(f"Test dataset shape: {test_df.shape}")

# Split data into features and target
X = train_df.drop('Response', axis=1).values
y = train_df['Response'].values

# Convert to tensors
X_tensor = torch.tensor(X, dtype=torch.float32).to(device)
y_tensor = torch.tensor(y, dtype=torch.float32).to(device)

# Create dataset and dataloader
dataset = TensorDataset(X_tensor, y_tensor)
train_dataset, val_dataset = train_test_split(dataset, test_size=0.2, random_state=42, stratify=y)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)


In [2]:
# Define the neural network model
class Net(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

# Define the objective function for Optuna
def objective(trial):
    input_dim = X.shape[1]
    hidden_dim = trial.suggest_int('hidden_dim', 16, 128)
    output_dim = 1

    model = Net(input_dim, hidden_dim, output_dim).to(device)

    criterion = nn.BCELoss()
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD'])
    lr = trial.suggest_float('lr', 1e-5, 1e-1)
    
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    elif optimizer_name == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=lr)
    else:
        optimizer = optim.SGD(model.parameters(), lr=lr)

    for epoch in range(10):  # Train for 10 epochs
        model.train()
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(), y_batch)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_val_batch, y_val_batch in val_loader:
                val_outputs = model(X_val_batch)
                val_loss += criterion(val_outputs.squeeze(), y_val_batch).item()
        
        val_loss /= len(val_loader)
        trial.report(val_loss, epoch)
        
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    
    return val_loss


In [3]:
# Set up Optuna study
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)

# Get the best hyperparameters
best_params = study.best_params
logger.info(f"Best parameters from initial search: {best_params}")
logger.info(f"Best validation loss from initial search: {study.best_value}")

# Save results
results_df = study.trials_dataframe()
results_df.to_csv('pytorch_initial_search_results.csv', index=False)

# Train the final model with the best parameters
final_model = Net(input_dim, best_params['hidden_dim'], output_dim).to(device)
criterion = nn.BCELoss()

if best_params['optimizer'] == 'Adam':
    optimizer = optim.Adam(final_model.parameters(), lr=best_params['lr'])
elif best_params['optimizer'] == 'RMSprop':
    optimizer = optim.RMSprop(final_model.parameters(), lr=best_params['lr'])
else:
    optimizer = optim.SGD(final_model.parameters(), lr=best_params['lr'])

# Training the final model
final_model.train()
for epoch in range(10):  # Train for 10 epochs
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = final_model(X_batch)
        loss = criterion(outputs.squeeze(), y_batch)
        loss.backward()
        optimizer.step()

# Save final model
torch.save(final_model.state_dict(), 'final_pytorch_model_initial.pth')
logger.info('Final model saved to final_pytorch_model_initial.pth')


[I 2024-07-12 13:06:43,001] A new study created in memory with name: no-name-0d968e4b-9f0a-4d98-bba3-9b44978c95e2
[I 2024-07-12 15:04:39,550] Trial 0 finished with value: 0.2934769336757443 and parameters: {'hidden_dim': 77, 'optimizer': 'Adam', 'lr': 0.050234535484116695}. Best is trial 0 with value: 0.2934769336757443.
[I 2024-07-12 15:36:50,227] Trial 1 finished with value: 0.2913493198532937 and parameters: {'hidden_dim': 123, 'optimizer': 'RMSprop', 'lr': 0.02223232908642494}. Best is trial 1 with value: 0.2913493198532937.
[I 2024-07-12 16:08:59,601] Trial 2 finished with value: 0.28680926350104186 and parameters: {'hidden_dim': 57, 'optimizer': 'RMSprop', 'lr': 0.024259345169640407}. Best is trial 2 with value: 0.28680926350104186.
[I 2024-07-12 16:39:04,029] Trial 3 finished with value: 0.2636828551073202 and parameters: {'hidden_dim': 119, 'optimizer': 'SGD', 'lr': 0.012298487011468845}. Best is trial 3 with value: 0.2636828551073202.
[I 2024-07-12 17:14:04,561] Trial 4 finish

NameError: name 'input_dim' is not defined

In [None]:
# Update logger
logging.basicConfig(level=logging.INFO, filename='pytorch_intermediate_tuning.log', filemode='w',
                    format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()

# Adapt ranges based on the initial search results
def objective(trial):
    hidden_dim = trial.suggest_int('hidden_dim', int(best_params['hidden_dim'] * 0.8), int(best_params['hidden_dim'] * 1.2))
    lr = trial.suggest_float('lr', best_params['lr'] * 0.8, best_params['lr'] * 1.2)

    model = Net(input_dim, hidden_dim, output_dim).to(device)
    criterion = nn.BCELoss()
    
    if best_params['optimizer'] == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    elif best_params['optimizer'] == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=lr)
    else:
        optimizer = optim.SGD(model.parameters(), lr=lr)

    for epoch in range(10):  # Train for 10 epochs
        model.train()
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(), y_batch)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_val_batch, y_val_batch in val_loader:
                val_outputs = model(X_val_batch)
                val_loss += criterion(val_outputs.squeeze(), y_val_batch).item()
        
        val_loss /= len(val_loader)
        trial.report(val_loss, epoch)
        
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    
    return val_loss

# Set up Optuna study
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)

# Get the best hyperparameters
best_params = study.best_params
logger.info(f"Best parameters from intermediate search: {best_params}")
logger.info(f"Best validation loss from intermediate search: {study.best_value}")

# Save results
results_df = study.trials_dataframe()
results_df.to_csv('pytorch_intermediate_search_results.csv', index=False)

# Train the final model with the best parameters
final_model = Net(input_dim, best_params['hidden_dim'], output_dim).to(device)
criterion = nn.BCELoss()

if best_params['optimizer'] == 'Adam':
    optimizer = optim.Adam(final_model.parameters(), lr=best_params['lr'])
elif best_params['optimizer'] == 'RMSprop':
    optimizer = optim.RMSprop(final_model.parameters(), lr=best_params['lr'])
else:
    optimizer = optim.SGD(final_model.parameters(), lr=best_params['lr'])

# Training the final model
final_model.train()
for epoch in range(10):  # Train for 10 epochs
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = final_model(X_batch)
        loss = criterion(outputs.squeeze(), y_batch)
        loss.backward()
        optimizer.step()

# Save final model
torch.save(final_model.state_dict(), 'final_pytorch_model_intermediate.pth')
logger.info('Final model saved to final_pytorch_model_intermediate.pth')


In [None]:
# Update logger
logging.basicConfig(level=logging.INFO, filename='pytorch_fine_tuning.log', filemode='w',
                    format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()

# Adapt ranges based on the intermediate search results
def objective(trial):
    hidden_dim = trial.suggest_int('hidden_dim', int(best_params['hidden_dim'] * 0.9), int(best_params['hidden_dim'] * 1.1))
    lr = trial.suggest_float('lr', best_params['lr'] * 0.9, best_params['lr'] * 1.1)

    model = Net(input_dim, hidden_dim, output_dim).to(device)
    criterion = nn.BCELoss()
    
    if best_params['optimizer'] == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    elif best_params['optimizer'] == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=lr)
    else:
        optimizer = optim.SGD(model.parameters(), lr=lr)

    for epoch in range(10):  # Train for 10 epochs
        model.train()
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(), y_batch)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_val_batch, y_val_batch in val_loader:
                val_outputs = model(X_val_batch)
                val_loss += criterion(val_outputs.squeeze(), y_val_batch).item()
        
        val_loss /= len(val_loader)
        trial.report(val_loss, epoch)
        
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    
    return val_loss

# Set up Optuna study
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)

# Get the best hyperparameters
best_params = study.best_params
logger.info(f"Best parameters from fine-tuning search: {best_params}")
logger.info(f"Best validation loss from fine-tuning search: {study.best_value}")

# Save results
results_df = study.trials_dataframe()
results_df.to_csv('pytorch_fine_tuning_search_results.csv', index=False)

# Train the final model with the best parameters
final_model = Net(input_dim, best_params['hidden_dim'], output_dim).to(device)
criterion = nn.BCELoss()

if best_params['optimizer'] == 'Adam':
    optimizer = optim.Adam(final_model.parameters(), lr=best_params['lr'])
elif best_params['optimizer'] == 'RMSprop':
    optimizer = optim.RMSprop(final_model.parameters(), lr=best_params['lr'])
else:
    optimizer = optim.SGD(final_model.parameters(), lr=best_params['lr'])

# Training the final model
final_model.train()
for epoch in range(10):  # Train for 10 epochs
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = final_model(X_batch)
        loss = criterion(outputs.squeeze(), y_batch)
        loss.backward()
        optimizer.step()

# Save final model
torch.save(final_model.state_dict(), 'final_pytorch_model_fine_tuned.pth')
logger.info('Final model saved to final_pytorch_model_fine_tuned.pth')

# Plot feature importances (if applicable)
# Note: Neural networks don't have feature importances like tree-based models,
# so this step can be omitted or replaced with another model evaluation metric.
