In [1]:
# PyTorch Hyperparameter Tuning Notebook

# Import Libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import logging
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
import optuna

# Setting up the logger
# This configures the root logger; filemode='w' starts a fresh log file on each run.
logging.basicConfig(level=logging.INFO, filename='pytorch_hyperparameter_tuning.log', filemode='w',
                    format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()

# Create the directory where plots are saved
os.makedirs('graphs_pytorch_tuning', exist_ok=True)

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {device}")

# Load Data
train_path = "train_pytorch_processed.csv"
test_path = "test_pytorch_processed.csv"

logger.info("Loading datasets...")
train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)
logger.info("Datasets loaded successfully.")
logger.info(f"Train dataset shape: {train_df.shape}")
logger.info(f"Test dataset shape: {test_df.shape}")

# Use a 40% sample of the training data
logger.info("Sampling 40% of the training data...")
train_sample = train_df.sample(frac=0.4, random_state=42)
logger.info(f"Train sample shape: {train_sample.shape}")

# Split data into features and target
feature_frame = train_sample.drop('Response', axis=1)
y = train_sample['Response']

# Choose the train/validation rows BEFORE scaling, using a seeded generator:
#  - the scaler below must only see training rows, otherwise validation
#    statistics leak into the features the model is tuned on;
#  - a fixed seed keeps the validation set identical across runs, matching
#    the random_state=42 used for the sampling above.
train_size = int(0.8 * len(feature_frame))
val_size = len(feature_frame) - train_size
split_generator = torch.Generator().manual_seed(42)
shuffled_rows = torch.randperm(len(feature_frame), generator=split_generator).tolist()
train_rows = shuffled_rows[:train_size]
val_rows = shuffled_rows[train_size:]

# Standardize the features (statistics come from the training rows only)
scaler = StandardScaler()
scaler.fit(feature_frame.iloc[train_rows])
X = scaler.transform(feature_frame)

# Convert data to PyTorch tensors
X_tensor = torch.tensor(X, dtype=torch.float32).to(device)
# unsqueeze(1) gives the target a trailing dimension of size 1, matching the
# model's single output column.
y_tensor = torch.tensor(y.values, dtype=torch.float32).unsqueeze(1).to(device)

# Define the PyTorch dataset
dataset = TensorDataset(X_tensor, y_tensor)

# Split the dataset into training and validation sets using the rows chosen above
train_dataset = torch.utils.data.Subset(dataset, train_rows)
val_dataset = torch.utils.data.Subset(dataset, val_rows)

# Define the DataLoader
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Define the model
class SimpleNN(nn.Module):
    """Feed-forward network with two hidden layers and a sigmoid output.

    The forward pass applies, in order: fc1 -> ReLU -> dropout -> fc2 -> ReLU
    -> dropout -> fc3 -> sigmoid, producing one value per input row.

    Args:
        input_dim: Number of input features.
        hidden_dim1: Width of the first hidden layer.
        hidden_dim2: Width of the second hidden layer.
        dropout_rate: Probability passed to ``nn.Dropout``; the same dropout
            module is applied after each hidden layer.
    """

    def __init__(self, input_dim, hidden_dim1, hidden_dim2, dropout_rate):
        super().__init__()
        # Linear layers, declared from input to output.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim1)
        self.fc2 = nn.Linear(in_features=hidden_dim1, out_features=hidden_dim2)
        self.fc3 = nn.Linear(in_features=hidden_dim2, out_features=1)
        # Parameter-free modules shared by both hidden layers.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output of the network for ``x``."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        hidden = self.dropout(self.relu(self.fc2(hidden)))
        pre_activation = self.fc3(hidden)
        return self.sigmoid(pre_activation)

# Define the objective function for Optuna
def objective(trial):
    """Optuna objective: train a ``SimpleNN`` and score it on the validation set.

    Samples layer widths, dropout rate and learning rate from ``trial``, trains
    a fresh model for a fixed number of epochs on the module-level
    ``train_loader``, then evaluates it on ``val_loader``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        ROC AUC of the model's predictions over ``val_loader``.
    """
    # Hyperparameters to tune (the order of suggest_* calls is kept stable).
    hidden_dim1 = trial.suggest_int('hidden_dim1', 64, 256)
    hidden_dim2 = trial.suggest_int('hidden_dim2', 32, 128)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    # NOTE(review): sampled uniformly over two orders of magnitude; consider
    # suggest_float(..., log=True) if a log-scale search is intended.
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2)

    # Create the model
    model = SimpleNN(X.shape[1], hidden_dim1, hidden_dim2, dropout_rate).to(device)

    # Define the loss function and optimizer. BCELoss expects probabilities,
    # which the model's final sigmoid provides.
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop. The running loss is not tracked here: nothing reads it,
    # and calling loss.item() per batch only to discard the value is wasted work.
    num_epochs = 10
    model.train()
    for _ in range(num_epochs):
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            loss = criterion(model(X_batch), y_batch)
            loss.backward()
            optimizer.step()

    # Validation loop: eval() disables dropout, no_grad() skips autograd bookkeeping.
    model.eval()
    target_batches = []
    score_batches = []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            score_batches.append(model(X_batch))
            target_batches.append(y_batch)

    # Concatenate once and flatten to 1-D so the metric receives plain
    # (n_samples,) arrays rather than a list of shape-(1,) arrays.
    y_true = torch.cat(target_batches).flatten().cpu().numpy()
    y_pred = torch.cat(score_batches).flatten().cpu().numpy()

    # Calculate ROC AUC score
    return roc_auc_score(y_true, y_pred)

# Set up Optuna study (the objective returns ROC AUC, so higher is better)
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Get the best hyperparameters
best_params = study.best_params
logger.info(f"Best parameters: {best_params}")

# Train the final model with the best parameters
hidden_dim1 = best_params['hidden_dim1']
hidden_dim2 = best_params['hidden_dim2']
dropout_rate = best_params['dropout_rate']
learning_rate = best_params['learning_rate']

final_model = SimpleNN(X.shape[1], hidden_dim1, hidden_dim2, dropout_rate).to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(final_model.parameters(), lr=learning_rate)

# Training loop for the final model
num_epochs = 10
# Mean training loss of every epoch, kept so the curve can be plotted below.
# (epoch_loss alone only holds the running sum of the most recent epoch.)
epoch_losses = []
final_model.train()
for epoch in range(num_epochs):
    epoch_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = final_model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    mean_epoch_loss = epoch_loss / len(train_loader)
    epoch_losses.append(mean_epoch_loss)
    logger.info(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_epoch_loss:.4f}")

# Validation loop for the final model
final_model.eval()
y_true = []
y_pred = []
with torch.no_grad():
    for X_batch, y_batch in val_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        outputs = final_model(X_batch)
        y_true.extend(y_batch.cpu().numpy())
        y_pred.extend(outputs.cpu().numpy())

roc_auc = roc_auc_score(y_true, y_pred)
logger.info(f"Validation ROC AUC Score with best parameters: {roc_auc}")

print(f"Validation ROC AUC Score with best parameters: {roc_auc}")

# Save the final model (weights only, via state_dict)
torch.save(final_model.state_dict(), 'final_pytorch_model.pth')
logger.info('Final model saved to final_pytorch_model.pth')

# Visualize training loss over epochs.
# One point per epoch; x runs 1..num_epochs to match the epoch numbers logged above.
plt.plot(range(1, num_epochs + 1), epoch_losses)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss Over Epochs')
# Save before show(): some backends clear the figure once it has been shown.
plt.savefig('graphs_pytorch_tuning/training_loss.png')
plt.show()
logger.info('Training loss plot saved.')


[I 2024-07-11 00:55:28,944] A new study created in memory with name: no-name-4a57aba3-a16a-468f-9e0c-869247892c37
[I 2024-07-11 01:56:04,597] Trial 0 finished with value: 0.8535028001969027 and parameters: {'hidden_dim1': 197, 'hidden_dim2': 79, 'dropout_rate': 0.311631420927649, 'learning_rate': 0.0038242493070039844}. Best is trial 0 with value: 0.8535028001969027.
[I 2024-07-11 02:56:56,897] Trial 1 finished with value: 0.855951698884446 and parameters: {'hidden_dim1': 123, 'hidden_dim2': 54, 'dropout_rate': 0.3591830556266409, 'learning_rate': 0.001593215219241058}. Best is trial 1 with value: 0.855951698884446.
[I 2024-07-11 03:58:22,534] Trial 2 finished with value: 0.8535594970794276 and parameters: {'hidden_dim1': 151, 'hidden_dim2': 89, 'dropout_rate': 0.1385585250764129, 'learning_rate': 0.0052568783848872735}. Best is trial 1 with value: 0.855951698884446.
[I 2024-07-11 05:00:26,061] Trial 3 finished with value: 0.8374504152727854 and parameters: {'hidden_dim1': 154, 'hidden