In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

In [18]:
# ===========================
# Data Loading and Preprocessing
# ===========================
# Load dataset
file_path = 'diabetic_data.csv'
data = pd.read_csv(file_path)

# Encode categorical target column 'readmitted'
label_encoder = LabelEncoder()
data['readmitted'] = label_encoder.fit_transform(data['readmitted'])

# Identify categorical features and apply encoding
categorical_columns = data.select_dtypes(include=['object']).columns
for col in categorical_columns:
    data[col] = LabelEncoder().fit_transform(data[col])

# Separate features and target
X = data.drop(columns=['readmitted'])
y = data['readmitted']

# Fill missing values and normalize features
X = X.fillna(0)  # Replace missing values with 0
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# Move tensors to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
X_train_tensor = X_train_tensor.to(device)
X_test_tensor = X_test_tensor.to(device)
y_train_tensor = y_train_tensor.to(device)
y_test_tensor = y_test_tensor.to(device)


In [19]:
# ===========================
# Deep RNN Model Definition
# ===========================
class DeepRNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, pooling_type):
        super(DeepRNNModel, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=num_layers, batch_first=True)  # Define deep RNN
        self.pooling_type = pooling_type  # Pooling type: 'max' or 'avg'
        self.fc = nn.Linear(hidden_size, num_classes)  # Fully connected layer for classification

    def forward(self, x):
        out, _ = self.rnn(x)  # RNN forward pass
        if self.pooling_type == 'max':
            out = torch.max(out, dim=1).values  # Max pooling
        elif self.pooling_type == 'avg':
            out = torch.mean(out, dim=1)  # Average pooling
        out = self.fc(out)  # Final classification layer
        return out

In [20]:
# ===========================
# Hyperparameter Configurations
# ===========================
input_size = X_train_tensor.shape[1]  # Number of input features
num_classes = len(label_encoder.classes_)  # Number of output classes
hidden_sizes = [32, 64, 128]  # List of hidden sizes to test
num_layers_list = [2, 3, 4]  # Number of layers to test
pooling_types = ['max', 'avg']  # Pooling methods to test
epochs_list = [5, 50, 100, 250, 350]  # Number of epochs to test
optimizers = ['SGD', 'RMSprop', 'Adam']  # Optimizers to test

In [21]:
# ===========================
# Training and Evaluation Loop with Summary Logging
# ===========================
best_model = None
best_accuracy = 0
best_params = {}

# Untuk menyimpan hasil akhir dari semua kombinasi hyperparameter
results_summary = []

for hidden_size in hidden_sizes:
    for num_layers in num_layers_list:
        for pooling_type in pooling_types:
            for optimizer_name in optimizers:
                for epochs in epochs_list:
                    print(f"--- Hidden Size: {hidden_size}, Layers: {num_layers}, Pooling: {pooling_type}, Optimizer: {optimizer_name}, Epochs: {epochs} ---")

                    # Initialize model and criterion
                    model = DeepRNNModel(input_size, hidden_size, num_layers, num_classes, pooling_type).to(device)
                    criterion = nn.CrossEntropyLoss()

                    # Select optimizer
                    if optimizer_name == 'SGD':
                        optimizer = optim.SGD(model.parameters(), lr=0.01)
                    elif optimizer_name == 'RMSprop':
                        optimizer = optim.RMSprop(model.parameters(), lr=0.01)
                    elif optimizer_name == 'Adam':
                        optimizer = optim.Adam(model.parameters(), lr=0.01)

                    # Early stopping setup
                    early_stop_patience = 10
                    best_loss = float('inf')
                    patience_counter = 0

                    # Training loop
                    for epoch in range(epochs):
                        model.train()  # Set model to training mode
                        optimizer.zero_grad()  # Clear gradients
                        outputs = model(X_train_tensor.unsqueeze(1))  # Forward pass
                        loss = criterion(outputs, y_train_tensor)  # Compute loss
                        loss.backward()  # Backpropagation
                        optimizer.step()  # Update parameters

                        # Validation step
                        model.eval()  # Set model to evaluation mode
                        with torch.no_grad():
                            val_outputs = model(X_test_tensor.unsqueeze(1))  # Validation forward pass
                            val_loss = criterion(val_outputs, y_test_tensor).item()  # Compute validation loss
                            val_predictions = val_outputs.argmax(dim=1)  # Predicted classes
                            val_accuracy = accuracy_score(y_test_tensor.cpu(), val_predictions.cpu()) * 100  # Accuracy

                        # Log epoch details
                        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.2f}%")

                        # Early stopping check
                        if val_loss < best_loss:
                            best_loss = val_loss
                            patience_counter = 0
                        else:
                            patience_counter += 1

                        if patience_counter >= early_stop_patience:
                            print("Early stopping triggered.")
                            break

                    # Evaluate final model
                    model.eval()  # Set model to evaluation mode
                    with torch.no_grad():
                        predictions = model(X_test_tensor.unsqueeze(1)).argmax(dim=1)  # Predict on test set
                        accuracy = accuracy_score(y_test_tensor.cpu(), predictions.cpu())  # Compute accuracy

                    # Log final accuracy for the current hyperparameter combination
                    print(f"Final Accuracy for Hidden Size: {hidden_size}, Layers: {num_layers}, Pooling: {pooling_type}, Optimizer: {optimizer_name}, Epochs: {epochs}: {accuracy:.6f}")
                    results_summary.append(f"{hidden_size},{num_layers},{pooling_type},{optimizer_name},{epochs},{accuracy}")

                    # Save best model and parameters
                    if accuracy > best_accuracy:
                        best_accuracy = accuracy
                        best_model = model
                        best_params = {
                            'hidden_size': hidden_size,
                            'num_layers': num_layers,
                            'pooling_type': pooling_type,
                            'optimizer': optimizer_name,
                            'epochs': epochs
                        }



Epoch 9/100, Loss: 1.1071, Val Loss: 1.1044, Accuracy: 30.92%
Epoch 10/100, Loss: 1.1047, Val Loss: 1.1021, Accuracy: 31.50%
Epoch 11/100, Loss: 1.1023, Val Loss: 1.0997, Accuracy: 32.06%
Epoch 12/100, Loss: 1.1000, Val Loss: 1.0974, Accuracy: 32.54%
Epoch 13/100, Loss: 1.0977, Val Loss: 1.0951, Accuracy: 33.03%
Epoch 14/100, Loss: 1.0954, Val Loss: 1.0929, Accuracy: 33.56%
Epoch 15/100, Loss: 1.0931, Val Loss: 1.0907, Accuracy: 33.99%
Epoch 16/100, Loss: 1.0909, Val Loss: 1.0885, Accuracy: 34.38%
Epoch 17/100, Loss: 1.0888, Val Loss: 1.0864, Accuracy: 34.95%
Epoch 18/100, Loss: 1.0866, Val Loss: 1.0843, Accuracy: 35.40%
Epoch 19/100, Loss: 1.0845, Val Loss: 1.0823, Accuracy: 35.85%
Epoch 20/100, Loss: 1.0824, Val Loss: 1.0802, Accuracy: 36.36%
Epoch 21/100, Loss: 1.0804, Val Loss: 1.0782, Accuracy: 36.90%
Epoch 22/100, Loss: 1.0784, Val Loss: 1.0763, Accuracy: 37.43%
Epoch 23/100, Loss: 1.0764, Val Loss: 1.0743, Accuracy: 37.99%
Epoch 24/100, Loss: 1.0745, Val Loss: 1.0724, Accuracy: 

In [22]:
# ===========================
# Results
# ===========================
print("\nHyperparameter Results Summary:")
for result in results_summary:
    # Parse hasil summary
    hidden_size, num_layers, pooling_type, optimizer_name, epochs, accuracy = result.split(',')
    print(f"Final Accuracy for Hidden Size: {hidden_size}, Layers: {num_layers}, Pooling: {pooling_type}, Optimizer: {optimizer_name}, Epochs: {epochs}: {accuracy}")

print("\nBest Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)



Hyperparameter Results Summary:
Final Accuracy for Hidden Size: 32, Layers: 2, Pooling: max, Optimizer: SGD, Epochs: 5: 0.25724673282892796
Final Accuracy for Hidden Size: 32, Layers: 2, Pooling: max, Optimizer: SGD, Epochs: 50: 0.5230912842684484
Final Accuracy for Hidden Size: 32, Layers: 2, Pooling: max, Optimizer: SGD, Epochs: 100: 0.5367004028692149
Final Accuracy for Hidden Size: 32, Layers: 2, Pooling: max, Optimizer: SGD, Epochs: 250: 0.5396482263928466
Final Accuracy for Hidden Size: 32, Layers: 2, Pooling: max, Optimizer: SGD, Epochs: 350: 0.5406799646261177
Final Accuracy for Hidden Size: 32, Layers: 2, Pooling: avg, Optimizer: SGD, Epochs: 5: 0.4713078510366513
Final Accuracy for Hidden Size: 32, Layers: 2, Pooling: avg, Optimizer: SGD, Epochs: 50: 0.5256460646555959
Final Accuracy for Hidden Size: 32, Layers: 2, Pooling: avg, Optimizer: SGD, Epochs: 100: 0.5419573548196914
Final Accuracy for Hidden Size: 32, Layers: 2, Pooling: avg, Optimizer: SGD, Epochs: 250: 0.53905866