In [35]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

In [36]:

# ===========================
# Data Loading and Preprocessing
# ===========================
# Load dataset
file_path = 'diabetic_data.csv'
data = pd.read_csv(file_path)

# Encode the categorical target column 'readmitted' as integer class ids.
# The fitted encoder is kept because its `classes_` attribute is read later
# to determine the number of output classes.
label_encoder = LabelEncoder()
data['readmitted'] = label_encoder.fit_transform(data['readmitted'])

# Label-encode every remaining object-typed column. The target was already
# converted above, so it is no longer selected here.
categorical_columns = data.select_dtypes(include=['object']).columns
for col in categorical_columns:
    data[col] = LabelEncoder().fit_transform(data[col])

# Separate features and target
X = data.drop(columns=['readmitted'])
y = data['readmitted']

# Replace missing feature values with 0
X = X.fillna(0)

# Split BEFORE scaling: fitting the scaler on the full matrix would let the
# test rows influence the mean/variance used to normalize the training rows
# (data leakage). The same random_state keeps the same row assignment.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then apply it to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Keep `X` bound to a scaled ndarray (now using training-split statistics),
# in case a cell outside this section still reads it.
X = scaler.transform(X)

# Convert data to PyTorch tensors (float32 features, int64 class labels)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# Move tensors to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
X_train_tensor = X_train_tensor.to(device)
X_test_tensor = X_test_tensor.to(device)
y_train_tensor = y_train_tensor.to(device)
y_test_tensor = y_test_tensor.to(device)

In [37]:
# ===========================
# RNN Model Definition
# ===========================
class RNNModel(nn.Module):
    """Single-layer RNN classifier that pools over the sequence dimension.

    The input goes through an ``nn.RNN`` created with ``batch_first=True``,
    the per-step hidden states are reduced along dimension 1 with max or
    average pooling, and a linear layer maps the pooled vector to class
    logits.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the RNN hidden state.
        num_classes: Number of output logits.
        pooling_type: ``'max'`` or ``'avg'``.

    Raises:
        ValueError: If ``pooling_type`` is not one of the supported values.
    """

    # Pooling modes understood by forward().
    _POOLING_TYPES = ('max', 'avg')

    def __init__(self, input_size, hidden_size, num_classes, pooling_type):
        super(RNNModel, self).__init__()
        # Fail fast: an unknown pooling type would otherwise skip pooling and
        # hand a 3-D tensor to the linear layer, surfacing later as a confusing
        # loss/shape error.
        if pooling_type not in self._POOLING_TYPES:
            raise ValueError(
                f"Unsupported pooling_type {pooling_type!r}; expected one of {self._POOLING_TYPES}"
            )
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)  # Define RNN layer
        self.pooling_type = pooling_type  # Pooling type: 'max' or 'avg'
        self.fc = nn.Linear(hidden_size, num_classes)  # Fully connected layer for classification

    def forward(self, x):
        """Return class logits for a batched input.

        Args:
            x: Batched tensor whose dimension 1 is the sequence axis
                (``batch_first=True``) and whose last dimension is
                ``input_size``.

        Returns:
            Tensor of logits with the sequence axis pooled away.

        Raises:
            ValueError: If ``pooling_type`` was changed to an unsupported
                value after construction.
        """
        out, _ = self.rnn(x)  # RNN forward pass; hidden states for every step
        if self.pooling_type == 'max':
            out = torch.max(out, dim=1).values  # Max pooling over the sequence axis
        elif self.pooling_type == 'avg':
            out = torch.mean(out, dim=1)  # Average pooling over the sequence axis
        else:
            raise ValueError(
                f"Unsupported pooling_type {self.pooling_type!r}; expected one of {self._POOLING_TYPES}"
            )
        out = self.fc(out)  # Final classification layer
        return out


In [38]:
# ===========================
# Hyperparameter Configurations
# ===========================
# Dimensions taken from the prepared training tensor and the fitted target encoder
input_size = X_train_tensor.size(1)  # Number of input features
num_classes = label_encoder.classes_.shape[0]  # Number of output classes

# Search grid: every combination of the values below is trained and evaluated
hidden_sizes = [32, 64, 128]  # RNN hidden-state sizes to try
pooling_types = ['max', 'avg']  # Sequence pooling methods to try
epochs_list = [5, 50, 100, 250, 350]  # Maximum epoch counts to try
optimizers = ['SGD']  # Optimizer names to try

In [39]:
# ===========================
# Training and Evaluation Loop with Optimizer Name in Epoch List
# ===========================
best_model = None
best_accuracy = 0
best_params = {}

# Final result of every hyperparameter combination, stored as
# "hidden_size,pooling_type,epochs,optimizer_name,accuracy" strings.
results_summary = []

# Supported optimizer names mapped to their torch.optim constructors.
optimizer_factories = {
    'SGD': optim.SGD,
    'RMSprop': optim.RMSprop,
    'Adam': optim.Adam,
}

# Reject unknown optimizer names before any training starts. Without this
# check an unknown name would leave `optimizer` unbound (NameError) or, worse,
# still bound to the previous combination's optimizer, which updates a
# different model's parameters, so the new model would never train.
unknown_optimizers = [name for name in optimizers if name not in optimizer_factories]
if unknown_optimizers:
    raise ValueError(
        f"Unsupported optimizer(s) {unknown_optimizers}; expected names from {sorted(optimizer_factories)}"
    )

learning_rate = 0.01
early_stop_patience = 10  # Epochs without validation-loss improvement before stopping

# Loop-invariant pieces, built once instead of once per epoch/combination.
criterion = nn.CrossEntropyLoss()
train_inputs = X_train_tensor.unsqueeze(1)  # Add a length-1 sequence axis for the RNN
test_inputs = X_test_tensor.unsqueeze(1)
y_test_cpu = y_test_tensor.cpu()  # accuracy_score is given CPU tensors

# NOTE(review): the test split is used for early stopping AND for choosing the
# best combination, so `best_accuracy` is an optimistic estimate. A separate
# validation split would be needed for an unbiased number.
for hidden_size in hidden_sizes:
    for pooling_type in pooling_types:
        for optimizer_name in optimizers:
            for epochs in epochs_list:
                print(f"--- Hidden Size: {hidden_size}, Pooling: {pooling_type}, Optimizer: {optimizer_name}, Epochs: {epochs} ---")

                # Fresh model and optimizer for every combination
                model = RNNModel(input_size, hidden_size, num_classes, pooling_type).to(device)
                optimizer = optimizer_factories[optimizer_name](model.parameters(), lr=learning_rate)

                # Early stopping state
                best_loss = float('inf')
                patience_counter = 0

                # Training loop (full-batch: one optimizer step per epoch)
                for epoch in range(epochs):
                    model.train()  # Set model to training mode
                    optimizer.zero_grad()  # Clear gradients
                    outputs = model(train_inputs)  # Forward pass
                    loss = criterion(outputs, y_train_tensor)  # Compute loss
                    loss.backward()  # Backpropagation
                    optimizer.step()  # Update parameters

                    # Validation step
                    model.eval()  # Set model to evaluation mode
                    with torch.no_grad():
                        val_outputs = model(test_inputs)  # Validation forward pass
                        val_loss = criterion(val_outputs, y_test_tensor).item()  # Compute validation loss
                        val_predictions = val_outputs.argmax(dim=1)  # Predicted classes
                        val_accuracy = accuracy_score(y_test_cpu, val_predictions.cpu()) * 100  # Accuracy in percent

                    # Log epoch details with optimizer name
                    print(f"Epoch {epoch + 1}/{epochs}, Optimizer: {optimizer_name}, Loss: {loss.item():.4f}, Val Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.2f}%")

                    # Early stopping check: reset the counter on improvement,
                    # otherwise count another epoch without progress.
                    if val_loss < best_loss:
                        best_loss = val_loss
                        patience_counter = 0
                    else:
                        patience_counter += 1

                    if patience_counter >= early_stop_patience:
                        print("Early stopping triggered.")
                        break

                # Evaluate final model for this combination
                model.eval()  # Set model to evaluation mode
                with torch.no_grad():
                    predictions = model(test_inputs).argmax(dim=1)  # Predict on test set
                    accuracy = accuracy_score(y_test_cpu, predictions.cpu())  # Fraction correct (0-1)

                # Log final accuracy for the current hyperparameter combination
                print(f"Final Accuracy for Hidden Size: {hidden_size}, Pooling: {pooling_type}, Optimizer: {optimizer_name}, Epochs: {epochs}: {accuracy:.6f}")
                results_summary.append(f"{hidden_size},{pooling_type},{epochs},{optimizer_name},{accuracy}")

                # Save best model and parameters
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_model = model
                    best_params = {
                        'hidden_size': hidden_size,
                        'pooling_type': pooling_type,
                        'optimizer': optimizer_name,
                        'epochs': epochs
                    }




--- Hidden Size: 32, Pooling: max, Optimizer: SGD, Epochs: 5 ---
Epoch 1/5, Optimizer: SGD, Loss: 1.1486, Val Loss: 1.1475, Accuracy: 29.23%
Epoch 2/5, Optimizer: SGD, Loss: 1.1462, Val Loss: 1.1452, Accuracy: 29.58%
Epoch 3/5, Optimizer: SGD, Loss: 1.1439, Val Loss: 1.1429, Accuracy: 29.88%
Epoch 4/5, Optimizer: SGD, Loss: 1.1417, Val Loss: 1.1407, Accuracy: 30.12%
Epoch 5/5, Optimizer: SGD, Loss: 1.1394, Val Loss: 1.1385, Accuracy: 30.38%
Final Accuracy for Hidden Size: 32, Pooling: max, Optimizer: SGD, Epochs: 5: 0.303773
--- Hidden Size: 64, Pooling: max, Optimizer: SGD, Epochs: 5 ---
Epoch 1/5, Optimizer: SGD, Loss: 1.1002, Val Loss: 1.0974, Accuracy: 39.62%
Epoch 2/5, Optimizer: SGD, Loss: 1.0984, Val Loss: 1.0956, Accuracy: 40.14%
Epoch 3/5, Optimizer: SGD, Loss: 1.0967, Val Loss: 1.0939, Accuracy: 40.59%
Epoch 4/5, Optimizer: SGD, Loss: 1.0949, Val Loss: 1.0922, Accuracy: 41.02%
Epoch 5/5, Optimizer: SGD, Loss: 1.0932, Val Loss: 1.0905, Accuracy: 41.40%
Final Accuracy for Hidde

In [40]:
# ===========================
# Results
# ===========================
print("\nHyperparameter Results Summary:")
for result in results_summary:
    # Split the summary record into its individual fields. Distinct names are
    # used so this loop does not overwrite the notebook-level variables
    # hidden_size / pooling_type / epochs / optimizer_name / accuracy with
    # strings.
    summary_hidden, summary_pooling, summary_epochs, summary_optimizer, summary_accuracy = result.split(',')
    print(f"Final Accuracy for Hidden Size: {summary_hidden}, Pooling: {summary_pooling}, Optimizer: {summary_optimizer}, Epochs: {summary_epochs}: {summary_accuracy}")

print("\nBest Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)



Hyperparameter Results Summary:
Final Accuracy for Hidden Size: 32, Pooling: max, Optimizer: SGD, Epochs: 5: 0.3037732141102486
Final Accuracy for Hidden Size: 64, Pooling: max, Optimizer: SGD, Epochs: 5: 0.4139726835020143
Final Accuracy for Hidden Size: 128, Pooling: max, Optimizer: SGD, Epochs: 5: 0.3448953522649111

Best Hyperparameters: {'hidden_size': 64, 'pooling_type': 'max', 'optimizer': 'SGD', 'epochs': 5}
Best Accuracy: 0.4139726835020143
