<a href="https://colab.research.google.com/github/makhlufiaero338/tugas-machine-learning/blob/main/week14/RNN_dan_Deep_RNN_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Import Libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np
import os

In [2]:
# Step 2: Upload Data from Computer
from google.colab import files

In [4]:
# Open Colab's upload dialog and keep the result in `uploaded`.
# Later cells use the first key of `uploaded` as the path of the CSV to read.
uploaded = files.upload()

Saving data.csv to data.csv


In [5]:
# Read the first uploaded file as CSV (swap the reader if your file uses another format).
import pandas as pd

data = pd.read_csv(list(uploaded)[0])

# Positional split (adjust to your dataset): every column except the last is an
# input feature, and the last column is the target.
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

In [9]:
# Step 2: Upload Data and Preprocess
import pandas as pd
from google.colab import files
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

uploaded = files.upload()
if not uploaded:
    # Fail with a clear message instead of an IndexError on the lookup below.
    raise ValueError("No file was uploaded.")
data = pd.read_csv(list(uploaded.keys())[0])

# Separate the target (last column) BEFORE encoding. pd.get_dummies places the
# encoded columns at the end of the frame, so taking `iloc[:, -1]` after encoding
# would pick a one-hot feature column instead of the target.
features = data.iloc[:, :-1]
target = data.iloc[:, -1]

# Identify categorical and numerical feature columns.
categorical_columns = features.select_dtypes(include=['object']).columns
numerical_columns = features.select_dtypes(include=['int64', 'float64']).columns

# One-hot encode the categorical features; dtype=float keeps the matrix numeric.
features = pd.get_dummies(features, columns=categorical_columns, drop_first=True, dtype=float)

# Build X and y. A string-valued target is mapped to integer class codes
# (sorted class order) so it can be converted to a torch.long tensor later.
# NOTE(review): missing target values would be encoded as -1 by factorize - confirm
# the dataset has none.
X = features.values
if target.dtype == object:
    y, target_classes = pd.factorize(target, sort=True)
else:
    y = target.values

# Split first, then standardize: the scaler is fit on the training rows only so
# that no test-set statistics leak into training. `X` itself stays unscaled.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Saving data.csv to data (1).csv


In [10]:
# Step 3: Create Dataset and Dataloader
class CustomDataset(Dataset):
    """In-memory dataset that pairs a feature tensor with a label tensor.

    Args:
        X: Array-like of features; stored as a ``torch.float32`` tensor.
        y: Array-like of labels; stored as a ``torch.long`` (int64) tensor.
    """

    def __init__(self, X, y):
        # torch.tensor copies its input, so the dataset owns its own storage.
        self.X = torch.tensor(X, dtype=torch.float)
        self.y = torch.tensor(y, dtype=torch.int64)

    def __len__(self):
        """Return the number of samples (length of the feature tensor)."""
        sample_count = len(self.X)
        return sample_count

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        sample_features = self.X[idx]
        sample_label = self.y[idx]
        return sample_features, sample_label

# Wrap each split in a CustomDataset.
train_dataset, test_dataset = CustomDataset(X_train, y_train), CustomDataset(X_test, y_test)

# Batches of 32; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [11]:
# Step 4: Define the RNN Model
class RNNModel(nn.Module):
    """Elman RNN followed by pooling over time and a linear classification head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the RNN hidden state.
        output_size: Number of output units (class logits).
        num_layers: Number of stacked RNN layers.
        pooling: ``"avg"`` or ``"max"`` to pool the RNN outputs over the time
            dimension. Any other value skips pooling, in which case the linear
            head is applied to every time step.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers, pooling="avg"):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.pooling = pooling
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Compute logits for ``x``.

        Args:
            x: Either ``(batch, seq_len, input_size)`` or, for tabular data,
                ``(batch, input_size)``. A 2-D input is treated as a batch of
                length-1 sequences.

        Returns:
            ``(batch, output_size)`` logits when pooling is ``"avg"``/``"max"``;
            otherwise ``(batch, seq_len, output_size)``.
        """
        # nn.RNN would interpret a 2-D tensor as ONE unbatched sequence, which makes
        # the dim=1 pooling below collapse the hidden dimension and breaks the
        # linear head. Add an explicit time axis so each row is its own sample.
        if x.dim() == 2:
            x = x.unsqueeze(1)

        out, _ = self.rnn(x)
        if self.pooling == "max":
            out = torch.max(out, dim=1).values
        elif self.pooling == "avg":
            out = torch.mean(out, dim=1)
        out = self.fc(out)
        return out

In [15]:
# Step 5: Train and Evaluate
def train_model(model, optimizer, scheduler, criterion, epochs, patience, train_loader, test_loader):
    """Train ``model`` with early stopping and restore its best-scoring weights.

    Each epoch runs one pass over ``train_loader``, steps ``scheduler`` once, and
    then measures accuracy on ``test_loader``. The weights of the epoch with the
    highest accuracy are snapshotted and loaded back into the model at the end.

    Args:
        model: Module to train; called as ``model(X_batch)``.
        optimizer: Optimizer over the model's parameters.
        scheduler: Learning-rate scheduler; ``step()`` is called once per epoch.
        criterion: Loss called as ``criterion(outputs, y_batch)``.
        epochs: Maximum number of epochs.
        patience: Training stops once the number of consecutive epochs without
            an accuracy improvement exceeds this value.
        train_loader: Iterable of ``(X_batch, y_batch)`` used for optimization.
        test_loader: Iterable of ``(X_batch, y_batch)`` used for evaluation.

    Returns:
        ``(model, best_acc)``: the model with its best weights loaded and the
        highest accuracy observed.

    NOTE(review): the same loader drives model selection and the reported
    accuracy, so the returned score is optimistic; consider a separate
    validation split.
    """
    best_acc = 0
    best_state = None
    patience_counter = 0
    # Guard the average-loss division against an empty training loader.
    num_batches = max(len(train_loader), 1)

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        scheduler.step()

        # Validation
        model.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch)
                preds = torch.argmax(outputs, dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(y_batch.cpu().numpy())

        acc = accuracy_score(all_labels, all_preds)
        # Always snapshot the first epoch so there is something to restore even
        # when accuracy never rises above the initial best_acc of 0.
        if best_state is None or acc > best_acc:
            best_acc = acc
            # state_dict() returns references to the live parameter tensors; clone
            # them, otherwise later optimizer steps overwrite the "best" weights.
            best_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            patience_counter = 0
        else:
            patience_counter += 1

        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/num_batches}, Acc: {acc}")

        if patience_counter > patience:
            print("Early stopping triggered.")
            break

    # best_state is None only when no epoch ran (epochs == 0).
    if best_state is not None:
        model.load_state_dict(best_state)
    return model, best_acc

In [None]:
# Step 6: Experiment with Hyperparameters
input_size = X_train.shape[1]
output_size = len(np.unique(y))  # one logit per distinct label value
results = []

hidden_sizes = [32, 64, 128]
pooling_methods = ["avg", "max"]
epochs_list = [5, 50, 100, 250, 350]
optimizers = {"SGD": optim.SGD, "RMSProp": optim.RMSprop, "Adam": optim.Adam}

# Full grid over hidden size x pooling x optimizer x epoch budget. The epoch
# budget comes from `epochs_list` rather than a hard-coded value, so the sweep
# actually covers the epoch settings declared above.
for hidden_size in hidden_sizes:
    for pooling in pooling_methods:
        for optimizer_name, optimizer_class in optimizers.items():
            for epochs in epochs_list:
                print(f"Testing: Hidden Size={hidden_size}, Pooling={pooling}, "
                      f"Optimizer={optimizer_name}, Epochs={epochs}")

                # Re-seed before every run so each configuration starts from the
                # same random state and the comparison is reproducible.
                torch.manual_seed(42)

                model = RNNModel(input_size, hidden_size, output_size, num_layers=1, pooling=pooling)
                optimizer = optimizer_class(model.parameters(), lr=0.01)
                # Multiply the learning rate by 0.1 every 10 epochs.
                scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
                criterion = nn.CrossEntropyLoss()

                trained_model, acc = train_model(model, optimizer, scheduler, criterion,
                                                 epochs=epochs, patience=10,
                                                 train_loader=train_loader,
                                                 test_loader=test_loader)

                results.append({
                    "Hidden Size": hidden_size,
                    "Pooling": pooling,
                    "Optimizer": optimizer_name,
                    "Epochs": epochs,
                    "Accuracy": acc
                })

In [None]:
# Step 7: Save Results
# Turn the per-configuration records collected in `results` into a DataFrame
# and write it to results.csv without the index column.
results_df = pd.DataFrame(results)
results_df.to_csv("results.csv", index=False)

In [None]:
# Step 8: Generate PDF Report
from fpdf import FPDF

pdf = FPDF()
pdf.set_font("Arial", size=12)
pdf.add_page()

# Width 0 stretches the cell to the right margin, so the title is centered
# within the printable area.
pdf.cell(0, 10, txt="Experiment Results", ln=True, align='C')

# One entry per experiment. A single-line cell neither wraps nor clips, so a
# long row dict could run past the page edge; multi_cell wraps it instead.
for _, row in results_df.iterrows():
    # Start every entry at the left margin regardless of where the previous
    # call left the cursor.
    pdf.set_x(pdf.l_margin)
    pdf.multi_cell(0, 10, txt=str(row.to_dict()), align='L')

pdf.output("experiment_report.pdf")

In [None]:
# Download Results
# Download the two artifacts written by the previous cells:
# the results CSV and the PDF report.
from google.colab import files
files.download("results.csv")
files.download("experiment_report.pdf")