In [94]:
import itertools

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

In [95]:
# 1. Device setup (CPU/GPU)
# Choose the compute device first; the CUDA flag reported below is read back
# from that choice, so the printed lines and the selected device always agree.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print("CUDA Available:", device.type == "cuda")
if device.type == "cuda":
    # Index 0 is the first visible CUDA device.
    print("GPU Device Name:", torch.cuda.get_device_name(0))
print("Device digunakan:", device)

CUDA Available: True
GPU Device Name: NVIDIA GeForce RTX 2070 SUPER
Device digunakan: cuda


In [96]:
# 2. Load the dataset
data = pd.read_csv('winequality-white.csv', sep=';')  # the file uses ';' as its field separator
print("Dataset shape:", data.shape)
print("Columns:", data.columns)
print(data.head())

# Sanity check: at least one feature column plus the target column is required
if len(data.columns) < 2:
    raise ValueError("Dataset harus memiliki minimal satu kolom fitur dan satu kolom target!")

# Separate features (every column but the last) from the target (last column)
X = data.iloc[:, :-1].to_numpy()
y = data.iloc[:, -1].to_numpy()

# Replace NaN entries (np.nan_to_num maps NaN to 0.0) if either array has any
if any(np.isnan(arr).any() for arr in (X, y)):
    print("Handling NaN values in the dataset...")
    X, y = np.nan_to_num(X), np.nan_to_num(y)

Dataset shape: (4898, 12)
Columns: Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.0              0.27         0.36            20.7      0.045   
1            6.3              0.30         0.34             1.6      0.049   
2            8.1              0.28         0.40             6.9      0.050   
3            7.2              0.23         0.32             8.5      0.058   
4            7.2              0.23         0.32             8.5      0.058   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 45.0                 170.0   1.0010  3.00       0.45   
1                 14.0                 132.0   0.9940  3.30       0.49   
2                 30.0                  97.0   0

In [97]:
# 3. Preprocessing Data
scaler = StandardScaler()
X = scaler.fit_transform(X)  # Normalisasi fitur
y = (y - y.min()) / (y.max() - y.min())  # Normalisasi target

# Split data menjadi train-test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert ke tensor PyTorch
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1).to(device)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

In [98]:
# 4. MLP model
class MLPRegression(nn.Module):
    """Fully connected regression network with a single output unit.

    Each entry of ``hidden_layers`` adds one ``nn.Linear`` followed by
    ``activation_function``; a final ``nn.Linear`` maps to one output value.
    """

    def __init__(self, input_dim, hidden_layers, activation_function):
        """Assemble the layer stack.

        Args:
            input_dim: Number of input features.
            hidden_layers: Iterable of hidden-layer widths, in order.
            activation_function: Module instance placed after every hidden
                linear layer (the same instance is reused for each layer).
        """
        super().__init__()
        # widths[i] -> widths[i + 1] describes the i-th hidden linear layer
        widths = [input_dim, *hidden_layers]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), activation_function]
        stack.append(nn.Linear(widths[-1], 1))  # output layer
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.model(x)

In [99]:
# 5. Training and evaluation function
def train_and_evaluate(model, optimizer, criterion, train_loader, test_loader, epochs):
    """Train ``model`` for ``epochs`` passes and return its MSE on ``test_loader``.

    Evaluation iterates over ``test_loader`` (rather than reading notebook-level
    test tensors), so the function depends only on its arguments.

    Args:
        model: The ``nn.Module`` to train; batches are moved to its device.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss called as ``criterion(prediction, target)``.
        train_loader: Iterable of ``(features, targets)`` training batches.
        test_loader: Iterable of ``(features, targets)`` evaluation batches.
        epochs: Number of full passes over ``train_loader`` (0 skips training).

    Returns:
        float: Mean squared error over every value yielded by ``test_loader``.

    Raises:
        ValueError: If a prediction is NaN or infinite, or if ``test_loader``
            yields no samples.
    """
    # Use the device the model already lives on instead of a global `device`.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    for _ in range(epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(run_device), y_batch.to(run_device)
            optimizer.zero_grad()
            loss = criterion(model(X_batch), y_batch)
            loss.backward()
            optimizer.step()

    model.eval()
    squared_error_sum = 0.0
    n_values = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch = X_batch.to(run_device)
            # Flatten both sides so (N, 1) predictions and (N, 1) or (N,)
            # targets line up element-wise and never broadcast to (N, N).
            y_pred = model(X_batch).reshape(-1)
            if torch.isnan(y_pred).any():
                raise ValueError("Model predictions contain NaN!")
            if torch.isinf(y_pred).any():
                raise ValueError("Model predictions contain Inf!")
            # Accumulate in float64 on the CPU to limit rounding error.
            residual = y_pred.cpu().double() - y_batch.reshape(-1).cpu().double()
            squared_error_sum += residual.pow(2).sum().item()
            n_values += residual.numel()

    if n_values == 0:
        raise ValueError("test_loader yielded no samples to evaluate on.")
    return squared_error_sum / n_values

In [100]:
# 6. Hyperparameter Tuning
# Seed used for weight initialisation and batch shuffling in every run.
SEED = 42

hidden_layers_configs = [
    [4], [8], [16], [32], [64],  # 1 hidden layer
    [8, 16], [16, 32], [32, 64],  # 2 hidden layers
    [8, 16, 32], [16, 32, 64]    # 3 hidden layers
]
activation_functions = [nn.Identity(), nn.Sigmoid(), nn.ReLU(), nn.Tanh()]  # Softmax removed
epochs_list = [1, 10, 25, 50, 100, 250]
learning_rates = [0.1, 0.01, 0.001, 0.0001]  # corrected learning-rate grid
batch_sizes = [16, 32, 64, 128, 256, 512]

results = []

# Full Cartesian grid; itertools.product varies the right-most factor fastest,
# which is the same visiting order as five nested for-loops.
search_grid = itertools.product(
    hidden_layers_configs, activation_functions, epochs_list, learning_rates, batch_sizes
)

for hidden_layers, activation_function, epochs, lr, batch_size in search_grid:
    # Re-seed before each configuration so every run starts from the same RNG
    # state: identical architectures get identical initial weights and shuffle
    # order, and a Restart & Run All reproduces results.csv.
    # NOTE(review): some CUDA kernels may still be non-deterministic.
    torch.manual_seed(SEED)

    model = MLPRegression(X_train.shape[1], hidden_layers, activation_function).to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    try:
        mse = train_and_evaluate(model, optimizer, criterion, train_loader, test_loader, epochs)
    except ValueError as e:
        # Diverged runs (e.g. NaN predictions) are skipped, but the skipped
        # configuration is logged so it can be found later.
        print(f"Skipping configuration due to error: {e}")
        print(
            f"  skipped: hidden_layers={hidden_layers}, "
            f"activation={activation_function.__class__.__name__}, "
            f"epochs={epochs}, lr={lr}, batch_size={batch_size}"
        )
        continue

    results.append((hidden_layers, activation_function.__class__.__name__, epochs, lr, batch_size, mse))

# Save results
results_df = pd.DataFrame(results, columns=["Hidden Layers", "Activation Function", "Epochs", "Learning Rate", "Batch Size", "MSE"])
results_df.to_csv("results.csv", index=False)

print("Hyperparameter tuning selesai. Hasil disimpan di results.csv.")

Hyperparameter tuning selesai. Hasil disimpan di results.csv.
