In [23]:
import torch
import torch.nn as nn                # Baru (untuk Model)
import torch.optim as optim          # Baru (untuk Optimizer)
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, Dataset # Baru (Ganti TensorDataset)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [24]:
# Read the raw table; header=None means the first row is treated as data,
# not as column names.
df = pd.read_csv("covtype.data", header=None)

# The class label is stored in the last column.
y = df.iloc[:, -1]

# Number of samples per class, ordered by class id.
class_counts = y.value_counts().sort_index()
print("Jumlah data per kelas:")
print(class_counts)

Jumlah data per kelas:
54
1    211840
2    283301
3     35754
4      2747
5      9493
6     17367
7     20510
Name: count, dtype: int64


In [25]:
# Read the raw table again (header=None: the first row is data, not column names).
df = pd.read_csv('covtype.data', header=None)

# Every column except the last is a feature; the last column is the label.
X = df.iloc[:, :-1].values
# Subtract 1 from every label (the recorded class counts show ids 1..7 in the
# file, so they become 0..6 here).
y = df.iloc[:, -1].values - 1

# First split: 70% training, 30% held out, stratified on the label.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.30,
    stratify=y,
    random_state=42,
)

# Second split: the held-out 30% is halved into validation (15%) and test (15%),
# again stratified on the label.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.50,
    stratify=y_temp,
    random_state=42,
)

In [26]:
def show_class_distribution(name, y):
    """Print how many samples of each class appear in ``y``.

    Output format: a blank line, ``<name>:``, one ``  Class <id>: <count>``
    line per distinct value (in the sorted order returned by ``np.unique``),
    and finally ``Total = <sum of counts>``.

    Args:
        name: Heading printed above the counts.
        y: Array-like of class labels.
    """
    classes, freqs = np.unique(y, return_counts=True)

    # Assemble the whole report first, then emit it with a single print.
    report = [f"\n{name}:"]
    report.extend(f"  Class {cls}: {n}" for cls, n in zip(classes, freqs))
    report.append(f"Total = {freqs.sum()}")
    print("\n".join(report))

# Report the per-class counts of each split, in train / validation / test order.
for split_name, split_labels in (
    ("Train", y_train),
    ("Validation", y_val),
    ("Test", y_test),
):
    show_class_distribution(split_name, split_labels)


Train:
  Class 0: 148288
  Class 1: 198310
  Class 2: 25028
  Class 3: 1923
  Class 4: 6645
  Class 5: 12157
  Class 6: 14357
Total = 406708

Validation:
  Class 0: 31776
  Class 1: 42495
  Class 2: 5363
  Class 3: 412
  Class 4: 1424
  Class 5: 2605
  Class 6: 3077
Total = 87152

Test:
  Class 0: 31776
  Class 1: 42496
  Class 2: 5363
  Class 3: 412
  Class 4: 1424
  Class 5: 2605
  Class 6: 3076
Total = 87152


In [27]:
# Standardize the features. The scaler is fitted on the training split only;
# the same fitted scaler is then applied to all three splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_val, X_test = (
    scaler.transform(X_train),
    scaler.transform(X_val),
    scaler.transform(X_test),
)

In [28]:
class ForestCoverDataset(Dataset):
    """Map-style dataset pairing feature rows with integer class labels.

    Both inputs are converted to tensors once, at construction time, so
    indexing afterwards is a plain tensor lookup.

    Args:
        features: Array-like of per-sample feature rows; stored as float32.
        labels: Array-like of per-sample class ids; stored as torch.long.

    Raises:
        ValueError: If ``features`` and ``labels`` do not contain the same
            number of samples.
    """

    def __init__(self, features, labels):
        # Raw data --> tensors.
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

        # Fail fast on misaligned inputs: __len__ follows the labels, so extra
        # feature rows would be silently ignored and missing ones would only
        # surface as an IndexError while iterating.
        if len(self.features) != len(self.labels):
            raise ValueError(
                "features and labels must contain the same number of samples, "
                f"got {len(self.features)} and {len(self.labels)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.features[idx], self.labels[idx]

# Wrap each split in a Dataset object.
train_dataset = ForestCoverDataset(features=X_train, labels=y_train)
val_dataset = ForestCoverDataset(features=X_val, labels=y_val)
test_dataset = ForestCoverDataset(features=X_test, labels=y_test)

# DataLoaders: only the training loader shuffles; validation and test keep
# their stored order.
batch_size = 64
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [29]:
class Network(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    With the default arguments the layer widths are 54 -> 128 -> 64 -> 7.
    The last layer has no activation, so ``forward`` returns one raw score
    per class.

    Args:
        in_features: Number of input features per sample.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, in_features=54, hidden1=128, hidden2=64, num_classes=7):
        super().__init__()
        self.seq = nn.Sequential(
            # Input -> hidden layer 1
            nn.Linear(in_features, hidden1),
            nn.ReLU(),

            # Hidden layer 1 -> hidden layer 2
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),

            # Hidden layer 2 -> output layer (one score per class)
            nn.Linear(hidden2, num_classes),
        )

    def forward(self, x):
        """Run ``x`` through the layer stack and return the per-class scores."""
        return self.seq(x)

# Model, loss function and optimizer.
model = Network()
criterion = nn.CrossEntropyLoss()
# Learning rate 0.003. Author's note: a series of hyperparameter-tuning
# experiments selected this value as the best configuration for this model
# (those experiments are not part of this notebook section).
optimizer = optim.Adam(params=model.parameters(), lr=0.003)

In [30]:
num_epochs = 15

for epoch in range(num_epochs):
    # ---- Training pass: one optimizer step per mini-batch ----
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()                     # clear gradients from the previous step
        loss = criterion(model(inputs), labels)   # forward pass + loss
        loss.backward()                           # backpropagate
        optimizer.step()                          # update the weights
        running_loss += loss.item()

    # ---- Validation pass: accuracy only, gradient tracking disabled ----
    model.eval()
    val_correct, val_total = 0, 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            # Predicted class = index of the largest score in each row.
            predicted = torch.max(model(inputs), dim=1).indices
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    # The reported loss is the mean of this epoch's per-batch losses.
    print(f"Epoch {epoch+1}/{num_epochs} | Loss: {running_loss/len(train_loader):.4f} | Val Acc: {val_correct/val_total:.4f}")

Epoch 1/15 | Loss: 0.5172 | Val Acc: 0.8181
Epoch 2/15 | Loss: 0.4107 | Val Acc: 0.8394
Epoch 3/15 | Loss: 0.3654 | Val Acc: 0.8501
Epoch 4/15 | Loss: 0.3432 | Val Acc: 0.8584
Epoch 5/15 | Loss: 0.3279 | Val Acc: 0.8672
Epoch 6/15 | Loss: 0.3150 | Val Acc: 0.8670
Epoch 7/15 | Loss: 0.3067 | Val Acc: 0.8718
Epoch 8/15 | Loss: 0.2986 | Val Acc: 0.8795
Epoch 9/15 | Loss: 0.2928 | Val Acc: 0.8779
Epoch 10/15 | Loss: 0.2895 | Val Acc: 0.8788
Epoch 11/15 | Loss: 0.2844 | Val Acc: 0.8804
Epoch 12/15 | Loss: 0.2808 | Val Acc: 0.8821
Epoch 13/15 | Loss: 0.2761 | Val Acc: 0.8833
Epoch 14/15 | Loss: 0.2724 | Val Acc: 0.8874
Epoch 15/15 | Loss: 0.2688 | Val Acc: 0.8843
