In [8]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import mean_squared_error


In [2]:
# Load the data
train_data = pd.read_csv("./data/train.csv")
test_data = pd.read_csv("./data/test.csv")

In [3]:
# One-hot encoding
train_encoded = pd.get_dummies(train_data, columns=["쇼핑몰 구분", "도시 유형", "지역 유형", "쇼핑몰 유형", "선물 유형"])
X = train_encoded.drop(columns=["ID", "수요량"]).values
y = train_encoded["수요량"].values

In [4]:
# Neural Network architecture
class NeuralNet(nn.Module):
    def __init__(self, input_dim):
        super(NeuralNet, self).__init__()
        self.layer1 = nn.Linear(input_dim, 128)
        self.layer2 = nn.Linear(128, 64)
        self.layer3 = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)  # Optional dropout

    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.dropout(x)  # Optional dropout
        x = self.relu(self.layer2(x))
        x = self.layer3(x)
        return x

In [13]:
# Initialize K-Fold cross-validation
kf = KFold(n_splits=10, shuffle=True, random_state=42)
models = []
test_predictions_all = []
rmse_values = []

In [15]:
for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
    print(f"Fold {fold + 1}")
    
    X_train_fold, X_val_fold = X[train_idx], X[val_idx]
    y_train_fold, y_val_fold = y[train_idx], y[val_idx]

    # Standard scaling
    scaler = StandardScaler()
    X_train_fold = scaler.fit_transform(X_train_fold)
    X_val_fold = scaler.transform(X_val_fold)
    
    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train_fold)
    y_train_tensor = torch.FloatTensor(y_train_fold)
    X_val_tensor = torch.FloatTensor(X_val_fold)
    y_val_tensor = torch.FloatTensor(y_val_fold)
    
    # Model, Loss, optimizer, and scheduler
    model = NeuralNet(X_train_tensor.shape[1])
    criterion = nn.MSELoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10, verbose=True)
    
    # Training with early stopping
    num_epochs = 3000
    patience = 100
    best_val_loss = float('inf')
    counter = 0
    
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_tensor).squeeze()
        loss = criterion(outputs, y_train_tensor)
        loss.backward()
        optimizer.step()

        # Validation loss
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val_tensor).squeeze()
            val_loss = criterion(val_outputs, y_val_tensor)
        
        # Print loss for every epoch (optional)
        # print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}, Val Loss: {val_loss.item()}")
        
        # Early stopping and learning rate reduction on plateau
        scheduler.step(val_loss)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print(f"Early stopping in fold {fold + 1}!")
                break
    
    # Save the model for this fold
    models.append(model)

    # Calculate RMSE for this fold
    val_outputs_np = val_outputs.detach().numpy()
    fold_rmse = np.sqrt(mean_squared_error(y_val_fold, val_outputs_np))
    rmse_values.append(fold_rmse)
    print(f"Fold {fold + 1} RMSE: {fold_rmse}")
    
    # Preprocess test data for this fold
    test_encoded = pd.get_dummies(test_data, columns=["쇼핑몰 구분", "도시 유형", "지역 유형", "쇼핑몰 유형", "선물 유형"])
    X_test = test_encoded.drop(columns=["ID"]).values
    X_test_fold = scaler.transform(X_test)
    X_test_tensor = torch.FloatTensor(X_test_fold)

    # Predict on test data for this fold
    model.eval()
    with torch.no_grad():
        test_predictions = model(X_test_tensor).squeeze().numpy()
    test_predictions_all.append(test_predictions)

# You can then average out the predictions from all folds for the test set if needed.
final_predictions = np.mean(test_predictions_all, axis=0)

# Printing average RMSE across folds
print(f"Average RMSE over all folds: {np.mean(rmse_values)}")

Fold 1
Epoch 01312: reducing learning rate of group 0 to 5.0000e-04.
Epoch 01323: reducing learning rate of group 0 to 2.5000e-04.
Epoch 01334: reducing learning rate of group 0 to 1.2500e-04.
Epoch 01345: reducing learning rate of group 0 to 6.2500e-05.
Epoch 01356: reducing learning rate of group 0 to 3.1250e-05.
Epoch 01367: reducing learning rate of group 0 to 1.5625e-05.
Epoch 01378: reducing learning rate of group 0 to 7.8125e-06.
Epoch 01389: reducing learning rate of group 0 to 3.9063e-06.
Epoch 01400: reducing learning rate of group 0 to 1.9531e-06.
Early stopping in fold 1!
Fold 1 RMSE: 172.65132423696747
Fold 2
Epoch 01085: reducing learning rate of group 0 to 5.0000e-04.
Epoch 01182: reducing learning rate of group 0 to 2.5000e-04.
Epoch 01212: reducing learning rate of group 0 to 1.2500e-04.
Epoch 01272: reducing learning rate of group 0 to 6.2500e-05.
Epoch 01283: reducing learning rate of group 0 to 3.1250e-05.
Epoch 01294: reducing learning rate of group 0 to 1.5625e-05

In [16]:
# Averaging test predictions from all folds
final_predictions = np.mean(np.array(test_predictions_all), axis=0)

# Generate a submission file
submission_dl = pd.DataFrame({'ID': test_data["ID"], '수요량': final_predictions})
submission_dl.to_csv("./data/submission_dl_kfold.csv", index=False)
