In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from torch.optim.lr_scheduler import ReduceLROnPlateau

# --- Data loading ------------------------------------------------------------
# Both CSV files are read from ./data, relative to the working directory.
train_data = pd.read_csv("./data/train.csv")
test_data = pd.read_csv("./data/test.csv")

# --- Feature / target extraction ---------------------------------------------
# Expand the listed columns into indicator (one-hot) columns, then separate
# the target column from the features; the "ID" column is not used as a feature.
train_encoded = pd.get_dummies(
    train_data,
    columns=["쇼핑몰 구분", "도시 유형", "지역 유형", "쇼핑몰 유형", "선물 유형"],
)
y = train_encoded["수요량"].values
X = train_encoded.drop(columns=["ID", "수요량"]).values

# --- Hold-out split ----------------------------------------------------------
# 80 % train / 20 % validation, with a fixed seed so the split is repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Standardisation ---------------------------------------------------------
# Scaling statistics come from the training split only and are then reused
# unchanged for the validation split.
scaler = StandardScaler()
X_train_standard = scaler.fit(X_train).transform(X_train)
X_val_standard = scaler.transform(X_val)

# --- Tensor conversion -------------------------------------------------------
X_train_tensor, X_val_tensor = (
    torch.FloatTensor(split) for split in (X_train_standard, X_val_standard)
)
y_train_tensor, y_val_tensor = (
    torch.FloatTensor(split) for split in (y_train, y_val)
)

# Initial Neural Network architecture with optional dropout
class NeuralNet(nn.Module):
    """Fully connected regressor: ``input_dim -> 128 -> 64 -> 1``.

    ReLU follows the first two linear layers; dropout (p=0.2) is applied
    only after the first activation. The final layer has no activation.

    Args:
        input_dim: Number of features in each input row.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Attribute names double as ``state_dict`` keys, so they are kept
        # stable for checkpoints saved from this class to remain loadable.
        self.layer1 = nn.Linear(in_features=input_dim, out_features=128)
        self.layer2 = nn.Linear(in_features=128, out_features=64)
        self.layer3 = nn.Linear(in_features=64, out_features=1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return the network output for ``x``; the last dimension has size 1."""
        hidden = self.dropout(self.relu(self.layer1(x)))
        hidden = self.relu(self.layer2(hidden))
        return self.layer3(hidden)

model = NeuralNet(X_train_tensor.shape[1])

# Loss, optimizer with weight decay, and learning rate scheduler.
criterion = nn.MSELoss()
# weight_decay is spelled out (0.01 is AdamW's default) so the regularisation
# promised by the comment above is visible and tunable.
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
# `verbose=True` is deprecated in recent PyTorch releases; learning-rate
# changes are reported explicitly inside the loop instead.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)

# Full-batch training with early stopping on the validation loss.
num_epochs = 3000
patience = 50  # epochs without validation improvement before stopping
best_val_loss = float('inf')
counter = 0
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    # squeeze(-1) drops only the size-1 output dimension; a bare squeeze()
    # would also collapse the batch dimension when there is a single row.
    outputs = model(X_train_tensor).squeeze(-1)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Validation loss (dropout disabled, no gradient tracking).
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor).squeeze(-1)
        val_loss = criterion(val_outputs, y_val_tensor)
    # Work with a plain float from here on so best_val_loss never holds a tensor.
    val_loss_value = val_loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}, Val Loss: {val_loss_value}")

    # Learning rate reduction on plateau, with explicit logging of changes.
    lr_before = optimizer.param_groups[0]["lr"]
    scheduler.step(val_loss_value)
    lr_after = optimizer.param_groups[0]["lr"]
    if lr_after != lr_before:
        print(f"Epoch {epoch+1}: learning rate reduced from {lr_before} to {lr_after}")

    # Early stopping: checkpoint on improvement, otherwise count stale epochs.
    if val_loss_value < best_val_loss:
        best_val_loss = val_loss_value
        counter = 0
        torch.save(model.state_dict(), "best_model.pth")
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping triggered!")
            break

# ...

# Load the best model (the checkpoint written during training).
model.load_state_dict(torch.load("best_model.pth"))

# RMSE on validation set
model.eval()
with torch.no_grad():
    # squeeze(-1) removes only the output dimension, keeping one value per row.
    val_predictions = model(X_val_tensor).squeeze(-1).numpy()
rmse = np.sqrt(mean_squared_error(y_val, val_predictions))
print(f"Validation RMSE: {rmse}")

# Preprocessing test data
test_encoded = pd.get_dummies(test_data, columns=["쇼핑몰 구분", "도시 유형", "지역 유형", "쇼핑몰 유형", "선물 유형"])
# One-hot encoding train and test independently can yield different column
# sets or orders when a category is absent from one split. Align the test
# matrix to the exact training feature layout: indicator columns missing from
# test become 0, columns unseen in training (and "ID") are dropped.
feature_columns = train_encoded.drop(columns=["ID", "수요량"]).columns
X_test = test_encoded.reindex(columns=feature_columns, fill_value=0).values
X_test_standard = scaler.transform(X_test)
X_test_tensor = torch.FloatTensor(X_test_standard)

# Predict on test data (eval mode re-asserted so dropout is off even if this
# section is run on its own).
model.eval()
with torch.no_grad():
    test_predictions = model(X_test_tensor).squeeze(-1).numpy()

# Generate a submission file
submission_dl = pd.DataFrame({'ID': test_data["ID"], '수요량': test_predictions})
submission_dl.to_csv("./data/submission_dl_simple.csv", index=False)




Epoch 1/3000, Loss: 183299.46875, Val Loss: 190231.671875
Epoch 2/3000, Loss: 183261.03125, Val Loss: 190191.203125
Epoch 3/3000, Loss: 183220.46875, Val Loss: 190151.046875
Epoch 4/3000, Loss: 183181.515625, Val Loss: 190110.71875
Epoch 5/3000, Loss: 183142.078125, Val Loss: 190069.671875
Epoch 6/3000, Loss: 183101.625, Val Loss: 190027.484375
Epoch 7/3000, Loss: 183061.90625, Val Loss: 189983.59375
Epoch 8/3000, Loss: 183018.125, Val Loss: 189937.703125
Epoch 9/3000, Loss: 182973.625, Val Loss: 189889.28125
Epoch 10/3000, Loss: 182928.1875, Val Loss: 189838.03125
Epoch 11/3000, Loss: 182879.359375, Val Loss: 189783.578125
Epoch 12/3000, Loss: 182823.34375, Val Loss: 189725.5
Epoch 13/3000, Loss: 182767.15625, Val Loss: 189663.515625
Epoch 14/3000, Loss: 182710.484375, Val Loss: 189597.296875
Epoch 15/3000, Loss: 182644.0, Val Loss: 189526.40625
Epoch 16/3000, Loss: 182575.65625, Val Loss: 189450.421875
Epoch 17/3000, Loss: 182505.0, Val Loss: 189368.875
Epoch 18/3000, Loss: 182421.82