In [None]:
# Define the architecture of the neural network
# Choose an appropriate activation function for each layer
# Determine the number of nodes in each layer
# Choose an appropriate loss function

In [None]:
# Train the neural network

In [None]:
# Evaluate the performance of the neural network

In [None]:
# Use the trained neural network to make predictions

In [None]:
# Import the required modules
from collections import defaultdict
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm.auto import tqdm
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
# from d2l import torch as d2l
from torch.utils.data import DataLoader,random_split,Dataset, SubsetRandomSampler, TensorDataset
# from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
# import optuna

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device}")

In [None]:
class Model(nn.Module):
    """Feed-forward network made of fully connected layers.

    The hidden widths grow from 32 up to 256 and shrink back to 32. Each
    hidden ``nn.Linear`` is followed by ``nn.ReLU`` and ``nn.BatchNorm1d``;
    the last ``nn.Linear`` maps to ``output_size`` with no activation.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_size=2, output_size=1):
        super().__init__()
        hidden_widths = (32, 64, 128, 256, 128, 64, 32)
        stack = []
        in_features = input_size
        for width in hidden_widths:
            # Linear -> ReLU -> BatchNorm, in that order, for every hidden layer.
            stack.append(nn.Linear(in_features, width))
            stack.append(nn.ReLU())
            stack.append(nn.BatchNorm1d(width))
            in_features = width
        stack.append(nn.Linear(in_features, output_size))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Pass ``x`` through the sequential stack and return its output."""
        return self.model(x)

In [None]:
# %%time
# Creating Optuna object and defining its parameters
# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials = 30)
# Showing optimization results
# print('Number of finished trials:', len(study.trials))
# print('Best trial parameters:', study.best_trial.params)
# print('Best score:', study.best_value)


# Number of finished trials: 30
# Best trial parameters: {'learning_rate': 0.0027315610999692746, 'optimizer': 'SGD', 'batch_size': 240, 'num_hidden_1': 408, 'num_hidden_2': 31, 'num_hidden_3': 886, 'num_hidden_4': 345, 'num_hidden_5': 464, 'num_hidden_6': 366}
# Best score: 0.022719970179928675

In [None]:
# class Model(nn.Module):
#     def __init__(self, input_size=2, output_size=1):
#         super().__init__()
#         layers = []
#         neurons = [32, 64, 128, 64] * 3 + [32, 16, 8, 4, 2]
#         for neuron in neurons:
#             layers.append(nn.Linear(input_size, neuron))
#             layers.append(nn.Tanh())
#             layers.append(nn.BatchNorm1d(neuron))
#             input_size = neuron
#         layers.append(nn.Linear(input_size, output_size))
#         self.model = nn.Sequential(*layers)

#     def forward(self, x):
#         return self.model(x)

In [None]:
# class Model(nn.Module):
#     def __init__(self, input_size=2, output_size=1):
#         super().__init__()
#         self.input_layer = nn.Linear(input_size, 32)
#         self.hidden_layer1 = nn.Linear(32, 64)
#         self.hidden_layer2 = nn.Linear(64, 128)
#         self.hidden_layer3 = nn.Linear(128, 64)
#         self.hidden_layer4 = nn.Linear(64, 32)
#         self.output_layer = nn.Linear(32, output_size)
#         self.tanh = nn.Tanh()
#         self.bn1 = nn.BatchNorm1d(32)
#         self.bn2 = nn.BatchNorm1d(64)
#         self.bn3 = nn.BatchNorm1d(128)
#         self.bn4 = nn.BatchNorm1d(64)
#         self.bn5 = nn.BatchNorm1d(32)

#     def forward(self, x):
#         x = self.bn1(self.tanh(self.input_layer(x)))
#         x = self.bn2(self.tanh(self.hidden_layer1(x)))
#         x = self.bn3(self.tanh(self.hidden_layer2(x)))
#         x = self.bn4(self.tanh(self.hidden_layer3(x)))
#         x = self.bn5(self.tanh(self.hidden_layer4(x)))
#         x = self.output_layer(x)
#         return x

In [None]:
def train_epoch(model, dataloader, loss_fn, optimizer, device) -> float:
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Module to optimise; it is switched to training mode.
        dataloader: Iterable with a length that yields ``(x, y)`` batches.
        loss_fn: Callable taking ``(output, target)`` and returning a scalar loss.
        optimizer: Optimiser that updates ``model``'s parameters.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The sum of the per-batch losses divided by the number of batches.
    """
    batch_count = len(dataloader)
    running_loss = 0.0
    model.train()
    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        batch_loss = loss_fn(model(inputs), targets)
        running_loss += batch_loss.item()
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
    return running_loss / batch_count

In [None]:
def valid_epoch(model, dataloader, loss_fn, device) -> float:
    """Evaluate ``model`` on every batch of ``dataloader`` without updating it.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable with a length that yields ``(x, y)`` batches.
        loss_fn: Callable taking ``(output, target)`` and returning a scalar loss.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The sum of the per-batch losses divided by the number of batches.
    """
    batch_count = len(dataloader)
    total_loss = 0.0
    model.eval()
    # Inference mode disables autograd bookkeeping for the forward passes.
    with torch.inference_mode():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            total_loss += loss_fn(model(inputs), targets).item()
    return total_loss / batch_count

In [None]:
# scheduler: torch.optim.lr_scheduler,
def train(model, train_loader, valid_loader, loss_fn, optimizer, scheduler, epochs, device):
    """Run the full train/validate loop and collect the loss history.

    Args:
        model: Module to train; it is moved to ``device`` first.
        train_loader: Batches passed to ``train_epoch``.
        valid_loader: Batches passed to ``valid_epoch``.
        loss_fn: Loss callable shared by training and validation.
        optimizer: Optimiser used by ``train_epoch``.
        scheduler: Learning-rate scheduler stepped with the validation loss
            after every epoch; a falsy value skips the scheduler step.
        epochs: Number of epochs to run.
        device: Device used for the model and the batches.

    Returns:
        A ``defaultdict(list)`` with one entry per epoch under the keys
        ``"train_loss"`` and ``"valid_loss"``.
    """
    model.to(device)
    history = defaultdict(list)
    for _ in tqdm(range(1, epochs + 1)):
        epoch_train_loss = train_epoch(model, train_loader, loss_fn, optimizer, device)
        epoch_valid_loss = valid_epoch(model, valid_loader, loss_fn, device)
        history["train_loss"].append(epoch_train_loss)
        history["valid_loss"].append(epoch_valid_loss)
        # The scheduler receives the validation loss as its metric.
        if scheduler:
            scheduler.step(epoch_valid_loss)
    return history

In [None]:
def plot(result):
    """Draw the training and validation loss curves on a new figure.

    Args:
        result: Mapping with ``"train_loss"`` and ``"valid_loss"`` sequences,
            one value per epoch.
    """
    curves = {name: result[name] for name in ("train_loss", "valid_loss")}
    # The x axis is the zero-based epoch index, sized by the training curve.
    epoch_axis = range(len(result["train_loss"]))
    plt.figure()
    for name, values in curves.items():
        plt.plot(epoch_axis, values, label=name)
    plt.title("Loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

In [None]:
# Cross-validation and optimisation settings used by the cells below.
K = 10                # number of folds given to KFold
EPOCHS = 500          # epochs per training run
BATCH_SIZE = 64       # batch size of the DataLoaders
LEARNING_RATE = 1e-4  # learning rate passed to AdamW

# One state dict per fold is appended here by the cross-validation loop.
models = list()

In [None]:
# Load the training table: every column except `id` and `y` is a feature,
# `y` is the target. Both are wrapped in one TensorDataset.
df = pd.read_csv('data/train.csv')
x = torch.tensor(
    df.drop(columns=['id', 'y']).values,
    dtype=torch.float32,
).view(-1, 2)
y = torch.tensor(
    df['y'].values,
    dtype=torch.float32,
).view(-1, 1)
train_data = TensorDataset(x, y)

In [None]:
# K-fold cross-validation: train one independent model per fold and keep a
# frozen copy of its weights for the ensemble prediction.
kfold = KFold(n_splits=K)
loss_fn = nn.MSELoss()
# Start from an empty list so re-running this cell does not stack old folds
# on top of new ones.
models.clear()
for fold_i, (train_idx, val_idx) in enumerate(kfold.split(train_data)):
    # Both loaders read from the same dataset; the samplers restrict each one
    # to the indices of the current split.
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(val_idx)
    train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, sampler=train_sampler)
    valid_loader = DataLoader(train_data, batch_size=BATCH_SIZE, sampler=valid_sampler)

    # Fresh weights for every fold. Carrying one model across folds would let
    # it train on samples that are later used as validation data, which makes
    # the reported validation loss meaningless.
    model = Model()
    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.6)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=100)

    result = train(model, train_loader, valid_loader, loss_fn, optimizer, scheduler, EPOCHS, device)
    print(f"KFold: {fold_i+1} Train loss: {result['train_loss'][-1]} Valid loss: {result['valid_loss'][-1]}")

    # state_dict() returns references to the live parameter/buffer tensors,
    # so any later training of `model` would silently change a stored entry.
    # Clone every tensor to freeze this fold's weights.
    models.append({name: tensor.detach().clone() for name, tensor in model.state_dict().items()})
    plot(result)


In [None]:
# data_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
# Further train the current `model` on the whole training set, one
# full-batch update per epoch, recording the loss of each update.
model.to(device)
loss_fn = nn.MSELoss()
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=0.6,
)
# NOTE(review): this scheduler is created but never stepped in this cell,
# so the learning rate stays at LEARNING_RATE throughout — confirm intent.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=100)
losses = []
model.train()
# The tensors only need to be moved to the device once.
x, y = x.to(device), y.to(device)
for epoch in tqdm(range(EPOCHS)):
    optimizer.zero_grad()
    output = model(x)
    loss = loss_fn(output, y)
    losses.append(loss.item())
    loss.backward()
    optimizer.step()

In [None]:
# Ensemble inference: average the predictions of every stored fold model on
# the test set and write the result as the submission file.
test_data = pd.read_csv('data/test.csv')
test_x = torch.tensor(test_data.drop(['id'],axis=1).values,dtype=torch.float32).view(-1, 2).to(device)

# Without this guard an empty list would divide by zero below and write a
# column of NaNs to the submission instead of failing.
if not models:
    raise RuntimeError("No trained fold models found in `models`; run the cross-validation cell first.")

model = Model().to(device)
outputs = np.zeros(len(test_x))

for model_i in models:
    # Load this fold's weights; eval mode makes BatchNorm use its running
    # statistics instead of the statistics of the test batch.
    model.load_state_dict(model_i)
    model.eval()
    # No gradients are needed for prediction, so skip autograd bookkeeping.
    with torch.inference_mode():
        outputs += model(test_x).cpu().numpy().flatten()
# Average predictions
outputs /= len(models)
# Create submission file
# NOTE(review): ids are generated as 1..N rather than taken from
# test_data["id"] — confirm the test ids really are 1..N in file order.
submission = pd.DataFrame({"id": range(1, len(outputs) + 1), "y": outputs})
# Save submission file
submission.to_csv("./submission.csv", index=False)

In [None]:
# CSV files compared in the next cell: the submission written by this
# notebook and a second file it is scored against.
y_pred_path, y_best_path = 'submission.csv', 'submission_test.csv'

In [None]:
# Mean squared difference between the last column of the two CSV files.
y_pred, y_best = pd.read_csv(y_pred_path), pd.read_csv(y_best_path)
y_pred, y_best = y_pred.iloc[:, -1].values, y_best.iloc[:, -1].values
print(np.mean((y_pred - y_best) ** 2))