In [306]:
# Define the architecture of the neural network
# Choose an appropriate activation function for each layer
# Determine the number of nodes in each layer
# Choose an appropriate loss function

In [307]:
# Train the neural network

In [308]:
# Evaluate the performance of the neural network

In [309]:
# Use the trained neural network to make predictions

In [310]:
# import module
from collections import defaultdict
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm.auto import tqdm
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from d2l import torch as d2l
from torch.utils.data import DataLoader,random_split,Dataset, SubsetRandomSampler, TensorDataset
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [311]:
# Pick the compute device once: CUDA when PyTorch reports it usable, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device}")

Using cuda


In [316]:
def train_epoch(model, dataloader, loss_fn, optimizer, device) -> float:
    """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

    Args:
        model: Network to optimise; it is switched to training mode here.
        dataloader: Sized iterable yielding ``(x, y)`` tensor pairs.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device every batch is moved to before the forward pass.

    Returns:
        The sum of the per-batch loss values divided by ``len(dataloader)``.
    """
    num_batches = len(dataloader)  # batches per epoch
    train_loss = 0.0
    model.train()
    for x, y in dataloader:
        x, y = x.to(device), y.to(device)
        output = model(x)
        # Element-wise losses broadcast mismatched shapes: an (N, 1) output against
        # an (N,) target becomes an (N, N) comparison and silently optimises the
        # wrong objective. Align a same-sized floating-point target to the output
        # shape first; integer (class-index) targets are left untouched.
        if (
            y.is_floating_point()
            and output.shape != y.shape
            and output.numel() == y.numel()
        ):
            y = y.reshape(output.shape)
        loss = loss_fn(output, y)
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return train_loss / num_batches

In [317]:
def valid_epoch(model, dataloader, loss_fn, device) -> float:
    """Evaluate ``model`` on ``dataloader`` and return the mean batch loss.

    Args:
        model: Network to evaluate; it is switched to evaluation mode here.
        dataloader: Sized iterable yielding ``(x, y)`` tensor pairs.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar tensor.
        device: Device every batch is moved to before the forward pass.

    Returns:
        The sum of the per-batch loss values divided by ``len(dataloader)``.
    """
    num_batches = len(dataloader)  # batches per epoch
    valid_loss = 0.0
    model.eval()
    # No gradients are needed for evaluation, so skip autograd bookkeeping.
    with torch.inference_mode():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)
            output = model(x)
            # Element-wise losses broadcast mismatched shapes: an (N, 1) output
            # against an (N,) target becomes an (N, N) comparison and reports a
            # misleading loss. Align a same-sized floating-point target to the
            # output shape first; integer (class-index) targets are left untouched.
            if (
                y.is_floating_point()
                and output.shape != y.shape
                and output.numel() == y.numel()
            ):
                y = y.reshape(output.shape)
            valid_loss += loss_fn(output, y).item()
    return valid_loss / num_batches

In [318]:
# scheduler: torch.optim.lr_scheduler,
def train(model, train_loader, valid_loader, loss_fn, optimizer, epochs, device):
    """Fit ``model`` for ``epochs`` epochs, validating after every epoch.

    Args:
        model: Network to train; it is moved to ``device`` before the first epoch.
        train_loader: Batches passed to ``train_epoch``.
        valid_loader: Batches passed to ``valid_epoch``.
        loss_fn: Loss callable forwarded to both epoch helpers.
        optimizer: Optimizer forwarded to ``train_epoch``.
        epochs: Number of epochs to run.
        device: Device forwarded to the model and to both epoch helpers.

    Returns:
        A ``defaultdict(list)`` whose ``"train_loss"`` and ``"valid_loss"`` lists
        hold one value per completed epoch.
    """
    history = defaultdict(list)
    model.to(device)

    # Learning-rate scheduling is currently disabled; only the losses are tracked.
    for _ in tqdm(range(1, epochs + 1)):
        # Training runs before validation within each epoch.
        epoch_losses = {
            "train_loss": train_epoch(model, train_loader, loss_fn, optimizer, device),
            "valid_loss": valid_epoch(model, valid_loader, loss_fn, device),
        }
        for metric, value in epoch_losses.items():
            history[metric].append(value)

    return history

In [319]:
def plot(result):
    """Draw the training and validation loss curves on one figure.

    Args:
        result: Mapping with ``"train_loss"`` and ``"valid_loss"`` entries, each a
            sequence of per-epoch loss values.
    """
    # Look both curves up front so a missing key fails before any figure is created.
    curves = {name: result[name] for name in ("train_loss", "valid_loss")}
    # The x axis is the zero-based epoch index, sized by the training curve.
    epoch_axis = range(len(curves["train_loss"]))

    plt.figure()
    for name, values in curves.items():
        plt.plot(epoch_axis, values, label=name)
    plt.title("Loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

In [320]:
# Cross-validation and optimisation settings used by the training cells.
K = 5                 # number of cross-validation folds
EPOCHS = 500          # epochs per fold
BATCH_SIZE = 128      # samples per mini-batch
LEARNING_RATE = 1e-4  # optimizer step size

# Collects one trained state_dict per fold.
models = list()

In [321]:
# Load the training table: every column except `id` and `y` is a feature, `y` is the target.
df = pd.read_csv('data/train.csv')
x = torch.tensor(df.drop(columns=['id', 'y']).to_numpy(), dtype=torch.float32)
y = torch.tensor(df['y'].to_numpy(), dtype=torch.float32)
# Pair features and targets so one index selects both.
train_data = TensorDataset(x, y)

In [322]:
# K-fold cross-validation: train one fresh model per fold and keep its weights.
kfold = KFold(n_splits=K)

for fold_i, (train_idx, val_idx) in enumerate(kfold.split(train_data)):
    # Both loaders wrap the full dataset; the samplers restrict each one to its
    # own fold indices.
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(val_idx)
    train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, sampler=train_sampler)
    valid_loader = DataLoader(train_data, batch_size=BATCH_SIZE, sampler=valid_sampler)

    # New model, loss and optimizer for every fold so nothing carries over.
    model = Model()
    loss_func = nn.HuberLoss(reduction="mean")
    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.5)

    # Pass the loss created above; the previous call referenced an undefined
    # `loss_fn`, which raises NameError on a fresh kernel.
    result = train(model, train_loader, valid_loader, loss_func, optimizer, EPOCHS, device)
    print(f"KFold: {fold_i+1} Train loss: {result['train_loss'][-1]} Valid loss: {result['valid_loss'][-1]}")
    models.append(model.state_dict())
    # Uncomment to inspect this fold's loss curves.
    # plot(result)


  0%|          | 0/500 [00:00<?, ?it/s]

KFold: 1 Train loss: 0.022605466321110726 Valid loss: 0.024375080059354123


  0%|          | 0/500 [00:00<?, ?it/s]

KFold: 2 Train loss: 0.02375664930790663 Valid loss: 0.024516614583822396


  0%|          | 0/500 [00:00<?, ?it/s]

KFold: 3 Train loss: 0.023327333815395833 Valid loss: 0.023619546483342465


  0%|          | 0/500 [00:00<?, ?it/s]

KFold: 4 Train loss: 0.022596201356500388 Valid loss: 0.023605415167716835


  0%|          | 0/500 [00:00<?, ?it/s]

KFold: 5 Train loss: 0.02421234279870987 Valid loss: 0.02374288898247939


In [333]:
# Predict on the test set with every fold model and average the predictions.
test_data = pd.read_csv('data/test.csv')
# Drop the identifier column and feed the rest to the model as two features per row.
test_x = torch.tensor(test_data.drop(['id'],axis=1).values,dtype=torch.float32).reshape(-1, 2).to(device)

# Without this guard an empty `models` list would divide by zero and write an
# all-NaN submission file.
if not models:
    raise RuntimeError("No trained fold models available; run the cross-validation cell first.")

model = Model().to(device)
model.eval()
outputs = np.zeros(len(test_x))

for model_i in models:
    # Reuse one model instance and swap in each fold's weights.
    model.load_state_dict(model_i)
    # Inference needs no gradients; skip autograd bookkeeping for the forward pass.
    with torch.inference_mode():
        fold_pred = model(test_x)
    # Flatten to one prediction per row before accumulating.
    outputs += fold_pred.cpu().numpy().reshape(-1)
# Average predictions
outputs /= len(models)
# Create submission file
# NOTE(review): ids are regenerated as 1..N instead of being read from test.csv —
# confirm this matches the file's `id` column.
submission = pd.DataFrame({"id": range(1, len(outputs) + 1), "y": outputs})
# Save submission file
submission.to_csv("./submission.csv", index=False)