In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from torch.utils.data import DataLoader, TensorDataset
from torch import nn
from sklearn.model_selection import train_test_split

Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



In [2]:
# Read the training table from disk.
train = pd.read_csv("data/train.csv")

# Features: every column except the label and the row identifier.
data = torch.from_numpy(train.drop(columns=["FloodProbability", "id"]).values)
# Label: the "FloodProbability" column.
target = torch.from_numpy(train["FloodProbability"].values)

X = data
y = target

# Pair features with labels so a DataLoader can batch them together.
# No hold-out split is made here: every training row goes into `dataset`.
dataset = TensorDataset(X, y)

In [3]:
# Number of feature columns in X; the MLP's first linear layer uses this as its input width.
X.shape[1]

20

In [4]:
class MLP(nn.Module):
    """Fully connected regressor that maps a feature vector to a single value.

    The network is a stack of ten linear layers with ReLU activations between
    them (hidden widths 64, 64, 64, 64, 132, 132, 132, 64, 32) and a final
    one-unit output layer. Batch normalisation follows the third 132-wide
    layer and the 64-wide layer after it. No activation is applied to the
    output.

    Args:
        in_features: Width of the input feature vector. When omitted, the
            width is taken from the notebook-level tensor ``X``
            (``X.shape[1]``), so a bare ``MLP()`` behaves as before.
    """

    def __init__(self, in_features=None):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: size the first layer from the
            # feature tensor defined in the data-loading cell.
            in_features = X.shape[1]

        # The module order is kept fixed so that state_dict keys
        # ("layers.0.weight", ...) stay stable across versions of this class.
        self.layers = nn.Sequential(
            nn.Linear(in_features, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 132),
            nn.ReLU(),
            nn.Linear(132, 132),
            nn.ReLU(),
            nn.Linear(132, 132),
            nn.BatchNorm1d(132),
            nn.ReLU(),
            nn.Linear(132, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        """Run ``x`` through the layer stack and return a ``(batch, 1)`` tensor."""
        return self.layers(x)

In [5]:
# Mini-batch iterator over the training dataset: 16 samples per batch,
# reshuffled every epoch, loaded by a single worker process.
trainloader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=1)

In [6]:
class R2Loss(nn.Module):
    """Loss equal to ``1 - R^2`` for a batch of predictions.

    ``1 - R^2`` is the residual sum of squares divided by the total sum of
    squares of the targets, so it is computed directly as that ratio.

    Args:
        eps: Lower bound applied to the total sum of squares. It only takes
            effect when the targets in a batch are (almost) constant, where
            the plain ratio would divide by zero.
    """

    def __init__(self, eps=1e-8):
        super(R2Loss, self).__init__()
        self.eps = eps

    def forward(self, y_pred, y_true):
        """Return the scalar loss ``SS_res / SS_tot`` for one batch.

        Args:
            y_pred: Model predictions.
            y_true: Ground-truth targets with the same number of elements
                as ``y_pred``.

        Returns:
            A zero-dimensional tensor; 0 for a perfect fit, 1 when the
            predictions are no better than the target mean.
        """
        # Align shapes explicitly: subtracting a (B,) tensor from a (B, 1)
        # tensor would otherwise broadcast to (B, B) and inflate the loss.
        if y_true.shape != y_pred.shape and y_true.numel() == y_pred.numel():
            y_true = y_true.reshape(y_pred.shape)

        ss_total = torch.sum((y_true - torch.mean(y_true)) ** 2)
        ss_residual = torch.sum((y_true - y_pred) ** 2)
        # Clamp rather than add eps so ordinary batches are unaffected while
        # constant-target batches yield a finite value instead of inf/NaN.
        return ss_residual / ss_total.clamp_min(self.eps)

In [7]:
# Network to be trained.
mlp = MLP()

# Training objective.
loss_function = R2Loss()

# RMSprop optimiser with weight decay, plus a step scheduler that multiplies
# the learning rate by 0.1 every 20 scheduler steps.
optimizer = torch.optim.RMSprop(
    params=mlp.parameters(),
    lr=1e-4,
    weight_decay=1e-5,
)
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=20,
    gamma=0.1,
)

In [8]:
# Train for a fixed number of epochs and report the mean per-batch loss.
epochs = 50
for epoch in range(epochs):
    # Ensure BatchNorm layers use batch statistics, even if the model was
    # switched to eval mode by an earlier run of another cell.
    mlp.train()

    total_loss = 0.0

    # Unpack each batch directly; the loop no longer rebinds the notebook-level
    # name `data`, which holds the full feature tensor.
    for inputs, targets in trainloader:
        # Cast to float32 to match the model's parameters.
        inputs = inputs.float()
        # Give targets a trailing dimension so they line up with the
        # (batch, 1) model output.
        targets = targets.float().reshape(-1, 1)

        optimizer.zero_grad()
        outputs = mlp(inputs)
        loss = loss_function(outputs, targets)
        loss.backward()
        # Cap the global gradient norm to keep updates bounded.
        nn.utils.clip_grad_norm_(mlp.parameters(), max_norm=1.0)
        optimizer.step()

        total_loss += loss.item()

    # NOTE(review): `scheduler` is created but never stepped, so the learning
    # rate stays at its initial value. If decay is wanted, call
    # scheduler.step() here, once per epoch rather than once per batch.
    print(f"Epoch [{epoch+1}/{epochs}]: Train Loss: {total_loss/len(trainloader):.4f}")

Epoch [1/50]: Train Loss: 0.2964
Epoch [2/50]: Train Loss: 0.1976
Epoch [3/50]: Train Loss: 0.1892
Epoch [4/50]: Train Loss: 0.1852
Epoch [5/50]: Train Loss: 0.1827


KeyboardInterrupt: 

In [None]:
# Display the model's state dict (the tensors that would be saved in a checkpoint).
# NOTE(review): this prints every tensor in full; consider showing only names and shapes.
mlp.state_dict()

In [None]:
# Predict on the test set and assemble the submission table.
test = pd.read_csv("data/test.csv")

# Same preprocessing as for training: drop the row id, convert to float32.
test_cleaned = torch.from_numpy(test.drop(columns=["id"]).values).float()

# Switch to eval mode so BatchNorm uses its running statistics instead of
# the statistics of the test batch (and does not update them).
# inference_mode() only disables autograd; it does not change layer behaviour.
mlp.eval()
with torch.inference_mode():
    y_preds = mlp(test_cleaned)

# Build the submission frame directly with its final column names; the model
# output is (n, 1), so flatten it to one value per row.
df1 = pd.DataFrame(
    {
        "id": test["id"],
        "FloodProbability": y_preds.numpy().ravel(),
    }
)

df1.head()

In [None]:
# Write the submission table to CSV without the DataFrame index column.
df1.to_csv("submission_new_5.csv", index=False)