In [None]:
import torch
from torch import nn
from torch.nn import functional as F

import pandas as pd

from sklearn_pandas import DataFrameMapper
from sklearn.preprocessing import LabelBinarizer, OneHotEncoder

from pytorch_utils import train_loop, test_loop

In [None]:
# Fix the seed of every random number generator in use (PyTorch, the stdlib
# `random` module and NumPy) so a fresh run repeats the same results.
# Note that NumPy is seeded with 0 while the other two use 42.
import random

import numpy as np

torch.manual_seed(42)
random.seed(42)
np.random.seed(0)


## Data

In [None]:
# Read the train and test splits; `row_id` becomes the index of each frame.
df_train = pd.read_csv(
    "../input/tabular-playground-series-jan-2022/train.csv",
    index_col="row_id",
)
df_test = pd.read_csv(
    "../input/tabular-playground-series-jan-2022/test.csv",
    index_col="row_id",
)

### Vectorize

In [None]:
def add_time_cols(df):
    """Return a copy of ``df`` with calendar features derived from its ``date`` column.

    The input frame is left untouched, so re-running the calling cell or
    inspecting the raw frame afterwards gives consistent results.

    Args:
        df: DataFrame with a ``date`` column that ``pd.to_datetime`` can parse.

    Returns:
        A new DataFrame in which ``date`` is converted to datetime and two
        integer columns are appended: ``weekday`` (Monday=0 ... Sunday=6) and
        ``month`` (1-12).
    """
    dates = pd.to_datetime(df["date"])
    # `assign` builds a new frame instead of writing into the caller's object.
    return df.assign(date=dates, weekday=dates.dt.weekday, month=dates.dt.month)

In [None]:
# Add the calendar feature columns to both splits.
df_train, df_test = (add_time_cols(frame) for frame in (df_train, df_test))

In [None]:
# Preview the first rows of the training frame after add_time_cols was applied.
df_train.head()

In [None]:
# One (column selector, transformer) pair per categorical feature.
# Every feature is one-hot encoded except `store`, which goes through a
# LabelBinarizer. `store` is selected with a bare string rather than a list;
# presumably that is what hands the binarizer a 1-D column - see the
# sklearn-pandas documentation on column selectors.
mapper = DataFrameMapper(
    [
        (["weekday"], OneHotEncoder()),
        (["month"], OneHotEncoder()),
        (["country"], OneHotEncoder()),
        ("store", LabelBinarizer()),
        (["product"], OneHotEncoder()),
    ]
)

In [None]:
# Learn the category vocabularies from the training split only, then reuse the
# fitted mapper on the test split. Re-fitting on the test frame would rebuild
# the encoders from test categories, so a column could change meaning (or the
# width could change) between the data the model is trained on and the data it
# predicts on.
train_data = mapper.fit_transform(df_train)
test_data = mapper.transform(df_test)

In [None]:
# Show the first row of the encoded training data as a quick sanity check.
train_data[0]

In [None]:
# Shape of the encoded training data produced by the mapper.
train_data.shape

In [None]:
# The model is trained on `train_data` and later applied to `test_data`, so
# both encodings must have the same number of columns. Fail here, with a clear
# message, rather than deep inside the forward pass at inference time.
assert test_data.shape[1] == train_data.shape[1], (
    f"train/test feature width mismatch: {train_data.shape[1]} vs {test_data.shape[1]}"
)
test_data.shape

In [None]:
# Pair every encoded feature row with its `num_sold` target; the target is
# reshaped to an (N, 1) float column.
features = torch.from_numpy(train_data).float()
targets = torch.from_numpy(df_train["num_sold"].values).unsqueeze(1).float()
full_ds = torch.utils.data.TensorDataset(features, targets)

# 80 % of the rows are used for training; the remainder becomes validation.
num_train = int(.8 * len(full_ds))
num_val = len(full_ds) - num_train
train_ds, val_ds = torch.utils.data.random_split(full_ds, [num_train, num_val])

In [None]:
# Sizes of the training and validation subsets returned by random_split.
len(train_ds), len(val_ds)

## Model

In [None]:
class SalesRegressor(nn.Module):
    """Single-hidden-layer MLP that maps a feature vector to one regression value.

    Args:
        in_features: Width of the input vector. Defaults to 26, the size the
            first layer was previously hard-coded to.
        hidden_size: Number of units in the hidden layer. Defaults to 512.
    """

    def __init__(self, in_features=26, hidden_size=512):
        super().__init__()
        self.linear1 = nn.Linear(in_features, hidden_size)
        self.out = nn.Linear(hidden_size, 1)

    def forward(self, inputs):
        """Run the network on ``inputs`` and return the predicted value(s).

        Args:
            inputs: Float tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``inputs`` and a last
            dimension of 1. No activation is applied to the output layer.
        """
        hidden = F.relu(self.linear1(inputs))
        return self.out(hidden)

In [None]:
# Instantiate the regression network (still untrained at this point).
model = SalesRegressor()

In [None]:
# Smoke-test the untrained network on the first encoded training row.
# Only that row is converted to a float tensor, not the whole matrix.
pred = model(torch.from_numpy(train_data[0]).float())
pred

## Optimization

In [None]:
# Training hyperparameters.
learning_rate = 1e-3  # step size handed to the optimizer
batch_size = 64  # number of samples per batch in both data loaders
epochs = 50  # number of iterations of the training loop

In [None]:
# Reshuffle the training samples every epoch; without `shuffle=True` the model
# sees the exact same batches in the exact same order on every pass.
train_dataloader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)
# Validation order does not affect the reported loss, so it stays sequential.
val_dataloader = torch.utils.data.DataLoader(val_ds, batch_size=batch_size)

In [None]:
# Pull a single (features, targets) batch from the training loader to try the model and loss on.
X, y = next(iter(train_dataloader))

In [None]:
# Forward pass on the sample batch; the model has not been moved to `device` yet.
preds = model(X)

In [None]:
# Mean squared error is the training criterion.
loss = nn.MSELoss()

In [None]:
# Loss of the untrained model on the sample batch.
loss(preds, y)

In [None]:
# Train on the GPU when one is visible to PyTorch, otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Move the model's parameters to the selected device.
model = model.to(device)

In [None]:
# RMSprop over all model parameters, using the configured learning rate.
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

In [None]:
# Each iteration calls train_loop on the training loader and then test_loop on
# the validation loader. Both helpers are imported from pytorch_utils and their
# internals are not visible here - presumably one optimisation pass followed by
# one evaluation pass; confirm against that module.
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss, optimizer, device)
    test_loop(val_dataloader, model, loss, device)
print("Done!")

## Inference

In [None]:
# The model lives on `device`, so the test features must be moved there too;
# feeding a CPU tensor to a CUDA model raises a device-mismatch error.
# Inference needs no gradients, and the result is brought back to the CPU so
# it can be converted to NumPy afterwards.
model.eval()
with torch.no_grad():
    preds = model(torch.from_numpy(test_data).float().to(device)).cpu()

In [None]:
# Build the submission frame from the predictions, indexed like the test set.
# `.cpu()` is required before `.numpy()` when the predictions are on the GPU.
# `squeeze(1)` drops only the trailing size-1 dimension of the model output, so
# a single-row prediction stays 1-D instead of collapsing to a scalar.
df_pred = pd.Series(
    preds.detach().cpu().squeeze(1).numpy(),
    name="num_sold",
    index=df_test.index,
).to_frame()

In [None]:
# Preview the first predictions before writing them out.
df_pred.head()

In [None]:
# Write the predictions, together with their index, to the Kaggle working directory.
df_pred.to_csv("/kaggle/working/submission.csv")