In [None]:
# `nn` is imported in this cell as well because the cell sits above the
# notebook's main import cell; without it a fresh "Restart & Run All" stops
# here with a NameError before the class can be defined.
import torch.nn as nn


class MLP(nn.Module):
    """Fully connected feed-forward network with ReLU between layers.

    Args:
        dims: Sequence of layer widths ``[n_in, hidden..., n_out]``. One
            ``nn.Linear`` is created for every consecutive pair of widths.

    The last linear layer is not followed by an activation. When the output
    width ``dims[-1]`` is 1, ``forward`` flattens the result to a 1-D tensor;
    otherwise the output of the last layer is returned unchanged.
    """

    def __init__(self, dims):
        super().__init__()
        self.dims = dims
        # Layers are created in order, one per consecutive (in, out) pair.
        self.layers = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(dims[:-1], dims[1:])]
        )
        self.activation = nn.ReLU()

    def forward(self, x):
        """Run ``x`` through every layer, applying ReLU after all but the last."""
        last = len(self.layers) - 1
        for i, layer in enumerate(self.layers):
            x = layer(x)
            if i < last:
                x = self.activation(x)

        # Single-output networks return a flat vector instead of a column.
        return x.view(-1) if self.dims[-1] == 1 else x

In [None]:
import numpy as np
import torch 
import torch.nn as nn
import torch.utils.data as data
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import joblib
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt

In [None]:
class MyDataset(data.Dataset):
    """Map-style dataset pairing feature rows with their targets.

    Args:
        x: Object exposing ``to_numpy()`` that yields the feature matrix.
        y: Object exposing ``to_numpy()`` that yields the targets.

    Both inputs are converted to NumPy once at construction; every item is
    returned as a ``(features, target)`` pair of float32 tensors.
    """

    def __init__(self, x, y):
        super().__init__()
        self.x = x.to_numpy()
        self.y = y.to_numpy()

    def __len__(self):
        # The number of samples is taken from the target array.
        return self.y.shape[0]

    def __getitem__(self, index):
        features = torch.from_numpy(self.x[index]).float()
        # np.array wraps a scalar target so that from_numpy accepts it.
        label = torch.from_numpy(np.array(self.y[index])).float()
        return features, label

In [None]:
# Read the raw table once, then separate the regression target
# (`Weekly_Sales`) from the remaining feature columns.
walmart_raw = pd.read_csv('/kaggle/input/walmart-dataset/Walmart.csv')
df_y = walmart_raw['Weekly_Sales']
df_x = walmart_raw.drop(columns=['Weekly_Sales'])
# Optional schema check (uniques / missing values / dtypes per column):
#pd.concat([df_x.nunique(axis=0), df_x.isna().sum(axis=0), df_x.dtypes], axis=1).rename(columns={0: 'uniques', 1:'na', 2:'type'})

In [None]:
# Cast the identifier-like columns to object dtype so that dtype-based
# column selection treats them as categorical rather than numeric.
df_x['Store'] = df_x['Store'].astype(object)
df_x['Holiday_Flag'] = df_x['Holiday_Flag'].astype(object)

# NOTE(review): the Kaggle Walmart.csv appears to store dates as DD-MM-YYYY
# (verify against the file). Without `dayfirst=True`, pandas reads ambiguous
# values such as '05-02-2010' month-first, which swaps day and month (or
# raises on newer pandas once it meets a day value above 12).
df_x['Date'] = pd.to_datetime(df_x['Date'], dayfirst=True)

# Replace the timestamp with its calendar components.
df_x['day'] = df_x['Date'].dt.day
df_x['month'] = df_x['Date'].dt.month
df_x['year'] = df_x['Date'].dt.year

df_x = df_x.drop(columns=['Date'])

# 90/10 hold-out split; fixed random_state keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.1, random_state=1)

In [None]:
# Split the columns by dtype: object columns are categorical, the rest numeric.
numeric_features = list(x_train.loc[:, x_train.dtypes != object])
categorical_features = list(x_train.loc[:, x_train.dtypes == object])

# Both encoders are fitted on the training split only and then reused for the
# test split, so no test-set statistics enter the preprocessing.
numeric_encoder = StandardScaler()
numeric_encoder.fit(x_train[numeric_features])
cat_encoder = OneHotEncoder()
cat_encoder.fit(x_train[categorical_features])


def _encode_features(frame):
    """Return a new frame with numeric columns standardised and categorical
    columns replaced by their one-hot encoding.

    Uses the already fitted module-level `numeric_encoder` and `cat_encoder`.
    The input frame is not modified. The result has a fresh 0..n-1 index and
    the columns: `index`, the scaled numeric columns, the one-hot columns.
    """
    scaled = frame.copy()
    # Assigning whole columns (rather than writing through `.loc`) lets the
    # integer day/month/year columns become float without the
    # dtype-incompatible in-place assignment that recent pandas deprecates.
    scaled[numeric_features] = numeric_encoder.transform(frame[numeric_features])

    # Passing a DataFrame (not a bare ndarray) keeps the feature names the
    # encoder was fitted with and avoids scikit-learn's feature-name warning.
    ohe = pd.DataFrame(
        cat_encoder.transform(frame[categorical_features]).toarray(),
        columns=cat_encoder.get_feature_names_out(),
    )

    # NOTE(review): reset_index() without drop=True keeps the old row index as
    # an unscaled `index` feature column. That looks unintended, but removing
    # it changes the number of features the model receives, so it is preserved
    # here. TODO: drop it together with the model's input width.
    encoded = scaled.reset_index().drop(columns=categorical_features)
    return pd.concat([encoded, ohe], axis=1)


x_train = _encode_features(x_train)
x_test = _encode_features(x_test)

In [None]:
# --- Hyper-parameters -------------------------------------------------------
seed = 1
loss_func = nn.L1Loss()
batch_size = 512
learning_rate = 0.01
num_epochs = 500
# The input width follows the encoded feature frame instead of a hard-coded
# number, so the model stays in sync if the preprocessing changes.
dims = [x_train.shape[1], 512, 512, 1]
decay_every = 250  # epochs between learning-rate decays
decay = 0.1        # multiplicative learning-rate factor at each decay
model_dims = {'dims': dims}

# --- Model, optimiser, scheduler --------------------------------------------
# Seed torch's RNG before building the model so weight initialisation and
# DataLoader shuffling are repeatable across runs.
torch.manual_seed(seed)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = MLP(dims)
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = StepLR(optimizer, decay_every, gamma=decay)

# --- Data pipelines ---------------------------------------------------------
train_dataset = MyDataset(x_train, y_train)
test_dataset = MyDataset(x_test, y_test)
train_loader = data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Train for `num_epochs` epochs. After each epoch the model is evaluated on
# both loaders and the sample-weighted MAE and MAPE are printed.
for epoch in range(1, num_epochs + 1):
    print('epoch:', epoch)

    # ---- optimisation pass over the training batches ----
    model.train()
    for samples, targets in train_loader:
        samples, targets = samples.to(device), targets.to(device)
        optimizer.zero_grad()
        loss = loss_func(model(samples), targets)
        loss.backward()
        optimizer.step()

    # The learning-rate schedule advances once per epoch.
    scheduler.step()

    # ---- evaluation pass (no gradients) ----
    model.eval()
    with torch.no_grad():
        for mode, loader in (('train', train_loader), ('test', test_loader)):
            epoch_loss = 0
            rel_abs_error = 0
            num_samples = 0
            for samples, targets in loader:
                samples, targets = samples.to(device), targets.to(device)
                preds = model(samples)
                batch_count = targets.shape[0]
                loss = loss_func(preds, targets)
                # Weight the batch loss by batch size so the final division
                # gives a per-sample average even if the last batch is smaller.
                epoch_loss += loss.item() * batch_count
                rel_abs_error += ((targets - preds) / targets).abs().sum()
                num_samples += batch_count

            epoch_loss /= num_samples
            rel_abs_error = rel_abs_error / num_samples
            print(mode, '- mae:', f'{epoch_loss:.2}', 'mape:', f'{rel_abs_error:.2}')

In [None]:
# Final hold-out evaluation on the whole test split in a single forward pass.
model.eval()
with torch.no_grad():  # inference only: no autograd graph needed
    test_pred = model(torch.from_numpy(x_test.to_numpy()).float().to(device))
test_pred = test_pred.cpu().numpy()

# scikit-learn metrics take (y_true, y_pred). MAE is symmetric, but MAPE
# divides by |y_true|, so passing the predictions first would normalise the
# error by the predictions instead of the actual sales.
mae = mean_absolute_error(y_test, test_pred)
mape = mean_absolute_percentage_error(y_test, test_pred)
print('MAE:', mae)
print('MAPE:', mape)