In [112]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F

In [113]:
# Read the train and test tables, parsing the 'date' column as datetimes.
train_df, test_df = (
    pd.read_csv(csv_path, parse_dates=['date'])
    for csv_path in ('data/train.csv', 'data/test.csv')
)

In [114]:
# Preview the first five training rows.
train_df.head(n=5)

Unnamed: 0,row_id,date,country,store,product,num_sold
0,0,2015-01-01,Finland,KaggleMart,Kaggle Mug,329
1,1,2015-01-01,Finland,KaggleMart,Kaggle Hat,520
2,2,2015-01-01,Finland,KaggleMart,Kaggle Sticker,146
3,3,2015-01-01,Finland,KaggleRama,Kaggle Mug,572
4,4,2015-01-01,Finland,KaggleRama,Kaggle Hat,911


In [115]:
# Preview the first five test rows.
test_df.head(n=5)

Unnamed: 0,row_id,date,country,store,product
0,26298,2019-01-01,Finland,KaggleMart,Kaggle Mug
1,26299,2019-01-01,Finland,KaggleMart,Kaggle Hat
2,26300,2019-01-01,Finland,KaggleMart,Kaggle Sticker
3,26301,2019-01-01,Finland,KaggleRama,Kaggle Mug
4,26302,2019-01-01,Finland,KaggleRama,Kaggle Hat


In [116]:
# Derive a calendar-year feature from each frame's parsed 'date' column.
for frame in (train_df, test_df):
    frame['year'] = frame['date'].dt.year

In [117]:
# Drop the identifier and raw date columns from both frames.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it no longer raises KeyError because the columns
# were already removed by a previous execution.
train_df = train_df.drop(columns=['row_id', 'date'], errors='ignore')
test_df = test_df.drop(columns=['row_id', 'date'], errors='ignore')

In [118]:
# One-hot encoding helper for the categorical (object-dtype) columns.
def one_hot_encode(df):
    """Return `df` with every object-dtype column replaced by one-hot columns.

    Columns whose dtype is not 'object' are left untouched. When no column
    qualifies, `df` itself is returned unchanged.
    """
    # Collect the object-dtype columns up front, then encode them in one call.
    object_columns = [name for name in df.columns if df[name].dtype == 'object']
    if not object_columns:
        return df
    return pd.get_dummies(df, columns=object_columns)

In [119]:
# One-hot encode the training frame with the shared helper.
train_df_one_hot = one_hot_encode(train_df)
# Align the test columns with the training feature columns (same set, same
# order). Encoding the two frames independently can otherwise yield different
# columns when a category value is missing from one of them, which would feed
# mismatched features to the scaler and the model. Categories absent from the
# test set become all-zero columns; categories unseen in training are dropped.
feature_columns = [col for col in train_df_one_hot.columns if col != 'num_sold']
test_df_one_hot = one_hot_encode(test_df).reindex(columns=feature_columns, fill_value=0)

In [120]:
# Separate the regression target from the feature columns.
y_train = train_df_one_hot['num_sold']
X_train = train_df_one_hot.drop(columns=['num_sold'])

# The test frame carries no target, so an all-zero placeholder with one entry
# per test row stands in for it.
X_test = test_df_one_hot.copy()
y_test = np.zeros(len(test_df_one_hot))

In [121]:
# Standardise the features: the scaler is fitted on the training features and
# the same fitted scaler is then applied to the test features.
X_scaler = StandardScaler().fit(X_train)
X_train = X_scaler.transform(X_train)
X_test = X_scaler.transform(X_test)

# The target gets its own scaler; it is reshaped to a single column first.
y_scaler = StandardScaler()
y_train = y_scaler.fit_transform(y_train.values.reshape(-1, 1))

In [124]:
# Minimal map-style PyTorch dataset over a pair of indexable containers.
class DataSet(torch.utils.data.Dataset):
    """Pairs each entry of `X` with the entry of `y` at the same index."""

    def __init__(self, X, y):
        # Keep references to the inputs as given; nothing is copied or converted.
        self.X, self.y = X, y

    def __len__(self):
        """Number of samples, taken from the length of `X`."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the `(X[index], y[index])` pair."""
        sample = self.X[index]
        target = self.y[index]
        return sample, target

In [125]:
# Convert each NumPy array to a float32 tensor, in the same order as the names.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.from_numpy(array).float()
    for array in (X_train, y_train, X_test, y_test)
)

In [126]:
# Wrap the tensors in datasets, then serve both through loaders that share the
# same settings: batches of 32 rows, delivered without shuffling.
train_dataset = DataSet(X_train_tensor, y_train_tensor)
test_dataset = DataSet(X_test_tensor, y_test_tensor)

loader_options = {'batch_size': 32, 'shuffle': False}
train_loader = torch.utils.data.DataLoader(train_dataset, **loader_options)
test_loader = torch.utils.data.DataLoader(test_dataset, **loader_options)

In [127]:
# Peek at the first batch only and print the feature and target shapes.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    X, y = first_batch
    print(X.shape, y.shape)

torch.Size([32, 9]) torch.Size([32, 1])


In [128]:
# Model definition: recurrent layer plus a linear output head.
class LSTM(nn.Module):
    """An `nn.LSTM` layer followed by a linear layer applied to the LSTM outputs."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # Layers are created in this order: recurrent layer first, then the head.
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        # Stored for reference only; forward() does not move any data to it.
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')

    def forward(self, x):
        """Run `x` through the LSTM and project its outputs with `fc`.

        Only the first element returned by `nn.LSTM` (the output tensor) is
        used; the second element (the final hidden/cell states) is discarded.
        """
        outputs, _ = self.lstm(x)
        return self.fc(outputs)

In [129]:
# Use CUDA when torch reports it as available; otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print('Using device:', device)

Using device: cpu


In [130]:
# Model dimensions: the input width is the last dimension of the training
# feature tensor; the network emits a single regression value.
input_size = X_train_tensor.shape[-1]
hidden_size = 100
output_size = 1
# Seed PyTorch's RNG before the layers are built so the randomly initialised
# weights are repeatable across kernel restarts (no seed is set in the cells
# leading up to this one).
torch.manual_seed(42)
model = LSTM(input_size, hidden_size, output_size).to(device)

In [131]:
# Mean-squared-error loss, minimised with Adam at a learning rate of 0.001.
learning_rate = 0.001
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [132]:
# Train for a fixed number of passes over the training loader.
epochs = 100
steps_per_epoch = len(train_loader)
log_template = 'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'

for epoch in range(epochs):
    for step, (x, y) in enumerate(train_loader, start=1):
        x, y = x.to(device), y.to(device)

        # Forward pass and loss for this batch.
        y_pred = model(x)
        loss = criterion(y_pred, y)

        # Clear stale gradients, back-propagate, and update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 100 steps.
        if step % 100 == 0:
            print(log_template.format(epoch + 1, epochs, step, steps_per_epoch, loss.item()))

Epoch [1/100], Step [100/822], Loss: 0.0400
Epoch [1/100], Step [200/822], Loss: 0.0213
Epoch [1/100], Step [300/822], Loss: 0.1048
Epoch [1/100], Step [400/822], Loss: 0.1397
Epoch [1/100], Step [500/822], Loss: 0.0214
Epoch [1/100], Step [600/822], Loss: 0.0875
Epoch [1/100], Step [700/822], Loss: 0.0602
Epoch [1/100], Step [800/822], Loss: 0.0175
Epoch [2/100], Step [100/822], Loss: 0.0330
Epoch [2/100], Step [200/822], Loss: 0.0197
Epoch [2/100], Step [300/822], Loss: 0.1056
Epoch [2/100], Step [400/822], Loss: 0.1521
Epoch [2/100], Step [500/822], Loss: 0.0301
Epoch [2/100], Step [600/822], Loss: 0.1012
Epoch [2/100], Step [700/822], Loss: 0.0676
Epoch [2/100], Step [800/822], Loss: 0.0306
Epoch [3/100], Step [100/822], Loss: 0.0327
Epoch [3/100], Step [200/822], Loss: 0.0241
Epoch [3/100], Step [300/822], Loss: 0.1102
Epoch [3/100], Step [400/822], Loss: 0.1610
Epoch [3/100], Step [500/822], Loss: 0.0394
Epoch [3/100], Step [600/822], Loss: 0.1126
Epoch [3/100], Step [700/822], L

In [133]:
# Predict on the test set, collecting one array of model outputs per batch.
# The forward passes run under torch.no_grad() because nothing is
# back-propagated here; this stops autograd from recording a graph for every
# batch, which only costs memory and time during inference.
y_pred = []
with torch.no_grad():
    for x, _ in test_loader:
        x = x.to(device)
        y_pred.append(model(x).cpu().numpy())
# Join the per-batch arrays into a single array along the first axis.
y_pred = np.concatenate(y_pred, axis=0)

In [135]:
# Undo the target scaling by passing the predictions through y_scaler's
# inverse transform.
# NOTE(review): this rebinds y_pred to its own transform, so executing the cell
# twice applies the inverse transform twice -- re-run the prediction cell first.
y_pred = y_scaler.inverse_transform(y_pred)

In [142]:
# Load the sample submission file; it serves as the frame the predictions are written into.
sample_submission_path = 'data/sample_submission.csv'
df_submission = pd.read_csv(sample_submission_path)

In [143]:
# Flatten the predictions to 1-D before storing them as the 'num_sold' column,
# rather than relying on pandas to accept a 2-D single-column array as a column.
df_submission['num_sold'] = np.asarray(y_pred).ravel()

In [145]:
# Write the submission to disk, leaving the DataFrame index out of the file.
df_submission.to_csv(path_or_buf='submission.csv', index=False)