In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import warnings
import numpy as np
import pandas as pd
import copy
from pathlib import Path
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras import optimizers, Sequential, Model

import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

# Overview
Demand forecasting is the estimation of probable future demand for a product or service. The term is often used interchangeably with demand planning, although the latter is a broader process that begins with forecasting but is not limited to it. Demand forecasting is closely related to sales forecasting and can be approached in the same way.



# Data Loading

In [None]:
# All competition files live in the same input folder; build each path from it.
DATA_DIR = '../input/competitive-data-science-predict-future-sales/'

train = pd.read_csv(DATA_DIR + 'sales_train.csv')
item_cat = pd.read_csv(DATA_DIR + 'item_categories.csv')
items = pd.read_csv(DATA_DIR + 'items.csv')
ss = pd.read_csv(DATA_DIR + 'sample_submission.csv')
shops = pd.read_csv(DATA_DIR + 'shops.csv')
test = pd.read_csv(DATA_DIR + 'test.csv')

In [None]:
# Preview the first rows of the training sales table.
train.head()

In [None]:
# Preview the first rows of the item lookup table.
items.head()

In [None]:
# Preview the sample submission to see the expected output layout.
ss.head()

In [None]:
# Show the last rows of the test table.
test.tail()           

# EDA

In [None]:
# Compare the number of distinct IDs with the table's shape.
id_count = test['ID'].nunique()
print(id_count)
print(test.shape)

In [None]:
# Column dtypes, non-null counts and memory usage of the test table.
test.info()

In [None]:
# Number of distinct items and shops present in the training table.
for key in ('item_id', 'shop_id'):
    print(train[key].nunique())
train.head()

In [None]:
# (rows, columns) of the training table.
train.shape

In [None]:
import seaborn as sns
# Heatmap of the boolean missing-value mask of `train` (one cell per row/column pair).
sns.heatmap(train.isnull())

In [None]:
# Test rows whose item / shop never appears in the training table.
unseen_item = ~test['item_id'].isin(train['item_id'])
unseen_shop = ~test['shop_id'].isin(train['shop_id'])
print(test[unseen_item].shape)
print(test[unseen_shop].shape)

# Distinct counts of the three test columns.
for col in ('ID', 'shop_id', 'item_id'):
    print(test[col].nunique())
test.head()

In [None]:
# Distinct items and categories in the item lookup table.
for col in ('item_id', 'item_category_id'):
    print(items[col].nunique())

# Test rows whose item is absent from the item lookup table.
missing_from_items = ~test['item_id'].isin(items['item_id'])
print(test[missing_from_items].shape)
items.head()

In [None]:
# Distinct training items missing from `items`, and distinct test items missing from `train`.
train_items_not_in_lookup = train.loc[~train['item_id'].isin(items['item_id']), 'item_id']
test_items_not_in_train = test.loc[~test['item_id'].isin(train['item_id']), 'item_id']
print(train_items_not_in_lookup.nunique())
print(test_items_not_in_train.nunique())

In [None]:
# Keep only training rows whose shop AND item both occur in the test table.
shop_in_test = train['shop_id'].isin(test['shop_id'])
item_in_test = train['item_id'].isin(test['item_id'].unique())
train = train[shop_in_test & item_in_test]

In [None]:
# Attach the item lookup columns to each sale, then discard the item name.
train_data = (
    pd.merge(train, items, on='item_id', how='inner')
    .drop(columns='item_name')
)
train_data.head()

In [None]:
# Sum of `item_cnt_day` per (shop, item, month), broadcast back onto every row of the group.
per_shop_item_month = train_data.groupby(['shop_id', 'item_id', 'date_block_num'])
train_data['item_cnt_month'] = per_shop_item_month['item_cnt_day'].transform('sum')

# Sum of `item_cnt_day` over the whole month, likewise broadcast onto every row.
per_month = train_data.groupby('date_block_num')
train_data['monthly_sales'] = per_month['item_cnt_day'].transform('sum')

train_data.head()

In [None]:
# `monthly_sales` holds the same value on every row of a given `date_block_num`,
# so one row per month is enough to draw the line. Plotting the de-duplicated
# frame avoids having seaborn aggregate (and bootstrap a zero-width confidence
# band over) every row of `train_data`.
monthly_totals = train_data[['date_block_num', 'monthly_sales']].drop_duplicates()
sns.lineplot(x='date_block_num', y='monthly_sales', data=monthly_totals)

In [None]:
# Minimum, maximum, mean and median of the monthly counts, printed in that order.
monthly_counts = train_data['item_cnt_month']
for stat in (monthly_counts.min, monthly_counts.max, monthly_counts.mean, monthly_counts.median):
    print(stat())

In [None]:
# Keep only rows whose monthly count lies in the closed interval [0, 15].
count_in_range = train_data['item_cnt_month'].between(0, 15)
train_data = train_data[count_in_range]

In [None]:
# Minimum, maximum, mean and median of the monthly counts, printed in that order.
monthly_counts = train_data['item_cnt_month']
for stat in (monthly_counts.min, monthly_counts.max, monthly_counts.mean, monthly_counts.median):
    print(stat())

# Sequence Modelling
The model takes a sequence as input, so the flat dataframe must first be converted into sequences: each sample is a window of consecutive monthly sales for one shop/item pair, and the month that follows the window is the value to predict.
Our data is aggregated to monthly frequency. Because monthly series contain few data points, an LSTM trains poorly on them and tends to overfit. Hence, we use only the most recent part of each series (months 20–33), split into two overlapping windows of 12 input months plus one label month.

In [None]:
# Wide matrix: one row per (shop, item), one column per `date_block_num`.
# Cells are aggregated with pivot_table's default function; (shop, item, month)
# combinations with no rows in `train_data` are filled with 0.
mat = train_data.pivot_table(
    values='item_cnt_month',
    index=['shop_id', 'item_id'],
    columns='date_block_num',
    fill_value=0,
).reset_index()
mat.head()

In [None]:
first = 20        # first month column (date_block_num) used by any window
last = 33         # last month column used (label of the final window)
sub_series = 12   # number of consecutive input months per sample


def _build_windows(frame, first_month, last_month, n_lags):
    """Slice every row of ``frame`` into sliding windows of ``n_lags + 1`` months.

    Parameters
    ----------
    frame : DataFrame with ``shop_id`` and ``item_id`` columns plus one column
        per month, labelled by its integer month number.
    first_month, last_month : first and last month labels any window may touch.
    n_lags : number of input months per window; one further month follows as
        the label.

    Returns
    -------
    2-D array with ``2 + n_lags + 1`` columns: shop_id, item_id, the ``n_lags``
    input months and the label month. Rows are ordered as "all windows of the
    first frame row, then all windows of the second row, ...".

    Raises
    ------
    KeyError if a required month column is missing from ``frame``.
    """
    values = frame.to_numpy()
    key_pos = [frame.columns.get_loc(name) for name in ('shop_id', 'item_id')]
    width = len(key_pos) + n_lags + 1

    blocks = []
    for start in range(first_month, last_month - n_lags + 1):
        month_pos = [frame.columns.get_loc(start + step) for step in range(n_lags + 1)]
        # One (n_rows, width) block per window start, sliced for all rows at once.
        blocks.append(values[:, key_pos + month_pos])

    if not blocks:
        # No window fits between first_month and last_month.
        return np.empty((0, width))

    # Stack to (n_rows, n_windows, width) and flatten so that the windows of a
    # given row stay adjacent, matching a row-by-row construction.
    return np.stack(blocks, axis=1).reshape(-1, width)


columns = ['shop_id', 'item_id', *range(sub_series), 'label']

mat1 = pd.DataFrame(_build_windows(mat, first, last, sub_series), columns=columns)
mat1.head()

In [None]:
# Spot-check the windows generated for shop 2 / item 31.
mat1[(mat1['shop_id']==2) & (mat1['item_id']==31)]

In [None]:
# Pull the target column out of the frame, then drop the identifier columns so
# only the lag features remain in `mat1`.
y = mat1.pop('label')
mat1 = mat1.drop(columns=['shop_id', 'item_id'])

In [None]:
# Hold out 10% of the samples for validation; fixed seed for a repeatable split.
X_train, X_valid, y_train, y_valid = train_test_split(
    mat1,
    y.values,
    test_size=0.1,
    random_state=0,
)

In [None]:
# Shapes of the training and validation feature frames.
for features in (X_train, X_valid):
    print(features.shape)

In [None]:
# from sklearn.preprocessing import StandardScaler
# scale=StandardScaler()
# X_train=scale.fit_transform(X_train)
# X_valid=scale.transform(X_valid)

In [None]:
# Append a trailing axis of length 1 so each sample becomes (n_columns, 1).
X_train1 = X_train.values.reshape(*X_train.shape, 1)
X_valid1 = X_valid.values.reshape(*X_valid.shape, 1)

# LSTM
We implement a demand forecasting method based on multi-layer LSTM networks. LSTMs have a strong ability to capture nonlinear patterns in time series data. An LSTM is a type of recurrent neural network specifically designed to learn long-term dependencies, mitigating the vanishing and exploding gradient problems. The current model works in a many-to-one fashion: it takes multiple inputs (the lag variables) and predicts a single forecast value, the sales of the following month.

In [None]:
# Input dimensions are taken from the reshaped training array.
n_steps, n_features = X_train1.shape[1], X_train1.shape[2]

# Three stacked LSTM layers (the first two return full sequences, the third
# only its final output) followed by a small dense head with a single output.
lstm_model = Sequential([
    LSTM(n_steps, input_shape=(n_steps, n_features), return_sequences=True),
    LSTM(6, activation='relu', return_sequences=True),
    LSTM(1, activation='relu'),
    Dense(10, kernel_initializer='glorot_normal', activation='relu'),
    Dense(10, kernel_initializer='glorot_normal', activation='relu'),
    Dense(1),
])
lstm_model.summary()

# Mean-squared-error loss, optimised with Adam.
adam = optimizers.Adam(0.0001)
lstm_model.compile(loss='mse', optimizer=adam)

In [None]:
# Train for 10 epochs in batches of 128, reporting validation loss after each epoch.
lstm_model.fit(
    X_train1,
    y_train,
    validation_data=(X_valid1, y_valid),
    batch_size=128,
    epochs=10,
    verbose=1,
)

In [None]:
from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    explained_variance_score,
    r2_score,
)

# Score the model on the held-out validation samples.
preds = lstm_model.predict(X_valid1)
for label, metric in (('MAE', mean_absolute_error), ('MSE', mean_squared_error)):
    print(label, metric(y_valid, preds))

In [None]:
# One history row per (shop, item), left-joined onto the test pairs; pairs with
# no history get NaN from the join, which is replaced by 0.
final = mat.drop_duplicates(subset=['shop_id', 'item_id'])
X_test = pd.merge(
    test, final, on=['shop_id', 'item_id'], how='left', suffixes=['', '_']
).fillna(0)

# Keep only the last `sub_series` month columns (those ending just before 34);
# this selection also leaves out ID, shop_id and item_id.
lag_months = list(range(34 - sub_series, 34))
X_test = X_test[lag_months]
X_test.head()

In [None]:
# (Feature scaling is disabled: X_test=scale.transform(X_test))
# Append a trailing axis of length 1, then predict.
X_test1 = X_test.values.reshape(*X_test.shape, 1)
preds = lstm_model.predict(X_test1)

In [None]:
# Pair every test ID with its prediction (flattened to one value per row).
sub = pd.DataFrame({'ID': test['ID'], 'item_cnt_month': preds.ravel()})
sub

In [None]:
# Write the submission to the working directory, without the DataFrame index column.
sub.to_csv('subm.csv', index=False)