In [None]:
# --- Imports: standard library first, then third-party ----------------------
import os
import pickle
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

# --- Notebook-wide configuration --------------------------------------------
# Default figure size for every plot in the notebook.
plt.rcParams['figure.figsize'] = (15.0, 8.0)

# Silence all warnings. A single 'ignore' filter is enough: filters are matched
# first-to-last and this one matches every warning.
warnings.filterwarnings('ignore')

# List every file available under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))



### Loading the datasets

In [None]:
# Load the competition CSV files.
# `common_path` already ends with a path separator, so file names are appended
# without a leading '/' (avoids '...//test.csv'-style double separators).
common_path = '../input/competitive-data-science-predict-future-sales/'
train = pd.read_csv(common_path + 'sales_train.csv')
test = pd.read_csv(common_path + 'test.csv')
submission = pd.read_csv(common_path + 'sample_submission.csv')
items = pd.read_csv(common_path + 'items.csv')
item_cats = pd.read_csv(common_path + 'item_categories.csv')
shops = pd.read_csv(common_path + 'shops.csv')

### EDA and Feature Engineering

In [None]:
# Helper that prints a quick textual overview of a dataset.
def get_info(df):
    """Print a quick textual overview of a DataFrame.

    Prints, separated by ruler lines: the first rows (``df.head()``), the
    ``df.info()`` report, the transposed ``df.describe()`` statistics, the
    total number of null values, and the shape.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to summarise.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    ruler = '+++++++++++++++++++++++++++++'

    print(ruler)
    print('HEAD :')
    print(df.head())

    print(ruler)
    print('INFOS :')
    # df.info() writes its report itself and returns None; wrapping it in
    # print() would add a stray "None" line after the report.
    df.info()

    print(ruler)
    print('STATS :')
    print(df.describe().transpose())

    print(ruler)
    print('Number of Null Values :', end=' ')
    # First sum gives nulls per column, second sum totals them.
    print(df.isnull().sum().sum())

    print(ruler)
    print('SHAPE :', end=' ')
    print(df.shape)

In [None]:
# Overview of the training set: head, info, summary statistics, null count, shape.
get_info(train)

In [None]:
# Overview of the test set: head, info, summary statistics, null count, shape.
get_info(test)

In [None]:
# Overview of the items table: head, info, summary statistics, null count, shape.
get_info(items)

In [None]:
# Overview of the item categories table: head, info, summary statistics, null count, shape.
get_info(item_cats)

In [None]:
# Overview of the shops table: head, info, summary statistics, null count, shape.
get_info(shops)

In [None]:
# Basic cardinalities of the data.
# nunique() counts the distinct values actually present. max() would report the
# largest id/index instead, which equals the count only when ids run 1..N with no gaps.
print ('# of shops: ', train['shop_id'].nunique())
print ('# of items: ', train['item_id'].nunique())
print ('# of months: ', train['date_block_num'].nunique())
print ('# of categories: ', items['item_category_id'].nunique())


In [None]:
# Number of items in each category, shown as a one-column table indexed by category id.
items_per_cat = items.groupby('item_category_id').size()
df_cats = items_per_cat.to_frame(name='Number of Items')
df_cats.sort_values(by='item_category_id')

In [None]:
# Reshape the daily sales log into one row per (item_id, shop_id) pair with one
# column per month (date_block_num).
# aggfunc='sum' is required here: pivot_table defaults to the mean, which would
# store the average daily count within a month instead of the monthly total that
# is being forecast (item_cnt_month). Pairs with no sales in a month get 0.
# The 'date' column is not used by the pivot, so it is not parsed beforehand.
train = train.pivot_table(index=['item_id', 'shop_id'],
                          values=['item_cnt_day'],
                          columns='date_block_num',
                          aggfunc='sum',
                          fill_value=0)
# Turn item_id / shop_id back into ordinary columns.
train = train.reset_index()
# Preview the reshaped frame.
train.head()

In [None]:
# Attach each test (shop_id, item_id) pair to its monthly sales history.
# The pivoted `train` carries two-level column labels, e.g. ('item_id', '') and
# ('item_cnt_day', <month>), whereas `test` has single-level labels. pandas does
# not reliably merge frames whose columns have different numbers of levels
# (newer versions raise MergeError), so the labels are flattened first.
train_flat = train.copy()
if isinstance(train_flat.columns, pd.MultiIndex):
    # Key columns have an empty second level -> keep the first level's name;
    # month columns -> keep the month number from the last level.
    train_flat.columns = [
        labels[0] if isinstance(labels[-1], str) and labels[-1] == '' else labels[-1]
        for labels in train_flat.columns
    ]

# Left join keeps every test row; pairs absent from `train` get NaN history,
# which is then replaced by 0.
dataset = pd.merge(test, train_flat, on=['item_id', 'shop_id'], how='left')
dataset = dataset.fillna(0)
dataset.head()

In [None]:
# (rows, columns) of the merged frame.
dataset.shape

In [None]:
# Drop the identifier columns so that only the per-month sales columns remain.
# Reassigning (rather than inplace=True) together with errors='ignore' keeps the
# cell safe to re-run: a second execution finds the columns already gone and
# leaves the frame unchanged instead of raising KeyError.
dataset = dataset.drop(columns=['shop_id', 'item_id', 'ID'], errors='ignore')
dataset.head()

### Building an LSTM Model to Forecast Sales

In [None]:
# Turn the monthly table into model arrays.
monthly_values = dataset.values

# Inputs: every column except the last, with a trailing feature axis added.
X_train = monthly_values[:, :-1, np.newaxis]
# Target: the last column, kept 2-D.
y_train = monthly_values[:, -1:]
# Forecast inputs: the same window shifted forward by one column (first one dropped).
X_test = monthly_values[:, 1:, np.newaxis]

X_train.shape, y_train.shape, X_test.shape

In [None]:
# Build the LSTM network: one LSTM layer over the input sequence, dropout for
# regularisation, and a single-unit dense output.
model = Sequential()
# Take the (timesteps, features) shape from the training array instead of
# hard-coding it, so the model follows the number of month columns available.
model.add(LSTM(units=64, input_shape=X_train.shape[1:]))
model.add(Dropout(0.3))
model.add(Dense(1))

# 'rmse' is not a loss name Keras recognises, so training could not start with it.
# Mean squared error is used as the loss instead; its square root (RMSE) can be
# derived from the tracked 'mean_squared_error' metric.
model.compile(loss='mean_squared_error',
              optimizer='adam',
              metrics=['mean_squared_error'])
model.summary()


In [None]:
# Train the network; the returned history holds the per-epoch loss and metrics.
history = model.fit(x=X_train, y=y_train, epochs=10, batch_size=4096)

In [None]:
# Plot the training curves recorded by model.fit, one point per epoch.
plt.rcParams['figure.figsize'] = (15, 8)
plt.plot(history.history['loss'], label= 'loss(mse)')
# Square root of the tracked MSE metric gives the RMSE curve.
plt.plot(np.sqrt(history.history['mean_squared_error']), label= 'loss(rmse)')
plt.legend(loc=1)
plt.title('Loss Function')
# Epochs run along the x-axis; the y-axis carries the loss value.
plt.xlabel('# of Epochs')
plt.ylabel('Loss')

In [None]:
# Forecast with the trained model, then bound every value to the range [0, 20].
raw_forecast = model.predict(X_test)
predictions = np.clip(raw_forecast, 0, 20)
# Show a small sample of the forecasts.
print(predictions[8:20])

### Submission

In [None]:
# Assemble the submission table (test ID alongside its flattened forecast) and save it as CSV.
flat_predictions = predictions.ravel()
submission = pd.DataFrame({
    'ID': test['ID'],
    'item_cnt_month': flat_predictions,
})
submission.to_csv('submission.csv', index=False)
print('Submission Made Sucessefully !!')

### Under Construction...