# In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load


import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# In [None]:
# -*- coding: utf-8 -*-
"""
Created on July 24 15:31:02

@author: Palagiri Akash Reddy
"""


import numpy as np
import pandas as pd
from scipy.stats import describe

import warnings

# Silence every warning for the rest of the session. Note that this also hides
# deprecation notices from pandas / numpy / keras.
warnings.simplefilter('ignore')
import matplotlib.pyplot as plt

import seaborn as sns
# Apply seaborn's default theme to subsequent matplotlib plots.
sns.set()
# Load the raw training data.
df_train = pd.read_csv('../input/demand-forecasting-kernels-only/train.csv')
df_train.head()

# Parse the date strings and promote them to the frame's DatetimeIndex;
# set_index() removes the 'date' column in the same step.
df_train['date'] = pd.to_datetime(df_train['date'])
df_train = df_train.set_index('date')

df_train.info()



from itertools import product, starmap


def storeitems():
    """Return an iterator over every (item, store) pair.

    Items run from 1 to 50 (outer loop) and stores from 1 to 10 (inner loop).
    """
    item_ids = range(1, 51)
    store_ids = range(1, 11)
    return product(item_ids, store_ids)


def storeitems_column_names():
    """Return the sales column name of every (item, store) pair, in storeitems() order."""
    return [f'item_{item}_store_{store}_sales' for item, store in storeitems()]


def sales_by_storeitem(df):
    """Reshape long-format sales into one column per (item, store) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'item', 'store' and 'sales' columns. The rows of each
        (item, store) pair are placed positionally against
        ``df.index.unique()`` (no index alignment), so each pair must have
        exactly one row per unique index value, in that order.

    Returns
    -------
    pandas.DataFrame
        Indexed by the unique index values of ``df``, with one
        ``item_{i}_store_{s}_sales`` column per pair yielded by
        ``storeitems()``, in that order.

    Raises
    ------
    ValueError
        If a pair from ``storeitems()`` has no rows in ``df``, or if its row
        count differs from the number of unique index values.
    """
    # Single grouping pass instead of one full boolean scan per pair.
    # Row order inside each group follows the original row order of df.
    sales_by_pair = {
        pair: sales.values
        for pair, sales in df.groupby(['item', 'store'])['sales']
    }

    columns = {}
    for i, s in storeitems():
        if (i, s) not in sales_by_pair:
            raise ValueError(f'no rows found for item {i} / store {s}')
        columns[f'item_{i}_store_{s}_sales'] = sales_by_pair[(i, s)]

    # Build the frame in one step: inserting hundreds of columns one at a
    # time fragments the DataFrame's internal blocks.
    return pd.DataFrame(columns, index=df.index.unique())

# Pivot the training data to one sales column per (item, store) pair.
df_train = sales_by_storeitem(df_train)
df_train.info()

# Load the test data.
df_test = pd.read_csv('../input/demand-forecasting-kernels-only/test.csv')
df_test.head()

# Parse the date strings and use them as the DatetimeIndex;
# set_index() removes the 'date' column in the same step.
df_test['date'] = pd.to_datetime(df_test['date'])
df_test = df_test.set_index('date')
df_test.info()

# The test set has no sales yet: fill in zeros as placeholders so the same
# pivot used for df_train can be applied.
df_test['sales'] = np.zeros(len(df_test))
df_test = sales_by_storeitem(df_test)
df_test.info()

# Make sure all column names are the same and in the same order.
# Compare the complete lists: pairing the columns with zip() stops at the
# shorter frame and would let a differing column count go unnoticed.
if list(df_test.columns) != list(df_train.columns):
    raise ValueError('df_test and df_train columns differ in name, order or count')

# Tag each row with its origin so the stacked frame can be split again later.
df_test['is_test'] = True
df_train['is_test'] = False
df_total = pd.concat([df_train, df_test])
df_total.info()

# One-hot encode the weekday of every row.
# dtype is pinned to uint8 (the default before pandas 2.0). The newer bool
# default, mixed with the float sales columns, turns the frame's NumPy view
# into an object array that the model cannot consume.
weekday_df = pd.get_dummies(df_total.index.weekday, prefix='weekday', dtype=np.uint8)
weekday_df.index = df_total.index
weekday_df.head()

# One-hot encode the month of every row (same dtype reasoning as above).
month_df = pd.get_dummies(df_total.index.month, prefix='month', dtype=np.uint8)
month_df.index = df_total.index
month_df.head()

# Calendar dummies go first; later code relies on the sales columns
# following them.
df_total = pd.concat([weekday_df, month_df, df_total], axis=1)
df_total.info()

## make sure stacked and standard sales columns appear in the same order:
#sales_cols = [col for col in df_total.columns if '_sales' in col and '_sales_' not in col]
##stacked_sales_cols = [col for col in df_total.columns if '_sales_' in col]
#other_cols = [col for col in df_total.columns if col not in set(sales_cols)]
#
#sales_cols = sorted(sales_cols)
#
#
#new_cols = other_cols + sales_cols
#
#
#df_total = df_total.reindex(columns=new_cols)

# Notebook-style peeks at the combined frame.
df_total.head()
df_total.tail()
df_total.describe()

from sklearn.preprocessing import MinMaxScaler

# Scale everything except the weekday/month dummies and the is_test flag.
excluded_markers = ('weekday', 'month', 'is_')
cols_to_scale = [
    col for col in df_total.columns
    if not any(marker in col for marker in excluded_markers)
]

# Fit on train and test rows together and squash each column into [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
df_total[cols_to_scale] = scaler.fit_transform(df_total[cols_to_scale])
df_total.head()

# Split back into train / test using the origin flag, then discard the flag.
df_train = df_total[df_total['is_test'].eq(False)].drop(columns='is_test')
df_test = df_total[df_total['is_test'].eq(True)].drop(columns='is_test')

df_train.info()
df_test.info()

#df_train_changed = df_train
#cols_to_sort = storeitems_column_names()
#
#df_train_changed =  df_train_changed.reset_index()
#df_train_changed = df_train_changed.drop(['date'], axis =1)
#temp = df_train_changed[cols_to_sort]
#df_train_changed = df_train_changed.drop(cols_to_sort, axis =1)
#df_train_changed = pd.concat([df_train_changed,temp],axis =1)
#
##changing the original
#temp = df_total[cols_to_sort]
#df_total = df_total.drop(cols_to_sort, axis =1)
#df_total = pd.concat([df_total,temp],axis =1)



def to_sequences(dataset, seq_size=1, target_start=19):
    """Cut a frame into sliding windows and their next-row targets.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Rows in time order; every column is used as an input feature.
    seq_size : int, default 1
        Number of consecutive rows in each input window.
    target_start : int, default 19
        Position of the first target column; columns from here to the end of
        the row after each window form the target. The default 19 keeps the
        previous hard-coded behaviour.
        NOTE(review): 19 presumably equals the 7 weekday + 12 month dummy
        columns placed ahead of the sales columns -- confirm.

    Returns
    -------
    tuple of numpy.ndarray
        ``x`` with shape (n_windows, seq_size, n_columns) and ``y`` with shape
        (n_windows, n_columns - target_start), where
        ``n_windows = len(dataset) - seq_size``. Both are empty 1-D arrays
        when there are no windows.
    """
    # Convert once; slicing the ndarray avoids a DataFrame .iloc per window.
    values = dataset.values
    n_windows = len(values) - seq_size

    x = [values[i:i + seq_size, :] for i in range(n_windows)]
    y = [values[i + seq_size, target_start:] for i in range(n_windows)]

    return np.array(x), np.array(y)

# Window length: every training sample holds this many consecutive rows.
seq_size = 3
X, y = to_sequences(df_train, seq_size=seq_size)

print("Shape of training set: {}".format(X.shape))

# Shape of a single sample, (timesteps, features), passed to the first LSTM layer.
inputshape = X.shape[1:3]
print(inputshape)

from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import LSTM, Flatten,Dropout,Dense


# Stacked LSTM regressor: three recurrent layers with light dropout, followed
# by a dense layer that emits one value per target column.
model = Sequential()
model.add(LSTM(128, activation='relu', input_shape=inputshape, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(64, activation='tanh', return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(32, activation='tanh', return_sequences=False))
# The output width follows the target matrix instead of a hard-coded 500.
model.add(Dense(y.shape[1]))

# This is a regression on scaled sales, so track mean absolute error;
# 'accuracy' is a classification metric and carries no meaning here.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.summary()

# shuffle=False keeps the windows in chronological order during training.
model.fit(X, y, verbose=1, epochs=20, batch_size=1, shuffle=False)


# Seed the forecast with the last `seq_size` training rows, followed by the
# test rows whose placeholder sales are overwritten one step at a time.
temp = pd.concat([df_train.iloc[-seq_size:, :], df_test], axis=0)

n_features = temp.shape[1]
# The scaled (sales) columns are taken to be the trailing block of columns,
# after the calendar dummies -- derived here instead of hard-coding 19 / 519.
sales_start = n_features - len(cols_to_scale)

# One prediction per test row; each prediction is written back into `temp`
# so that it becomes part of the input window for the following steps.
for i in range(len(df_test)):
    window = temp.iloc[i:(i + seq_size), :].values.reshape(1, seq_size, n_features)
    prediction = model.predict(window)
    temp.iloc[(i + seq_size):(i + seq_size + 1), sales_start:n_features] = prediction.flatten().tolist()

# Drop the seed rows. copy() makes the result an independent frame so the
# assignment below does not write into a slice of the longer one.
temp = temp.iloc[seq_size:, :].copy()
# Undo the min-max scaling to get sales back in original units.
temp[cols_to_scale] = scaler.inverse_transform(temp[cols_to_scale])


# Lay the forecast columns end to end in storeitems() order.
# NOTE(review): presumably this matches the id order of test.csv -- confirm.
# A single concatenate replaces the former seed array built with `np.int`
# (removed in NumPy 1.24) and the repeated re-concatenation inside a loop.
# The result is float either way, because the sales columns are float.
result = np.concatenate(
    [temp[col_name].values for col_name in storeitems_column_names()]
)
result = result.round()

# The row position doubles as the submission id.
result = pd.DataFrame(result, columns=['sales'])
result.index.name = 'id'
result.head()
result.to_csv('submission.csv')