In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

## Read dataset

In [None]:
# Load the raw sales records with the `date` column parsed and used as the index.
# NOTE(review): this dataset is believed to store dates as DD.MM.YYYY; without
# dayfirst=True pandas reads ambiguous values such as 02.01.2013 month-first
# (1 Feb instead of 2 Jan). Confirm against the raw CSV.
df = pd.read_csv(
    "/kaggle/input/competitive-data-science-predict-future-sales/sales_train.csv",
    index_col='date',
    parse_dates=['date'],
    dayfirst=True,
)

In [None]:
# Put the rows in ascending index (date) order.
df = df.sort_index()

In [None]:
# Revenue per row: unit price multiplied by the number of units sold that day.
df['sales'] = df['item_price'] * df['item_cnt_day']

In [None]:
# Keep only the derived `sales` column; the source columns are no longer needed.
df = df.drop(
    columns=['date_block_num', 'shop_id', 'item_id', 'item_price', 'item_cnt_day']
)

In [None]:
# Preview the first rows of the date-indexed frame after the column drop.
df.head()

In [None]:
# Line plot of every remaining column against the date index.
df.plot(figsize=(12,6))

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Decompose revenue into trend, seasonal and residual components.
# period=1 gives a degenerate decomposition (no seasonal component), and the
# frame may hold several rows per date, so first aggregate to one value per
# calendar day (days without rows sum to 0).
# NOTE(review): period=7 assumes a weekly cycle in daily sales; adjust if a
# different seasonality is intended.
daily_sales = df['sales'].resample('D').sum()
results = seasonal_decompose(x=daily_sales, period=7)
results.plot();

In [None]:
# Chronological 80/20 split. A random mask would scatter test rows throughout
# the timeline, so the model would train on observations that come after the
# ones it is evaluated on, and the last training window would not precede the
# test rows. df was sorted by its date index earlier, so a positional cut-off
# keeps every training row ahead of every test row in row order. The split is
# also deterministic, so re-running the notebook reproduces it.
msk = np.arange(len(df)) < int(len(df) * 0.8)
train = df[msk]
test = df[~msk]

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scaler mapping the fitted data range onto [0, 1] (the library default).
scaler = MinMaxScaler(feature_range=(0, 1))

In [None]:
# Show the first and last rows together (rendered as a tuple of two frames).
df.head(),df.tail()

In [None]:
# Learn the scaling range from the training rows only, then apply that same
# mapping to the test rows so no test information leaks into the scaler.
scaled_train = scaler.fit_transform(train)
scaled_test = scaler.transform(test)

In [None]:
# Inspect the first ten scaled training values.
scaled_train[:10]

In [None]:
from keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
# Sliding-window generator: each sample is `n_input` consecutive scaled values,
# and the target is the value that immediately follows the window.
n_input, n_features = 3, 1
generator = TimeseriesGenerator(
    data=scaled_train,
    targets=scaled_train,
    length=n_input,
    batch_size=1,
)

In [None]:
# Pull the first (window, target) pair from the generator to check its layout.
X,y = generator[0]
# X holds one window of n_input scaled values; flatten it for a compact print.
print(f'Given the Array: \n{X.flatten()}')
# y is the scaled value the model should predict for that window.
print(f'Predict this y: \n {y}')

In [None]:
# Shape of one generator batch; the LSTM input_shape is (n_input, n_features).
X.shape

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

# define model

In [None]:
# Single-layer LSTM regressor: a window of n_input scaled values goes in and
# one scaled value (the next step) comes out.
model = Sequential()
model.add(LSTM(100, activation='relu', recurrent_activation='sigmoid', dropout=0.2, input_shape=(n_input, n_features)))
# The ReLU output keeps predictions non-negative, like the min-max scaled
# training targets.
model.add(Dense(1, activation='relu'))
# This is a regression task, so track mean absolute error alongside the MSE
# loss; 'accuracy' is a classification metric and is meaningless for
# continuous targets.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [None]:
# Train for one epoch over the windowed generator (one window per step, since
# the generator was built with batch_size=1).
model.fit(generator,epochs=1)

In [None]:
# Training loss recorded by the most recent fit call, one value per epoch.
loss_per_epoch = model.history.history['loss']
epoch_index = range(len(loss_per_epoch))
plt.plot(epoch_index, loss_per_epoch)

In [None]:
# The final n_input scaled training values: the window that precedes the first
# value to be forecast.
last_train_batch = scaled_train[-n_input:]

In [None]:
# Add the leading batch axis expected by the model: (1, n_input, n_features).
last_train_batch = np.reshape(last_train_batch, (1, n_input, n_features))

In [None]:
# One-step prediction (still in scaled units) from the last training window.
model.predict(last_train_batch)

In [None]:
# First scaled test value, presumably shown to eyeball against the prediction.
scaled_test[0]

In [None]:
# Recursive multi-step forecast: predict one step, push the prediction into
# the window, drop the oldest value, and repeat.
test_predictions = []

# Seed the rolling window with the last n_input scaled training values.
first_eval_batch = scaled_train[-n_input:]
current_batch = first_eval_batch.reshape((1, n_input, n_features))

for step in range(100):
    # model.predict returns one row per batch element; take the single row.
    current_pred = model.predict(current_batch)[0]
    test_predictions.append(current_pred)

    # Slide the window: everything but the oldest value, plus the new prediction.
    shifted_window = current_batch[:, 1:, :]
    current_batch = np.concatenate((shifted_window, [[current_pred]]), axis=1)

In [None]:
# Keep only the first 100 test rows, matching the 100 forecast steps above.
test = test.head(100)

In [None]:
# Map the scaled forecasts back to the original sales units.
true_predictions = scaler.inverse_transform(np.asarray(test_predictions))

In [None]:
# Attach the forecasts next to the actual sales. `test` is a slice of a slice
# of df, so writing a column into it in place triggers pandas'
# SettingWithCopyWarning; assign() builds a new frame instead. The (n, 1)
# array from inverse_transform is flattened so it maps onto a single column.
test = test.assign(Predictions=np.ravel(true_predictions))

In [None]:
# Plot actual sales and the model's predictions on the same axes.
test.plot(figsize=(14,5))

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt
# Forecast error in original sales units: RMSE first, then MAE.
actual_sales = test['sales']
predicted_sales = test['Predictions']

rmse = sqrt(mean_squared_error(actual_sales, predicted_sales))
print(rmse)

mae = mean_absolute_error(actual_sales, predicted_sales)
print(mae)