In [None]:
# Import packages

import pandas as pd
import numpy as np
import random

# Seed the NumPy and Python RNGs so the notebook is reproducible
np.random.seed(42)
random.seed(42)

# preprocessing and feature engineering
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.stattools import adfuller, acf, pacf


# modeling
import keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, RepeatVector, TimeDistributed
from sklearn.model_selection import train_test_split

# visualization
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from matplotlib import rc
from pandas.plotting import register_matplotlib_converters
from pandas_profiling import ProfileReport
from pylab import rcParams
sns.set(style='whitegrid', palette='muted', font_scale=1.5)
rcParams['figure.figsize'] = 22, 10

In [None]:
# Read the processed training data (parsing `Date` as datetimes) and discard the
# `Unnamed: 0` column -- presumably a row index written out by an earlier to_csv().
df = (
    pd.read_csv('train_processed.csv', parse_dates=['Date'])
    .drop(columns=['Unnamed: 0'])
)
print(f"The dataset contains {len(df)} Sales Data")

# Display options: never truncate columns, show at most 100 rows.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', len(df.columns))
df.head(3)

In [None]:
# Number of missing values in each column.
df.isna().sum(axis=0)

In [None]:
# Move `Date` into the index, then order the rows by that index level (oldest first).
df = df.set_index('Date').sort_values(by='Date', ascending=True)
df.head()

In [None]:
# Keep only the date and the sales target for the univariate series.
# `Date` has been moved into the index by the set_index() cell, so selecting it as a
# column raises KeyError; bring it back as a regular column first when needed.
sales_source = df if 'Date' in df.columns else df.reset_index()
# .copy() makes df2 an independent frame, so later column assignments on it do not
# trigger SettingWithCopyWarning.
df2 = sales_source[['Date', 'Sales']].copy()
df2.head()

In [None]:
# Make sure `Date` is datetime-typed, then keep only rows with strictly positive sales.
# assign() returns a new frame instead of writing a column into a frame that may be a
# slice of `df` (which raises SettingWithCopyWarning on pandas < 3).
df2 = df2.assign(Date=pd.to_datetime(df2['Date']))
# .copy() so the filtered result is an independent frame for the cells that follow.
df2 = df2[df2['Sales'] > 0.0].copy()

In [None]:
# Plot the remaining columns of df2 against `Date`, working on the Axes object that
# DataFrame.plot returns (it is also the current pyplot axes).
ax = df2.plot(x='Date', figsize=(20, 15))
ax.set_ylabel("Sales")
ax.set_title("Sales", fontsize=16)
ax.legend(fontsize=14)
plt.show()

In [None]:
# Positional split: the first 70% of the rows are the training set, the rest is held out.
train_size = int(len(df2) * 0.70)
test_size = len(df2) - train_size
train = df2.iloc[:train_size]
testt = df2.iloc[train_size:]
print(train.shape, testt.shape)

In [None]:
# Halve the held-out rows by position: the first half becomes `test`, the second `val`.
test_size = int(len(testt) * 0.50)
val_size = len(testt) - test_size
test = testt.iloc[:test_size]
val = testt.iloc[test_size:]
print(val.shape, test.shape)

In [None]:
# Standardize `Sales` with statistics fitted on the training split only, then apply
# the same fitted scaler to the test and validation splits.
# (StandardScaler is already imported in the imports cell at the top of the notebook.)
scaler = StandardScaler()
scaler = scaler.fit(train[['Sales']])

# train/test/val are iloc slices of df2; writing a column into them in place raises
# SettingWithCopyWarning on pandas < 3. assign() builds new frames instead.
# transform() returns an (n, 1) array, so flatten it to a 1-D column.
# NOTE: the split names are rebound to their scaled versions, so running this cell a
# second time would scale already-scaled values -- re-run the split cells first.
train = train.assign(Sales=scaler.transform(train[['Sales']]).ravel())
test = test.assign(Sales=scaler.transform(test[['Sales']]).ravel())
val = val.assign(Sales=scaler.transform(val[['Sales']]).ravel())

In [None]:
# Default number of consecutive rows in one input window.
TIME_STEPS = 30


def create_sequences(X, y, time_steps=TIME_STEPS):
    """Cut ``X`` into overlapping windows and pair each with the value that follows it.

    Window ``i`` is ``X.iloc[i:i + time_steps]`` and its target is
    ``y.iloc[i + time_steps]``, i.e. the first value after the window.

    Parameters
    ----------
    X : pandas.DataFrame or pandas.Series
        Source of the input windows; rows are taken by position.
    y : pandas.Series
        Source of the targets; indexed by position, so it needs at least
        ``len(X)`` entries.
    time_steps : int, default TIME_STEPS
        Number of consecutive rows per window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(Xs, ys)``. For a DataFrame ``X``, ``Xs`` has shape
        ``(n_windows, time_steps, n_columns)`` and ``ys`` has shape
        ``(n_windows,)``, where ``n_windows = len(X) - time_steps``.
        When ``X`` has no more than ``time_steps`` rows, correctly shaped empty
        arrays are returned.
    """
    n_windows = len(X) - time_steps
    if n_windows <= 0:
        # np.array([]) would be 1-D here, which makes callers that read
        # Xs.shape[1] / Xs.shape[2] fail with IndexError. Keep the rank instead.
        empty_windows = np.empty((0, time_steps) + tuple(X.shape[1:]))
        return empty_windows, np.empty((0,))

    Xs = [X.iloc[start:start + time_steps].values for start in range(n_windows)]
    ys = [y.iloc[start + time_steps] for start in range(n_windows)]
    return np.array(Xs), np.array(ys)

# Build the windowed inputs and next-step targets for each split.
X_train, y_train = create_sequences(train[['Sales']], train['Sales'])
X_test, y_test = create_sequences(test[['Sales']], test['Sales'])
X_val, y_val = create_sequences(val[['Sales']], val['Sales'])

# Report the shapes, each labelled with the array it actually describes.
print(f'Training shape: {X_train.shape}')
print(f'Testing shape: {X_test.shape}')
print(f'Validation shape: {X_val.shape}')

print(f'Training target shape: {y_train.shape}')
print(f'Testing target shape: {y_test.shape}')
print(f'Validation target shape: {y_val.shape}')

In [None]:
# Seed TensorFlow's own RNG (weight initialisation, dropout masks). The imports cell
# seeds only NumPy and `random`; the same seed value is reused here.
tf.random.set_seed(42)

# Window length and feature count are taken from the training windows.
n_timesteps, n_features = X_train.shape[1], X_train.shape[2]

model = Sequential()
# First LSTM returns only its final output: one 128-d vector per input window.
model.add(LSTM(128, input_shape=(n_timesteps, n_features)))
model.add(Dropout(rate=0.2))

# Repeat that vector once per timestep and run a second LSTM that emits a full sequence.
model.add(RepeatVector(n_timesteps))
model.add(LSTM(128, return_sequences=True))

model.add(Dropout(rate=0.2))
# Project every timestep back to the input feature count, so the output has the same
# (timesteps, features) shape as one input window.
model.add(TimeDistributed(Dense(n_features)))

# The `decay` argument of Adam is rejected by the Keras optimizers shipped with
# TF >= 2.11 and is ignored by Keras 3. InverseTimeDecay with decay_steps=1 gives the
# same schedule the legacy argument did: lr = 0.0001 / (1 + 0.01 * step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.0001,
    decay_steps=1,
    decay_rate=0.01,
)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule, epsilon=1e-08),
    loss='mae',
)
model.summary()