In [None]:
import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from kerastuner.tuners import RandomSearch

In [None]:
# Read sales_train.csv from the Drive folder and preview its last rows.
train_csv='/content/drive/MyDrive/Colab Notebooks/Future sales prediction/sales_train.csv'
df_train=pd.read_csv(train_csv)
df_train.tail()

In [None]:
# (rows, columns) of the training frame
df_train.shape

In [None]:
# Read test.csv from the Drive folder and preview its first rows.
test_csv='/content/drive/MyDrive/Colab Notebooks/Future sales prediction/test.csv'
df_test=pd.read_csv(test_csv)
df_test.head()

In [None]:
# (rows, columns) of the test frame
df_test.shape

In [None]:
# Read items.csv (merged on item_id later) and preview three rows.
items_csv='/content/drive/MyDrive/Colab Notebooks/Future sales prediction/items.csv'
items=pd.read_csv(items_csv)
items.head(3)

In [None]:
# Read item_categories.csv (merged on item_category_id later) and preview it.
categories_csv='/content/drive/MyDrive/Colab Notebooks/Future sales prediction/item_categories.csv'
item_categories=pd.read_csv(categories_csv)
item_categories.head()

**EDA with training data**

In [None]:
# Parse the 'date' column to datetimes, reading ambiguous values day-first.
# NOTE(review): the raw file presumably stores dates as dd.mm.yyyy - confirm.
df_train['date']=pd.to_datetime(df_train['date'],dayfirst=True)

In [None]:
# Quick check that 'date' is now shown as a datetime
df_train.head(2)

In [None]:
# Total item count per day for each shop

daily_shop_totals=df_train.groupby(['date','shop_id'])[['item_cnt_day']].sum()
daily_shop_totals

In [None]:
# Attach item attributes (via item_id) and then category attributes
# (via item_category_id) to every sales row.

df_item=df_train.merge(items,on='item_id')
df_final=df_item.merge(item_categories,on='item_category_id')

In [None]:
# Renumber the rows 0..n-1. drop=True discards the old index instead of inserting it
# as an extra 'index' column, which would otherwise be carried into the later
# aggregations and would make this cell add another column on every re-run.
df_final=df_final.reset_index(drop=True)

In [None]:
# Preview the merged frame
df_final.head(3)

In [None]:
# Number of missing values in each column of the merged frame
df_final.isnull().sum()

**Visualizations**

In [None]:
# Date-indexed copy of the merged frame; the resample() calls below need a datetime index.
df_visu=df_final.set_index('date')

In [None]:
# Weekly mean of item_cnt_day
weekly_mean=df_visu['item_cnt_day'].resample('W').mean()
weekly_mean.plot()

In [None]:
# Monthly mean of item_cnt_day
monthly_mean=df_visu['item_cnt_day'].resample('M').mean()
monthly_mean.plot()

**Converting the data to monthly basis and creating a pivot table**

In [None]:
# Replace each datetime with a 'month/year' text label (e.g. '1/2013'),
# overwriting the 'date' column of df_final.
date_col=df_final['date']
df_month,df_year=date_col.dt.month,date_col.dt.year
df_final['date']=df_month.astype(str).str.cat(df_year.astype(str),sep='/')
df_final

In [None]:
# Monthly item count per (month label, shop, item).
# Only item_cnt_day is aggregated: summing the whole frame would also "sum" the text
# columns (item/category names), prices and category ids, which is slow and meaningless,
# and the pivot below reads item_cnt_day only.
df_grouped=df_final.groupby(['date','shop_id','item_id'])[['item_cnt_day']].sum()


In [None]:
#Creating a pivot table with (shop_id, item_id) as index, month labels as columns
#and the monthly item count as values

df=df_grouped.pivot_table(index=['shop_id','item_id'],columns='date',values='item_cnt_day')

# The 'm/YYYY' labels are strings, so pivot_table orders them lexicographically
# ('1/2013', '1/2014', '1/2015', '10/2013', ...). Put the columns back in calendar
# order so that the feature windows built later (all months but the last for training,
# all months but the first for the test matrix) line up position by position.
def _month_label_key(label):
  month,year=label.split('/')
  return int(year),int(month)

df=df[sorted(df.columns,key=_month_label_key)]

# A missing (shop, item, month) combination means nothing was recorded: treat as 0.
df=df.fillna(0)

In [None]:
# Turn the (shop_id, item_id) index levels back into ordinary columns.
df=df.reset_index()

In [None]:
# Display the wide table: one row per (shop_id, item_id), one column per month label
df

**Creating X_test for the Kaggle prediction**

In [None]:
# Right join: keep every test row and attach the monthly history of its
# (shop_id, item_id) pair where one exists.
X_test=df.merge(df_test,on=['shop_id','item_id'],how='right')

In [None]:
# (rows, columns) of the merged test frame
X_test.shape

In [None]:
# Missing values per column; test pairs without any training history have NaN months
X_test.isnull().sum()

In [None]:
# Replace the missing monthly values with 0
X_test=X_test.fillna(0)

In [None]:
# Display the filled test frame
X_test

In [None]:
# Drop the identifier columns and the '1/2013' month, leaving only month columns as
# model inputs.
# NOTE(review): presumably '1/2013' is removed so the test matrix has the same number of
# month columns as the training features (which omit '10/2015') - confirm.
X_test_new=X_test.drop(columns=['ID','1/2013','shop_id','item_id'])

In [None]:
# (rows, feature columns) of the test feature matrix
X_test_new.shape

In [None]:
# Display the test feature matrix
X_test_new

**Train/validation split**

In [None]:
# The '10/2015' column is the regression target; the remaining month columns are features.
target_month='10/2015'
X=df.drop(columns=[target_month,'shop_id','item_id'])
y=df[target_month].to_numpy().reshape(-1,1)

# 80/20 split with a fixed seed so the partition is reproducible.
X_train,X_val,y_train,y_val=train_test_split(X,y,test_size=0.2,random_state=42)

In [None]:
# Shapes of the feature matrix and the target column, one per line
print(X.shape,y.shape,sep='\n')

In [None]:
# Feature scaler: statistics are learned from the training split ONLY and then reused
# for the validation and test matrices. Re-fitting on each split (as before) standardises
# every split with its own mean/variance, so the same raw value maps to different inputs
# at train and predict time.
# Plain arrays are passed because X_test_new carries different month labels than X_train
# and scikit-learn checks feature names when both fit and transform receive DataFrames.
scaled=StandardScaler()
X_train_scaled=scaled.fit_transform(X_train.to_numpy())
X_val_scaled=scaled.transform(X_val.to_numpy())
X_test_scaled=scaled.transform(X_test_new.to_numpy())

# Separate scaler for the target so its statistics are not overwritten by the features.
y_scaler=StandardScaler()
y_scaled=y_scaler.fit_transform(y_train)
y_val_scaled=y_scaler.transform(y_val)

**ANN**

In [None]:
#Plotting training loss and validation loss

def loss_plot(epochs,train_loss,val_loss):
  """Plot training and validation loss against 1-based epoch numbers.

  epochs: number of points on the x axis (1..epochs).
  train_loss: per-epoch training loss values, drawn in red.
  val_loss: per-epoch validation loss values, drawn in green.
  """
  epoch_axis=np.arange(epochs)+1
  curves=(
    (train_loss,'red','Training loss'),
    (val_loss,'green','Validation loss'),
  )
  for values,colour,label in curves:
    plt.plot(epoch_axis,values,color=colour,label=label)
  plt.legend()
  plt.show()

In [None]:
#Building an ANN regression model

model=keras.Sequential()
# Input width follows the scaled training matrix instead of a hard-coded 33.
model.add(keras.layers.Dense(units=150,activation='relu',input_dim=X_train_scaled.shape[1]))
model.add(keras.layers.Dense(units=200,activation='relu'))
model.add(keras.layers.Dense(units=200,activation='relu'))
# Linear output: the target is a standardised real value (negative and >1 values occur),
# which a sigmoid output, bounded to (0, 1), cannot represent.
model.add(keras.layers.Dense(units=1,activation='linear'))
model.compile(optimizer='adam',loss='mean_squared_error')
# Stop once the monitored loss has not improved for 10 epochs and restore the best weights.
# `callback` is already a list of callbacks.
callback=[tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]
model.summary()

In [None]:
# Train on the scaled training split and evaluate on the scaled validation split each epoch.
# `callback` is already a list, so it is passed as-is rather than wrapped in a second list.
model_fitting=model.fit(x=X_train_scaled,y=y_scaled,epochs=30,validation_data=(X_val_scaled,y_val_scaled),callbacks=callback)


In [None]:
#Plot of training loss and validation loss

history=model_fitting.history
train_loss,val_loss=history['loss'],history['val_loss']
# Early stopping may end training before the requested epoch count, so the x axis
# length is taken from the recorded history.
loss_plot(len(train_loss),train_loss,val_loss)

**Hyperparameter tuning the model**

In [None]:
def build_model(hp,input_dim=33):
  """Build and compile a tunable dense regression network for the tuner.

  hp: hyperparameter container supplied by the tuner; samples 'units' (first layer
      width), 'hidden_layers' (1-4 extra layers) and 'hidden_units' (their width).
  input_dim: number of input features; defaults to 33 as used elsewhere in the notebook.

  Returns the compiled keras.Sequential model (adam optimiser, mean squared error loss).
  """
  model=keras.Sequential()
  model.add(keras.layers.Dense(units=hp.Int('units',min_value=50,max_value=300,step=50),activation='relu',input_dim=input_dim))
  # 'hidden_units' is sampled under one name, so every extra hidden layer shares a width.
  for _ in range(hp.Int('hidden_layers',1,4)):
    model.add(keras.layers.Dense(units=hp.Int('hidden_units',min_value=50,max_value=300,step=50),activation='relu'))
  # Linear output: the standardised regression target is not confined to (0, 1),
  # so a sigmoid output would be unable to fit it.
  model.add(keras.layers.Dense(units=1,activation='linear'))
  model.compile(optimizer='adam',loss='mean_squared_error')
  return model

In [None]:
# Random search over the space defined in build_model: 3 trials, one training run per
# trial, ranked by validation loss. Tuner state is written under new_test/test.
tuner=RandomSearch(
    hypermodel=build_model,
    objective='val_loss',
    max_trials=3,
    executions_per_trial=1,
    directory='new_test',
    project_name='test'
)

In [None]:
# Print the hyperparameters the tuner will sample
tuner.search_space_summary()

In [None]:
# Run the search: each trial trains for up to 15 epochs and is scored on the validation split
tuner.search(x=X_train_scaled,y=y_scaled,epochs=15,validation_data=(X_val_scaled,y_val_scaled))

In [None]:
# Print the outcome of the search trials
tuner.results_summary()

**Predictions**

In [None]:
# The network is trained on standardised targets (y_scaled), so its raw output is in
# z-score units, not item counts. Rebuild the target standardisation from y_train and
# invert it so the submitted values are on the original scale.
# NOTE(review): this uses the hand-built `model`, not the best model found by the tuner.
target_scaler=StandardScaler().fit(y_train)
predictions=target_scaler.inverse_transform(model.predict(X_test_scaled)).round(1)

In [None]:
# Wrap the prediction array in a single-column frame named 'item_cnt_month'
item=pd.DataFrame(predictions,columns=['item_cnt_month'])

In [None]:
# Build the submission frame indexed by the test ID.
# The predictions are passed as a plain array so they are assigned by position:
# a Series would be re-aligned by label against the ID index, which only gives the
# right rows when the IDs happen to equal 0..n-1 and yields NaN otherwise.
data={
    'item_cnt_month':item['item_cnt_month'].to_numpy()
}
df_sub=pd.DataFrame(data=data,index=X_test['ID'])
df_sub

In [None]:
# Write the submission file; the ID index is written as the first column
df_sub.to_csv('sample_sub_final.csv')