## Import Libraries..

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score

In [2]:
import os
for dirname, _, filenames in os.walk('future-sales/'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

future-sales/items.csv
future-sales/item_categories.csv
future-sales/sales_train.csv
future-sales/sample_submission.csv
future-sales/shops.csv
future-sales/test.csv


## Read Data..

In [3]:
item_categories = pd.read_csv('future-sales/item_categories.csv')
sale_item = pd.read_csv('future-sales/items.csv')
shop_name = pd.read_csv('future-sales/shops.csv')
train_data = pd.read_csv('future-sales/sales_train.csv')
test_data = pd.read_csv('future-sales/test.csv')

In [4]:
train_data.head()

Unnamed: 0,date,date_block_num,shop_id,item_id,item_price,item_cnt_day
0,02.01.2013,0,59,22154,999.0,1.0
1,03.01.2013,0,25,2552,899.0,1.0
2,05.01.2013,0,25,2552,899.0,-1.0
3,06.01.2013,0,25,2554,1709.05,1.0
4,15.01.2013,0,25,2555,1099.0,1.0


In [5]:
test_data.head()

Unnamed: 0,ID,shop_id,item_id
0,0,5,5037
1,1,5,5320
2,2,5,5233
3,3,5,5232
4,4,5,5268


## Data Exploration..

In [6]:
#Copy the train_data
train_Data = train_data.copy()

#Check NaN
train_Data.isna().sum()

date              0
date_block_num    0
shop_id           0
item_id           0
item_price        0
item_cnt_day      0
dtype: int64

In [7]:
def month_column(col):
    temp = col.split('.')[1]
    return temp
train_Data['Month'] = train_Data['date'].apply(month_column)

def year_column(col):
    temp = col.split('.')[2]
    return temp
train_Data['Year'] = train_Data['date'].apply(year_column)

train_Data['Sales'] = train_Data['item_price'] * train_Data['item_cnt_day']

item_categories = []
for i in train_Data['item_id']:
    item_categories.append(sale_item['item_category_id'].iloc[i])

train_Data['item_categories'] = item_categories

# Add the item_id_categories columns
train_Data['item_id_categories'] = train_Data['item_id'].apply(str) + ',' +train_Data['item_categories'].apply(str)

In [8]:
train_Data.head()

Unnamed: 0,date,date_block_num,shop_id,item_id,item_price,item_cnt_day,Month,Year,Sales,item_categories,item_id_categories
0,02.01.2013,0,59,22154,999.0,1.0,1,2013,999.0,37,2215437
1,03.01.2013,0,25,2552,899.0,1.0,1,2013,899.0,58,255258
2,05.01.2013,0,25,2552,899.0,-1.0,1,2013,-899.0,58,255258
3,06.01.2013,0,25,2554,1709.05,1.0,1,2013,1709.05,58,255458
4,15.01.2013,0,25,2555,1099.0,1.0,1,2013,1099.0,56,255556


# Deep Learning

## Apply train test split..

In [9]:
train_Data = train_Data[train_Data['Month'] == '11']
training_data = train_Data.drop(columns = ['date', 'date_block_num', 'item_price', 'Month', 'Year','Sales', 
                                           'item_id_categories', 'item_cnt_day'])
training_target = train_Data['item_cnt_day']

In [10]:
training_data = np.array(training_data)
training_target = np.array(training_target)

In [11]:
training_data.shape

(183164, 3)

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import plot_model

In [13]:
model = Sequential()
model.add(Dense(4, activation = 'sigmoid', input_dim = training_data.shape[1]))
model.add(Dense(2, activation = 'relu'))
model.add(Dense(1, activation = 'sigmoid'))
model.compile(optimizer= 'adam', 
              loss = 'mse', 
              metrics = ['mse', 'mae'])
history = model.fit(training_data, training_target, epochs = 5, batch_size = 256, validation_split = 0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## Test Data..

In [14]:
test_data.head()

Unnamed: 0,ID,shop_id,item_id
0,0,5,5037
1,1,5,5320
2,2,5,5233
3,3,5,5232
4,4,5,5268


In [15]:
test_Data = test_data.copy()
#Add the item_categories columns
item_categor = []
for i in test_Data['item_id']:
    item_categor.append(sale_item['item_category_id'].iloc[i])

test_Data['item_categories'] = item_categor

In [16]:
test_Data = test_Data.drop(columns = 'ID')
test_Data = np.array(test_Data)
test_data['item_cnt_month'] = model.predict(test_Data)

In [17]:
submission = pd.read_csv('future-sales/sample_submission.csv')
submission['item_cnt_month'] = model.predict(test_Data)
submission.to_csv('submission.csv', index = False)

In [18]:
submission

Unnamed: 0,ID,item_cnt_month
0,0,0.885349
1,1,0.885349
2,2,0.885349
3,3,0.885349
4,4,0.885349
...,...,...
214195,214195,0.885349
214196,214196,0.885349
214197,214197,0.885349
214198,214198,0.885349


# THE END..!!