# Futre Sales Prediction

`Author:` [Syed Muhammad Ebad](https://www.kaggle.com/syedmuhammadebad)\
`Date:` 22-June-2024\
[Send me an email](mailto:mohammadebad1@hotmail.com)\
[Visit my GitHub profile](https://github.com/smebad)

## Importing Libraries

In [140]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.utils import plot_model
import warnings
warnings.filterwarnings("ignore")


## Load Data

In [141]:
# Load the training, item, item categories, shop, and test data.
sales_train_data = pd.read_csv('sales_train.csv')
items_data = pd.read_csv('items.csv')
items_categories_data = pd.read_csv('item_categories.csv')
shops_data = pd.read_csv('shops.csv')
test_data = pd.read_csv('test.csv')

In [142]:
# Displaying the first 10 rows of the sales training data
sales_train_data.head(10)

Unnamed: 0,date,date_block_num,shop_id,item_id,item_price,item_cnt_day
0,02.01.2013,0,59,22154,999.0,1.0
1,03.01.2013,0,25,2552,899.0,1.0
2,05.01.2013,0,25,2552,899.0,-1.0
3,06.01.2013,0,25,2554,1709.05,1.0
4,15.01.2013,0,25,2555,1099.0,1.0
5,10.01.2013,0,25,2564,349.0,1.0
6,02.01.2013,0,25,2565,549.0,1.0
7,04.01.2013,0,25,2572,239.0,1.0
8,11.01.2013,0,25,2572,299.0,1.0
9,03.01.2013,0,25,2573,299.0,3.0


## Feature Engineering

### Extracting Month from Date

In [143]:
# Function to split month from the date
def split_month(columns):
    words = columns.split('.')[1]
    return words

# Apply the function to extract the month
sales_train_data['Month'] = sales_train_data['date'].apply(split_month)

In [144]:
sales_train_data

Unnamed: 0,date,date_block_num,shop_id,item_id,item_price,item_cnt_day,Month
0,02.01.2013,0,59,22154,999.00,1.0,01
1,03.01.2013,0,25,2552,899.00,1.0,01
2,05.01.2013,0,25,2552,899.00,-1.0,01
3,06.01.2013,0,25,2554,1709.05,1.0,01
4,15.01.2013,0,25,2555,1099.00,1.0,01
...,...,...,...,...,...,...,...
2935844,10.10.2015,33,25,7409,299.00,1.0,10
2935845,09.10.2015,33,25,7460,299.00,1.0,10
2935846,14.10.2015,33,25,7459,349.00,1.0,10
2935847,22.10.2015,33,25,7440,299.00,1.0,10


### Extract Year from Date

In [145]:
# Function to split year from the date
def split_year(columns):
    words = columns.split('.')[2]
    return words

# Usin function to extract the year
sales_train_data['Year'] = sales_train_data['date'].apply(split_year)

In [146]:
sales_train_data

Unnamed: 0,date,date_block_num,shop_id,item_id,item_price,item_cnt_day,Month,Year
0,02.01.2013,0,59,22154,999.00,1.0,01,2013
1,03.01.2013,0,25,2552,899.00,1.0,01,2013
2,05.01.2013,0,25,2552,899.00,-1.0,01,2013
3,06.01.2013,0,25,2554,1709.05,1.0,01,2013
4,15.01.2013,0,25,2555,1099.00,1.0,01,2013
...,...,...,...,...,...,...,...,...
2935844,10.10.2015,33,25,7409,299.00,1.0,10,2015
2935845,09.10.2015,33,25,7460,299.00,1.0,10,2015
2935846,14.10.2015,33,25,7459,349.00,1.0,10,2015
2935847,22.10.2015,33,25,7440,299.00,1.0,10,2015


In [147]:
# Add a 'Sales' column
sales_train_data['Sales'] = sales_train_data['item_cnt_day']

In [148]:
items_data

Unnamed: 0,item_name,item_id,item_category_id
0,! ВО ВЛАСТИ НАВАЖДЕНИЯ (ПЛАСТ.) D,0,40
1,!ABBYY FineReader 12 Professional Edition Full...,1,76
2,***В ЛУЧАХ СЛАВЫ (UNV) D,2,40
3,***ГОЛУБАЯ ВОЛНА (Univ) D,3,40
4,***КОРОБКА (СТЕКЛО) D,4,40
...,...,...,...
22165,"Ядерный титбит 2 [PC, Цифровая версия]",22165,31
22166,Язык запросов 1С:Предприятия [Цифровая версия],22166,54
22167,Язык запросов 1С:Предприятия 8 (+CD). Хрустале...,22167,49
22168,Яйцо для Little Inu,22168,62


### Add Item Categories

In [149]:
# Create item_categories_data
items_categories_data = []
for i in sales_train_data['item_id']:
  items_categories_data.append(items_data['item_category_id'].iloc[i])

# Adding item categories to the sales training data
sales_train_data['item_categories'] = items_categories_data

In [150]:
sales_train_data.head(10)

Unnamed: 0,date,date_block_num,shop_id,item_id,item_price,item_cnt_day,Month,Year,Sales,item_categories
0,02.01.2013,0,59,22154,999.0,1.0,1,2013,1.0,37
1,03.01.2013,0,25,2552,899.0,1.0,1,2013,1.0,58
2,05.01.2013,0,25,2552,899.0,-1.0,1,2013,-1.0,58
3,06.01.2013,0,25,2554,1709.05,1.0,1,2013,1.0,58
4,15.01.2013,0,25,2555,1099.0,1.0,1,2013,1.0,56
5,10.01.2013,0,25,2564,349.0,1.0,1,2013,1.0,59
6,02.01.2013,0,25,2565,549.0,1.0,1,2013,1.0,56
7,04.01.2013,0,25,2572,239.0,1.0,1,2013,1.0,55
8,11.01.2013,0,25,2572,299.0,1.0,1,2013,1.0,55
9,03.01.2013,0,25,2573,299.0,3.0,1,2013,3.0,55


In [161]:
# Create a combined item_id_categories column
sales_train_data['item_id_categories']=sales_train_data['item_id'].apply(str) + '.' + sales_train_data['item_categories'].apply(str)

In [None]:
sales_train_data

Unnamed: 0,date,date_block_num,shop_id,item_id,item_price,item_cnt_day,Month,Year,Sales,item_categories,item_id_categories
0,02.01.2013,0,59,22154,999.00,1.0,01,2013,1.0,37,22154.37
1,03.01.2013,0,25,2552,899.00,1.0,01,2013,1.0,58,2552.58
2,05.01.2013,0,25,2552,899.00,-1.0,01,2013,-1.0,58,2552.58
3,06.01.2013,0,25,2554,1709.05,1.0,01,2013,1.0,58,2554.58
4,15.01.2013,0,25,2555,1099.00,1.0,01,2013,1.0,56,2555.56
...,...,...,...,...,...,...,...,...,...,...,...
2935844,10.10.2015,33,25,7409,299.00,1.0,10,2015,1.0,55,7409.55
2935845,09.10.2015,33,25,7460,299.00,1.0,10,2015,1.0,55,7460.55
2935846,14.10.2015,33,25,7459,349.00,1.0,10,2015,1.0,55,7459.55
2935847,22.10.2015,33,25,7440,299.00,1.0,10,2015,1.0,57,7440.57


In [None]:
# Filtering the data for the month of November
train_Data = sales_train_data[sales_train_data['Month']==11]

# Drop the unnecessary columns for training
training_data = sales_train_data.drop(columns=['date', 'date_block_num', 'item_price', 'Month', 'Year', 'Sales', 'item_id_categories', 'item_cnt_day'])

In [None]:
train_target = sales_train_data['item_cnt_day']

## Model Training

In [None]:
# Defining and compiling the model
model = Sequential()
model.add(Dense(15, activation='sigmoid', input_dim=training_data.shape[1]))
model.add(Dense(10, activation='relu'))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))

In [None]:
model.compile(optimizer='adam',
              loss='mse',
               metrics=['mse', 'mae' ])

In [110]:
# Train the model
history = model.fit(training_data, train_target, epochs=100, batch_size=512, validation_split=0.2)

Epoch 1/100
[1m4588/4588[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - loss: 4.5216 - mae: 0.2674 - mse: 4.5216 - val_loss: 17.2638 - val_mae: 0.2590 - val_mse: 17.2638
Epoch 2/100
[1m4588/4588[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - loss: 4.7032 - mae: 0.2530 - mse: 4.7032 - val_loss: 17.2638 - val_mae: 0.2590 - val_mse: 17.2638
Epoch 3/100
[1m4588/4588[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - loss: 4.1182 - mae: 0.2517 - mse: 4.1182 - val_loss: 17.2638 - val_mae: 0.2590 - val_mse: 17.2638
Epoch 4/100
[1m4588/4588[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - loss: 4.4817 - mae: 0.2519 - mse: 4.4817 - val_loss: 17.2638 - val_mae: 0.2590 - val_mse: 17.2638
Epoch 5/100
[1m4588/4588[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - loss: 4.0739 - mae: 0.2511 - mse: 4.0739 - val_loss: 17.2638 - val_mae: 0.2590 - val_mse: 17.2638
Epoch 6/100
[1m4588/4588[0m [32m━━━━━━━━━━━━━━━━━━━━

## Prepare Test Data

In [152]:
# Merge test data with item categories
test_data = test_data.merge(items_data[['item_id', 'item_category_id']], on='item_id', how='left')

In [153]:
# Adding Month and Year columns to test data
test_data['Month'] = '11'
test_data['Year'] = '2015'

In [154]:
# Prepare 'item_categories' for test data similar to training data
test_data['item_categories'] = test_data['item_id'].map(items_data.set_index('item_id')['item_category_id'])

In [155]:
# Drop unnecessary columns from test data
test_data = test_data.drop(columns=['ID'])

In [156]:
# Ensuring all columns are in the same order as the training data
test_data = test_data[training_data.columns]

## Predict on Test Data

In [157]:
# Using the trained model to predict the sales for the test data
test_predictions = model.predict(test_data)

[1m6694/6694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 916us/step


## Submission

In [163]:
# Creating the submission file
submission = pd.DataFrame({
    'ID': test_data.index,
    'item_cnt_month': test_predictions.flatten()
})

In [162]:
# Saving the submission file
submission.to_csv('submission.csv', index=False)

## Conclusion

* The model has been trained on the sales data and predictions have been made for the test data.
* The results are saved in the submission file.