# Predict Future Sales

<a id="top"></a>

<div class="list-group" id="list-tab" role="tablist">
<h3 class="list-group-item list-group-item-action active" data-toggle="list"  role="tab" aria-controls="home">Navigation</h3>
</div>

* [1. Library](#1)
* [2. Data reading](#2)
* [3. Visualization](#3)
* [4. Data collection](#4)
* [5. Hyperparameter tuning of the ARIMA model](#5)
* [6. Predicted total sales for Nov 2015](#6)


<a id="1"></a>

# Library

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input directory and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import plotly
from datetime import datetime
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
import plotly.express as px

<a id="2"></a>

# Data reading

In [None]:
# Load the sales history and preview its first rows.
sales_train = pd.read_csv(
    "/kaggle/input/competitive-data-science-predict-future-sales/sales_train.csv"
)
sales_train.head(n=5)

In [None]:
# Read only the first 10 rows of the competition test set, from the same
# absolute input directory that sales_train is loaded from.
df_test = pd.read_csv(
    "/kaggle/input/competitive-data-science-predict-future-sales/test.csv",
    nrows=10,
)

In [None]:
sales_train.shape

In [None]:
sales_train.columns

In [None]:
# Number of missing values in each column of the training frame.
sales_train_columns = list(sales_train.columns)
sales_train[sales_train_columns].isna().sum()

In [None]:
sales_train.dtypes

In [None]:
# The raw dates are day.month.year strings; parse them with an explicit format
# so they become real datetimes and are never read month-first.
sales_train['date'] = pd.to_datetime(sales_train['date'], format='%d.%m.%Y')

In [None]:
sales_train.describe()

In [None]:
# De-duplicated shop and item ids found in the training data.
shop_id_list = list(set(sales_train["shop_id"].tolist()))
item_id_list = list(set(sales_train["item_id"].tolist()))

<a id="3"></a>

# Visualization

In [None]:
# Single-number card showing how many distinct shops appear in the data.
shop_count = sales_train["shop_id"].nunique()
fig = go.Figure(
    go.Indicator(
        mode="number",
        value=shop_count,
        title={'text': "How many shops in the dataset ?"},
        domain={'x': [0, 1], 'y': [0, 1]},
    )
)
fig.show()

In [None]:
# Single-number card showing how many distinct items appear in the data.
item_count = sales_train["item_id"].nunique()
fig = go.Figure(
    go.Indicator(
        mode="number",
        value=item_count,
        title={'text': "How many total unique item in the dataset ?"},
        domain={'x': [0, 1], 'y': [0, 1]},
    )
)
fig.show()

In [None]:
# Ten shops with the largest summed item_cnt_day, drawn as horizontal bars.
top_10_tranding_shop = (
    sales_train.groupby("shop_id")["item_cnt_day"]
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
fig = go.Figure(
    go.Bar(
        x=top_10_tranding_shop.values,
        y=top_10_tranding_shop.index,
        orientation="h",
    )
)
fig.update_layout(
    title_text='top 10 tranding shop',
    xaxis_title="Count",
    yaxis_title="Shop ID",
)
# Treat shop ids as labels rather than numbers so the bars are evenly spaced.
fig.update_yaxes(type="category")
fig.show()

In [None]:
# Ten items with the largest summed item_cnt_day, drawn as horizontal bars.
print("top 10 tranding Item")
top_10_tranding_item = (
    sales_train.groupby("item_id")["item_cnt_day"]
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
fig = go.Figure(
    go.Bar(
        x=top_10_tranding_item.values,
        y=top_10_tranding_item.index,
        orientation="h",
    )
)
fig.update_layout(
    title_text='top 10 tranding Item',
    xaxis_title="Count",
    yaxis_title="Item ID",
)
# Treat item ids as labels rather than numbers so the bars are evenly spaced.
fig.update_yaxes(type="category")
fig.show()


In [None]:
# Summed item_cnt_day for every date_block_num, plotted as a line.
total_count_sell_on_date_block_num = (
    sales_train.groupby("date_block_num")["item_cnt_day"].sum()
)

block_trace = go.Scatter(
    x=total_count_sell_on_date_block_num.index,
    y=total_count_sell_on_date_block_num.values,
    mode='lines',
)
fig = go.Figure(data=block_trace)
fig.update_layout(
    title='Total count sale on Date block numer ',
    xaxis_title="Date block numer",
    yaxis_title="Number of total count sales",
)
fig.show()

In [None]:
""""print("Select shop ID")
loop_condition= True
while(loop_condition == True):
    shop_id_number = int(input())
    if shop_id_number in shop_id_list:
        loop_condition = False
        print(" shop_id : ",shop_id_number)
    else:
        print(" please enter valid shop_id")"""

#select 31 shop_id you can choose any shop_id with uper code 

shop_id_number = 31
shop_id_data = sales_train.loc[sales_train["shop_id"] == shop_id_number]
top_five_item = shop_id_data["item_cnt_day"].groupby(shop_id_data.item_id).sum().sort_values(ascending=False)[:5]
fig = go.Figure(go.Bar(y=top_five_item.index, x=top_five_item.values, 
                      orientation="h")) 
fig.update_layout(title_text='Top 5 selling item in particular shop',xaxis_title="Count",yaxis_title="Item ID")
fig.update_yaxes(type="category")
fig.show()

<a id="4"></a>

# Data collection

In [None]:
def data_collector(id_shop, id_item, start="2013-01-01", end="2015-10-31"):
    """Build the gap-free daily sales series of one item in one shop.

    The module-level ``sales_train`` frame is filtered to the requested
    shop/item pair, ``item_cnt_day`` is summed per calendar day, and the result
    is laid onto a complete daily calendar so that days without any sale show
    up as explicit zeros.

    Parameters
    ----------
    id_shop, id_item
        Values compared against ``sales_train["shop_id"]`` and
        ``sales_train["item_id"]``.
    start, end
        First and last day (both inclusive) of the returned calendar. The
        defaults cover 1 January 2013 through 31 October 2015.

    Returns
    -------
    pandas.DataFrame
        A single float column ``item_cnt_day`` indexed by a daily
        ``DatetimeIndex``. A pair with no matching rows yields all zeros.
    """
    pair_mask = (sales_train["shop_id"] == id_shop) & (sales_train["item_id"] == id_item)

    sale_dates = sales_train.loc[pair_mask, "date"]
    if not pd.api.types.is_datetime64_any_dtype(sale_dates):
        # The raw CSV stores day.month.year strings; parse them the same way
        # the notebook's date-conversion cell does, in case it has not run.
        sale_dates = pd.to_datetime(sale_dates, format="%d.%m.%Y")

    # Aggregate only the count column; the remaining columns are not needed.
    daily_counts = sales_train.loc[pair_mask, "item_cnt_day"].groupby(sale_dates).sum()

    # Reindexing onto an explicit calendar keeps the index a pure DatetimeIndex
    # (no string sentinel rows) and cannot produce duplicate boundary dates.
    calendar = pd.date_range(start=start, end=end, freq="D")
    return (
        daily_counts.reindex(calendar, fill_value=0)
        .astype(float)
        .to_frame("item_cnt_day")
    )

<a id="5"></a>

# Hyperparameter tuning of the ARIMA model

In [None]:
# Forecast totals produced by Hypertunning_parameter_ARIMA_model, in call order.
sales = []


def Hypertunning_parameter_ARIMA_model(data, test_size=0.1, forecast_days=30):
    """Grid-search an ARIMA order on ``data`` and forecast the days after it.

    ``data`` is split chronologically into a training part and a hold-out
    tail. Every (p, d, q) with p, d in 0..2 and q in 0..3 is fitted on the
    training part and scored by mean squared error on the hold-out. The order
    with the lowest error is refitted on the training part and used to
    forecast ``len(hold-out) + forecast_days`` steps; the sum of the last
    ``forecast_days`` steps is appended to the module-level ``sales`` list.

    Parameters
    ----------
    data
        Frame with an ``item_cnt_day`` column, ordered oldest to newest.
    test_size
        Fraction of ``data`` (taken from the end) held out for scoring.
    forecast_days
        Number of days after the end of ``data`` whose forecasts are summed.

    Returns
    -------
    float
        The summed forecast that was also appended to ``sales``.

    Raises
    ------
    ValueError
        If ``forecast_days`` is smaller than 1.
    RuntimeError
        If none of the candidate orders could be fitted and scored; the last
        underlying error is chained as the cause.
    """
    if forecast_days < 1:
        raise ValueError("forecast_days must be at least 1")

    # Prefer the current statsmodels ARIMA implementation; fall back to the
    # class imported at the top of the notebook when it is not available.
    try:
        from statsmodels.tsa.arima.model import ARIMA as arima_cls
    except ImportError:
        arima_cls = ARIMA

    def point_forecast(fitted, steps):
        # The legacy results object returns a tuple whose first element is the
        # forecast; the current one returns the forecast directly.
        result = fitted.forecast(steps=steps)
        if isinstance(result, tuple):
            result = result[0]
        return np.asarray(result, dtype=float)

    # shuffle=False keeps the rows in time order: the hold-out must be the
    # most recent stretch of the series, not a random sample of days.
    train, test = train_test_split(data, test_size=test_size, shuffle=False)

    candidate_orders = [
        (p, d, q) for p in range(0, 3) for d in range(0, 3) for q in range(0, 4)
    ]
    errors = {}
    last_failure = None
    for order in candidate_orders:
        try:
            # ``order`` is passed by keyword because its positional slot
            # differs between the legacy and the current ARIMA class.
            fitted = arima_cls(train, order=order).fit()
            predicted = point_forecast(fitted, len(test))
            errors[order] = mean_squared_error(test.item_cnt_day, predicted)
        except Exception as exc:
            # Some orders cannot be fitted on a given series; skip them but
            # remember the reason so a total failure can be explained.
            last_failure = exc

    if not errors:
        raise RuntimeError(
            "No ARIMA order could be fitted and scored on the given data"
        ) from last_failure

    final_order = min(errors, key=errors.get)

    # NOTE(review): as before, the final model is fitted on the training part
    # only, so the forecast has to run across the hold-out period first.
    final_fit = arima_cls(train, order=final_order).fit()
    forecast = point_forecast(final_fit, len(test) + forecast_days)
    next_month_total = float(forecast[-forecast_days:].sum())
    sales.append(next_month_total)
    return next_month_total

<a id="6"></a>

# Predicted total sales for Nov 2015

In [None]:
print("Select shop ID for prediction of nov 2015 sale")
# Shop used for the forecast; predict_main_model reads this global.
# Change the value to forecast another shop (valid ids are in shop_id_list).
a = 31

In [None]:
print("Select Item ID for prediction of nov 2015 sale")
# Item used for the forecast; predict_main_model reads this global.
# Change the value to forecast another item (valid ids are in item_id_list).
b = 20949

In [None]:
def predict_main_model():
    """Collect the daily series for the globals ``a`` (shop) and ``b`` (item) and run the ARIMA search on it."""
    Hypertunning_parameter_ARIMA_model(data_collector(a, b))

            

In [None]:
predict_main_model()

In [None]:
# Show the most recent prediction: ``sales`` grows by one entry per call of
# the ARIMA search, so the first entry can be stale after a re-run.
# The forecast is rounded to the nearest whole unit instead of truncated.
latest_prediction = int(round(float(sales[-1])))
fig = go.Figure(go.Indicator(
    mode = "number",
    value = latest_prediction,
    title = {'text': "Nov 2015 total sales count"},
    domain = {'x': [0, 1], 'y': [0, 1]}
))
fig.show()