# **Store's Weekly Sales Prediction**

In [None]:
# Required packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.metrics as metrics
import sklearn.model_selection as model_selection
import pickle

**Load Dataset**

In [None]:
feature_ds=pd.read_csv('/content/Features_data_set.csv')

In [None]:
sale_ds=pd.read_csv('/content/sales_data_set.csv')

In [None]:
store_ds=pd.read_csv('/content/stores_data_set.csv')

**Departmentwise Sales Prediction**

In [None]:
dept_sale_df=sale_ds.copy()

In [None]:
dept_sale_df.drop('Store',axis=1,inplace=True)

In [None]:
# Turn Date into timestamps, then derive the ISO week number and the calendar year from it.
dept_sale_df = dept_sale_df.assign(Date=pd.to_datetime(dept_sale_df['Date']))
dept_sale_df = dept_sale_df.assign(
    Week=dept_sale_df['Date'].dt.isocalendar().week,
    Year=dept_sale_df['Date'].dt.year,
)
# Preview only: the sorted copy is displayed, dept_sale_df itself keeps its row order.
dept_sale_df.sort_values(by=['Dept', 'Date', 'IsHoliday'], ascending=True)

In [None]:
# The Week and Year columns are enough for weekly sales prediction; the exact date is not needed, so the Date column is removed
dept_sale_df.drop('Date',axis=1,inplace=True)

In [None]:
# Sum up all sales for each department, per holiday flag, week and year.
# groupby orders the result by the group keys by default, so no explicit sort is needed
# (the former un-assigned sort_values call returned a copy that was thrown away).
dept_grouped=dept_sale_df.groupby(['Dept','IsHoliday','Week','Year'])

dept_sale_df1=dept_grouped['Weekly_Sales'].sum().reset_index(name='Dept_SP')


In [None]:
# for checking values randomly
dept_sale_df1.to_csv('/content/dept_sale.csv',index=False)

**Preprocessing**

In [None]:
# checking null values
dept_sale_df1.isnull().sum()

In [None]:
dept_sale_df1.IsHoliday.isnull().sum()

In [None]:
#  checking na values
dept_sale_df1.isna().sum()

In [None]:
dept_sale_df1.info()

In [None]:
dept_sale_df1.columns

In [None]:
dept_sale_df.IsHoliday.unique()

In [None]:
# Encode the IsHoliday column in numeric form.
# OrdinalEncoder numbers the sorted categories from 0, so boolean flags become False -> 0.0, True -> 1.0
# (presumably IsHoliday is boolean here - confirm with the .unique() output of the previous cell).
from sklearn.preprocessing import OrdinalEncoder
enc = OrdinalEncoder()
holiday_codes = enc.fit_transform(dept_sale_df1[['IsHoliday']])
dept_sale_df1['IsHoliday'] = holiday_codes

In [None]:
# checking for duplicates: rows that repeat an earlier (Dept, IsHoliday, Week, Year) combination
is_repeat = dept_sale_df1.duplicated(subset=['Dept', 'IsHoliday', 'Week', 'Year'], keep='first')
df = dept_sale_df1[is_repeat].sort_values(by=['Dept', 'Week'], ascending=True)
df

In [None]:
dept_sale_df1.info()

**EDA**

**Finding Outliers**

In [None]:
dept_sale_df1.describe()

In [None]:
def plot(df, column):
    """Draw a box plot of ``df[column]`` along the x axis.

    Every call opens a new 16x6 figure and places the plot in the first slot
    of a 1x3 subplot grid; nothing is returned.
    """
    plt.figure(figsize=(16, 6))
    axes = plt.subplot(1, 3, 1)
    sns.boxplot(data=df, x=column, ax=axes)
    axes.set_title(f'Box Plot for {column}')

In [None]:
for i in dept_sale_df1.columns:
    plot(dept_sale_df1, i)

In [None]:
# Finding the outliers using IQR
def findOutliers(data, col):
    """Print the IQR fences of ``data[col]`` and return the positions of its outliers.

    A value counts as an outlier when it lies below Q1 - 1.5*IQR or above
    Q3 + 1.5*IQR, where Q1/Q3 are the 25th/75th percentiles of the column.

    Parameters
    ----------
    data : object indexable by column name (a DataFrame in this notebook)
    col : name of the column to inspect

    Returns
    -------
    numpy.ndarray
        Zero-based positions of the outlying values; empty when there are none.
        Returned in addition to being printed, because numpy abbreviates the
        printed array once it gets long.
    """
    Q3 = np.quantile(data[col], 0.75)
    Q1 = np.quantile(data[col], 0.25)
    IQR = Q3 - Q1

    print("IQR value for column %s is: %s" % (col, IQR))

    lower_range = Q1 - 1.5 * IQR
    upper_range = Q3 + 1.5 * IQR
    # np.where returns a one-element tuple for 1-D input; keep only the position array.
    outlier_positions = np.where((data[col] > upper_range) | (data[col] < lower_range))[0]
    print(col,":",lower_range, upper_range,"Outliers:",outlier_positions)
    return outlier_positions
# Report the IQR outliers of every column of the aggregated department frame.
for column_name in dept_sale_df1.columns:
    findOutliers(dept_sale_df1, column_name)

In [None]:
dept_sale_df1

**Model selection**

In [None]:
# Split the columns into x and y: x holds the independent features, y is the dependent target
y = dept_sale_df1['Dept_SP']
x = dept_sale_df1.drop(columns='Dept_SP')

In [None]:
# Splitting data into train(80%) and test(20%)
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=20)
x_train.shape,x_test.shape,y_train.shape,y_test.shape

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
# Seed the estimator (same seed as the train/test split) so re-running the cell reproduces the metrics.
model_gbr = GradientBoostingRegressor(random_state=20).fit(x_train, y_train)
y_pred=model_gbr.predict(x_test)
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
print("R2 Score:",metrics.r2_score(y_test, y_pred))

In [None]:
from sklearn.ensemble import RandomForestRegressor
# Seed the forest (same seed as the train/test split) so the metrics and the model pickled later are reproducible.
model_rfr = RandomForestRegressor(random_state=20).fit(x_train, y_train)
y_pred=model_rfr.predict(x_test)
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
print("R2 Score:",metrics.r2_score(y_test, y_pred))

In [None]:
from sklearn.tree import DecisionTreeRegressor
# Seed the tree (same seed as the train/test split) so re-running the cell reproduces the metrics.
model_dtr = DecisionTreeRegressor(random_state=20).fit(x_train, y_train)
y_pred=model_dtr.predict(x_test)
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
print("R2 Score:",metrics.r2_score(y_test, y_pred))

In [None]:
dept_sale_df1.head(5)

**Test Data**

In [None]:
# Sample data1
test_data=np.array([[1,0,1,2010]])
y_pred=model_rfr.predict(test_data)
y_pred[0]

In [None]:
dept_sale_df1.tail(5)

In [None]:
# Sample data2
test_data=np.array([[99,1,40,2012]])
y_pred=model_rfr.predict(test_data)
y_pred[0]

**Create Pickle File**

In [None]:
# write pickle file  for Department wise sale prediction
with open('/content/dept_sale.pkl', 'wb') as f:
    pickle.dump(model_rfr, f)

**Weekly Sales prediction with markdown and holiday**

**Load data**

In [None]:
weekly_sale_ds=sale_ds.copy()

In [None]:
weekly_sale_ds['Date']=pd.to_datetime(weekly_sale_ds['Date'])

In [None]:
weekly_sale_ds.info()

In [None]:
# adding new col week as its a weekly prediction
weekly_sale_ds['Week']=weekly_sale_ds.Date.dt.isocalendar().week
weekly_sale_ds['Year']=weekly_sale_ds.Date.dt.year

In [None]:
# Week,year are enough to predict weekly sales
weekly_sale_ds.drop('Date',axis=1,inplace=True)

In [None]:
# Sum up the sales of all departments for each store, per holiday flag, week and year.
# groupby orders the result by the group keys by default, so no explicit sort is needed
# (the former un-assigned sort_values call returned a copy that was thrown away).
sale_grouped=weekly_sale_ds.groupby(['Store','IsHoliday','Week','Year'])

sale_sum=sale_grouped['Weekly_Sales'].sum().reset_index(name='Weekly_SP')
sale_sum

In [None]:
feature_ds1=feature_ds.copy()

In [None]:
feature_ds1['Date']=pd.to_datetime(feature_ds1['Date'])
feature_ds1.sort_values(by=['Store','Date'],ascending=True)

In [None]:
feature_ds1['Week']=feature_ds1.Date.dt.isocalendar().week
feature_ds1['Year']=feature_ds1.Date.dt.year

In [None]:
feature_ds1.drop('Date',axis=1,inplace=True)

In [None]:
merged_df = pd.merge(sale_sum, feature_ds1, on=['Store','Week','Year','IsHoliday'])

In [None]:
merged_df.info()

**Preprocessing**

In [None]:
merged_df.isna().sum()

In [None]:
# Fill missing markdowns with 0: either no markdown was given that week, or markdowns had not been introduced yet in the older years
markdown_cols = [f'MarkDown{n}' for n in range(1, 6)]
merged_df[markdown_cols] = merged_df[markdown_cols].fillna(0)

In [None]:
# For checking  original sale amount without markdown
# merged_df['Expected_SP']=merged_df['Weekly_Actual_SP']+merged_df['MarkDown1']+merged_df['MarkDown2']+merged_df['MarkDown3']+merged_df['MarkDown4']+merged_df['MarkDown5']

In [None]:
merged_df['Unemployment']=merged_df['Unemployment'].fillna(0)
merged_df['CPI']=merged_df['CPI'].fillna(0)

In [None]:
merged_df.sort_values(by=['Store','Week','Year'],ascending=True)

In [None]:
# for data cross checking
merged_df.to_csv('/content/sale_df.csv',index=False)

In [None]:
merged_df.info()

**Outliers**

In [None]:
# plot(df, column) is already defined in the department-sales "Finding Outliers" section above.
# The identical second definition that used to live in this cell only shadowed it, so it was
# removed to keep a single definition in the notebook.

In [None]:
for i in merged_df.columns:
    plot(merged_df, i)

**Feature selection**

In [None]:
corr_data = merged_df.corr()
corr_data

In [None]:
plt.figure(figsize=(16,6))
sns.heatmap(corr_data,cmap="coolwarm",fmt=".2f",annot=True)

In [None]:
# No pair of columns has a correlation above 0.9, so all features are kept for training

In [None]:
from sklearn.preprocessing import OrdinalEncoder
enc=OrdinalEncoder()
merged_df['IsHoliday'] = enc.fit_transform(merged_df[['IsHoliday']])

In [None]:
merged_df.info()

**Training the model**

In [None]:
y= merged_df['Weekly_SP']
x = merged_df.drop('Weekly_SP', axis =1)

In [None]:
# Splitting data into train(80%) and test(20%); seeded like the department-wise split so the split is reproducible
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=20)
x_train.shape,x_test.shape,y_train.shape,y_test.shape

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn import metrics

In [None]:
from sklearn.ensemble import RandomForestRegressor
# Seed the forest so its own randomness is reproducible; this is the model pickled for the app.
model_rfr = RandomForestRegressor(random_state=20).fit(x_train, y_train)
y_pred= model_rfr.predict(x_test)
# Labelled like the department-wise model cells so the three numbers can be told apart.
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
print("R2 Score:",metrics.r2_score(y_test, y_pred))

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
# Seed the estimator so its own randomness is reproducible.
model_gbr = GradientBoostingRegressor(random_state=20).fit(x_train, y_train)
y_pred=model_gbr.predict(x_test)
# Labelled like the department-wise model cells so the three numbers can be told apart.
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
print("R2 Score:",metrics.r2_score(y_test, y_pred))

In [None]:
from sklearn.ensemble import AdaBoostRegressor
# Seed the estimator so its own randomness is reproducible.
model_abr = AdaBoostRegressor(random_state=20).fit(x_train, y_train)
y_pred= model_abr.predict(x_test)
# Labelled like the department-wise model cells so the three numbers can be told apart.
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
print("R2 Score:",metrics.r2_score(y_test, y_pred))

In [None]:
# The Random Forest Regressor is selected for prediction; the cells below test the model with sample data

In [None]:
merged_df.head(2)

In [None]:
# Sample data1
test_data=np.array([[1,0,1,2010,71.89,2.603,0.0,	0.0,	0.0	,0.0,	0.0,	211.671989,	7.838]])
y_pred=model_rfr.predict(test_data)
y_pred[0]

In [None]:
merged_df.tail(2)

In [None]:
# Sample data2
# Predict on the last feature row itself, so the values are guaranteed to be in the training column order.
# (The former hand-typed row skipped Year and carried a stray trailing value, which shifted every
# field after Week into the wrong feature.)
test_data=x.tail(1)
y_pred=model_rfr.predict(test_data)
y_pred[0]

In [None]:
# write pickle file  for weekly sale prediction
with open('/content/weekly_sale.pkl', 'wb') as f:
    pickle.dump(model_rfr, f)

In [None]:
from datetime import datetime
date_string='01/12/2024'
dt1=datetime.strptime(date_string, "%d/%m/%Y")
print(dt1)
k=dt1.isocalendar().week
print(k)


In [None]:
merged_df.info()

**Streamlit APP**

In [None]:
%%writefile app.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.metrics as metrics
import sklearn.model_selection as model_selection
import pickle
import streamlit as st
from PIL import Image
from datetime import datetime

#streamlit  page setting
icon = Image.open("store.jpg")
st.set_page_config(page_title= "KR Store - Kavitha",
                page_icon= icon,
                layout= "wide",
                initial_sidebar_state= "expanded",
                )

st.subheader(":blue[KR Store]")
tab1,tab2,tab3=st.tabs([":blue[Department Sales]",":blue[Weekly Sales]",":blue[About]"])

with tab1:
  col1,col2,col3=st.columns(3)
  with col1:
    txt_dept=st.number_input("Enter Dept No")
  with col2:
    txt_date=st.text_input("Enter the Date",datetime.today().strftime("%d/%m/%Y"))
    dt1=datetime.strptime(txt_date, "%d/%m/%Y")
    dept_week=dt1.isocalendar().week
    dept_year=dt1.year
  with col3:
    txt_holiday=st.selectbox("Holiday in date entered week", ("True","False"))
    if txt_holiday=="True":
      holiday=0
    else:
      holiday=1



  if st.button("Predict", key="Department Sales"):
      # load the regression pickle model
      with open('/content/dept_sale.pkl', 'rb') as f:
          model_dpt = pickle.load(f)

      # make array for all user input values in required order for model prediction
      user_data = np.array([[int(txt_dept),int(holiday),int(dept_week),int(dept_year)]])

      # model predict the Department sales based on user input
      y_pred = model_dpt.predict(user_data)
      sale_price = y_pred[0]

      # round the value with 2 decimal point
      sale_price = round(sale_price, 2)
      st.write("Department weekly sales: ", sale_price)

with tab2:
  col4,col5,col6=st.columns(3)
  with col4:
    txt_store=st.number_input("Enter Store No")
    txt_holiday1=st.selectbox("Holiday for date entered week", ("True","False"))
    txt_temp=st.number_input("Enter Temperature in celcius")
    txt_fuel_price=st.number_input("Enter Fuel Price")
    if txt_holiday1=="True":
      week_holiday=0
    else:
      week_holiday=1
  with col5:
    txt_Markdown1=st.number_input("Enter MarkDown1")
    txt_Markdown2=st.number_input("Enter MarkDown2")
    txt_Markdown3=st.number_input("Enter MarkDown3")
    txt_Markdown4=st.number_input("Enter MarkDown4")

  with col6:
    txt_Markdown5=st.number_input("Enter MarkDown5")
    txt_CPI=st.number_input("Enter CPI")
    txt_unemployment=st.number_input("Enter unemployment")
    txt_date1=st.text_input("Enter Date" ,datetime.today().strftime("%d/%m/%Y"))

    dt2=datetime.strptime(txt_date1, "%d/%m/%Y")
    sale_week=dt2.isocalendar().week
    sale_year=dt2.year

  if st.button("Predict", key="Weekly Sales"):
      # load the regression pickle model
      with open('/content/weekly_sale.pkl', 'rb') as f:
          model_weekly = pickle.load(f)

      user_data = np.array([[int(txt_store),int(week_holiday),int(sale_week),int(sale_year),float(txt_temp),
                          float(txt_fuel_price),float(txt_Markdown1),float(txt_Markdown2),float(txt_Markdown3),
                          float(txt_Markdown4),float(txt_Markdown5),
                          float(txt_CPI),float(txt_unemployment)]])

      # model predict Sales based on user input
      y_pred = model_weekly.predict(user_data)

      Week_Sale = y_pred[0]
      markdown=float(txt_Markdown1)+float(txt_Markdown2)+float(txt_Markdown3)+ float(txt_Markdown4)+float(txt_Markdown5)
      st.write("Weekly Sale :", Week_Sale, "With Markdown",markdown)
      st.write("Expected sale price might be", Week_Sale+markdown)
with tab3:
    st.caption(":blue[Overview:]")
    st.caption(":blue[Department wise and general weeklly sales prediction for the given store]")
    st.caption(":blue[Data Cleaning has done for the all the null values ]")
    st.caption(":blue[Model built for Department wise sales prediction with 4 input features]")
    st.caption(":blue[Model built for general weekly sales prediction with markdown and holiday as input]")


In [None]:
!npm install localtunnel

In [None]:
pip install streamlit

In [None]:
!streamlit run /content/app.py &>/content/logs.txt & npx localtunnel --port 8501 & curl ipv4.icanhazip.com