# Mexico Toy Sales

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import plotly.express as px
import statsmodels.api as sm
!pip install pmdarima
from pmdarima.arima import auto_arima
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error
from sklearn import metrics

In [None]:
# Read the four CSV tables of the toy-sales dataset.
stores = pd.read_csv('../input/toy-sales/stores.csv')
products = pd.read_csv('../input/toy-sales/products.csv')
inventory = pd.read_csv('../input/toy-sales/inventory.csv')

# Sales lines: convert the 'Date' column to datetimes right after loading.
sales = pd.read_csv('../input/toy-sales/sales.csv')
sales = sales.assign(Date=pd.to_datetime(sales['Date']))

In [None]:
# Attach product and store attributes to every sales line
# (pd.merge defaults to an inner join on the given key).
sales1 = pd.merge(sales, products, on='Product_ID')
sales2 = pd.merge(sales1, stores, on='Store_ID')

# Product_Cost / Product_Price arrive as text containing '$' and spaces.
# regex=False makes '$' a literal character: as a regular expression it is the
# end-of-string anchor, and the default of `regex` differs between pandas versions.
for money_col in ('Product_Cost', 'Product_Price'):
    sales2[money_col] = (
        sales2[money_col]
        .str.replace('$', '', regex=False)
        .str.replace(' ', '', regex=False)
        .astype(float)
    )

# Revenue and cost of each sales line.
sales2['sales'] = sales2['Units'] * sales2['Product_Price']
sales2['cost'] = sales2['Units'] * sales2['Product_Cost']
# No-op when 'Date' is already datetime; kept so this cell does not rely on it.
sales2['Date'] = pd.to_datetime(sales2['Date'])
sales2.head()

In [None]:
# Print column dtypes and non-null counts of the merged sales table.
sales2.info()

# EDA

In [None]:
# Revenue per city, stacked by product category.
# Select 'sales' BEFORE aggregating: summing every column also tries to sum the
# datetime 'Date' column, which raises TypeError on pandas >= 2.0 (and is wasted
# work on older versions). The plotted data is the same as selecting y='sales'.
(
    sales2.groupby(['Store_City', 'Product_Category'])['sales']
    .sum()
    .unstack()
    .plot(kind='barh', stacked=True, figsize=(20, 10))
)
plt.show()

In [None]:
# Total revenue per product category, largest first.
(
    sales2
    .groupby('Product_Category')['sales']
    .sum()
    .sort_values(ascending=False)
)

In [None]:
# Revenue per store, stacked by product category.
# Aggregate only the 'sales' column: a blanket .sum() also hits the datetime
# 'Date' column and raises TypeError on pandas >= 2.0.
(
    sales2.groupby(['Store_Name', 'Product_Category'])['sales']
    .sum()
    .unstack()
    .plot(kind='barh', stacked=True, figsize=(15, 15))
)
plt.show()

In [None]:
# Revenue per store location type, stacked by product category.
# Aggregate only the 'sales' column: a blanket .sum() also hits the datetime
# 'Date' column and raises TypeError on pandas >= 2.0.
(
    sales2.groupby(['Store_Location', 'Product_Category'])['sales']
    .sum()
    .unstack()
    .plot(kind='bar', stacked=True, figsize=(8, 8))
)
plt.xticks(rotation=0)
plt.show()

In [None]:
# Revenue per product, stacked by store location type.
# Aggregate only the 'sales' column: a blanket .sum() also hits the datetime
# 'Date' column and raises TypeError on pandas >= 2.0.
(
    sales2.groupby(['Product_Name', 'Store_Location'])['sales']
    .sum()
    .unstack()
    .plot(kind='barh', stacked=True, figsize=(8, 8))
)
plt.xticks(rotation=0)
plt.show()

The best seller is Lego Bricks from the Toys category, which is also the top category. Now let's see whether this also holds for the top 3 cities.

# TOP 3 Cities

Even though the top location is Downtown, the two cities with the biggest sales are from residential areas.


In [None]:
# Three cities with the highest total revenue.
(
    sales2
    .groupby('Store_City')['sales']
    .sum()
    .sort_values(ascending=False)
    .head(3)
)

In [None]:
# Sales lines from stores whose Store_City equals 'Cuidad de Mexico'.
sales_city1 = sales2.loc[sales2['Store_City'] == 'Cuidad de Mexico']

# Five products with the highest revenue in that city.
top5_city1 = (
    sales_city1.groupby('Product_Name')['sales']
    .sum()
    .sort_values(ascending=False)
    .head(5)
)

plt.figure(figsize=(6, 6))
top5_city1.plot(kind='barh')
plt.xticks(rotation=0)
plt.show()

In [None]:
# Sales lines from stores whose Store_City equals 'Guadalajara'.
sales_city2 = sales2.loc[sales2['Store_City'] == 'Guadalajara']

# Five products with the highest revenue in that city.
top5_city2 = (
    sales_city2.groupby('Product_Name')['sales']
    .sum()
    .sort_values(ascending=False)
    .head(5)
)

plt.figure(figsize=(6, 6))
top5_city2.plot(kind='barh')
plt.xticks(rotation=0)
plt.show()

In [None]:
# Sales lines from stores whose Store_City equals 'Monterrey'.
sales_city3 = sales2.loc[sales2['Store_City'] == 'Monterrey']

# Five products with the highest revenue in that city.
top5_city3 = (
    sales_city3.groupby('Product_Name')['sales']
    .sum()
    .sort_values(ascending=False)
    .head(5)
)

plt.figure(figsize=(6, 6))
top5_city3.plot(kind='barh')
plt.xticks(rotation=0)
plt.show()

# TOP Location

In [None]:
# Keep only sales lines from stores whose Store_Location is 'Downtown'.
is_downtown = sales2['Store_Location'] == 'Downtown'
sales_downtown = sales2[is_downtown]
sales_downtown.head()

In [None]:
# Five products with the highest revenue in Downtown stores.
top5_downtown = (
    sales_downtown.groupby('Product_Name')['sales']
    .sum()
    .sort_values(ascending=False)
    .head(5)
)

plt.figure(figsize=(8, 8))
top5_downtown.plot(kind='barh')
plt.xticks(rotation=0)
plt.show()

In [None]:
# Five products with the highest revenue across all stores.
(
    sales2
    .groupby('Product_Name')['sales']
    .sum()
    .sort_values(ascending=False)
    .head(5)
)

**Sales over the period covered by the dataset**

In [None]:
# Total revenue per calendar day, as a Series and as a one-column frame.
sales_daily = sales2.groupby('Date')['sales'].sum()
sales_daily_df = sales_daily.to_frame()

# Average daily revenue within each week ('W') and each month ('M').
daily_revenue = sales_daily_df['sales']
sales_weekly_mean = daily_revenue.resample('W').mean()
sales_monthly_mean = daily_revenue.resample('M').mean()

In [None]:
# Overlay daily revenue with its weekly and monthly means.
plt.figure(figsize=(30, 10))
for series, series_label in (
    (sales_daily, 'Daily'),
    (sales_weekly_mean, 'Weekly Mean'),
    (sales_monthly_mean, 'Monthly Mean'),
):
    plt.plot(series, label=series_label)
plt.legend()
plt.show()

In [None]:
# Daily revenue as individual black points ('k.') instead of a connected line.
sales_daily.plot(
    figsize=(30, 6),
    style='k.',
)
plt.show()

# Possible Pattern

In [None]:
# Total revenue per (year, month); the month period is turned into a string
# such as '2017-04' so it can be used directly as an x-axis label.
year_key = sales2['Date'].dt.year.rename('year')
month_key = sales2['Date'].dt.to_period("M").rename('month')
sales_monthly = sales2.groupby([year_key, month_key])['sales'].sum().reset_index()
sales_monthly['month'] = sales_monthly['month'].astype(str)

plt.figure(figsize=(30, 6))
plt.plot(sales_monthly['month'], sales_monthly['sales'], label='Monthly Sales')
plt.legend()

# Dashed vertical lines at the months used as boundaries of the annotated patterns.
xcoords = ['2017-04', '2018-04', '2017-02', '2018-02', '2017-08', '2018-08']
for xc in xcoords:
    plt.axvline(x=xc, color='black', linestyle='--')

# (label text, month on the x-axis, height on the y-axis)
pattern_notes = [
    ('PATTERN-1', '2017-03', 750000),
    ('PATTERN-2', '2017-06', 750000),
    ('PATTERN-3', '2017-10', 750000),
    ('Similiar to PATTERN-1', '2018-02', 700000),
    ('Similiar to PATTERN-2', '2018-05', 700000),
    ('Possible PATTERN-3?', '2018-08', 750000),
]
for note_text, note_month, note_height in pattern_notes:
    plt.annotate(note_text, (note_month, note_height))

plt.show()

# Building a Model

In [None]:
# Total revenue per calendar week, as a one-column frame indexed by the weekly period.
# Group by the week period alone: additionally grouping by calendar year would split
# a week that straddles New Year into two rows sharing the same 'week' index value.
week_key = sales2['Date'].dt.to_period("W").rename('week')
sales_weekly = sales2.groupby(week_key)['sales'].sum().to_frame()
sales_weekly

In [None]:
# Stepwise search for SARIMA orders on the weekly revenue series.
# NOTE(review): the search sees the whole series, including the weeks later used
# as the test split - confirm this is intended.
# NOTE(review): m=12 on weekly data means a 12-week seasonal cycle - confirm.
arima_model = auto_arima(
    sales_weekly['sales'],
    # non-seasonal terms: differencing fixed at d=1, p and q searched up to 5
    start_p=1, max_p=5,
    d=1, max_d=5,
    start_q=1, max_q=5,
    # seasonal terms: seasonal differencing fixed at D=1, P and Q searched up to 5
    seasonal=True, m=12,
    start_P=0, max_P=5,
    D=1, max_D=5,
    start_Q=0, max_Q=5,
    # search behaviour and logging
    stepwise=True, n_fits=50,
    trace=True,
    error_action="ignore",
    suppress_warnings=True,
)

In [None]:
# Show the summary of the model selected by auto_arima.
arima_model.summary()

In [None]:
# Chronological split: the first 66% of the weeks train the model, the rest is held out.
size = int(len(sales_weekly) * 0.66)
X_train, X_test = sales_weekly.iloc[:size], sales_weekly.iloc[size:]

# Fit on the TRAINING weeks only. Fitting on the full series would let the model
# see the held-out weeks, so the "test" error would be an in-sample error.
# predict() calls past the end of the training range then return genuine
# out-of-sample forecasts.
model = SARIMAX(
    X_train['sales'],
    order=(1, 1, 0),
    seasonal_order=(0, 1, 1, 12),
)
result = model.fit()
result.summary()

In [None]:
# Positions covering the training weeks.
start_index = 0
end_index = len(X_train) - 1

# Positions covering the held-out weeks (right after training, to the end of the data).
st_index = len(X_train)
ed_index = len(sales_weekly) - 1

train_prediction = result.predict(start=start_index, end=end_index)
prediction = result.predict(start=st_index, end=ed_index)

In [None]:
# Model output over the training weeks against the actual training revenue.
fig_train, ax_train = plt.subplots(figsize=(10, 6))
train_prediction.plot(ax=ax_train, legend=True)
X_train['sales'].plot(ax=ax_train, legend=True)

In [None]:
# Error of the model output over the training weeks.
train_mae = metrics.mean_absolute_error(X_train, train_prediction)
train_mse = metrics.mean_squared_error(X_train, train_prediction)

print('Absolute Error:', train_mae)
print('MSE:', train_mse)
print('RMSE:', np.sqrt(train_mse))

In [None]:
# Model output over the held-out weeks against the actual held-out revenue.
fig_test, ax_test = plt.subplots(figsize=(10, 4))
prediction.plot(ax=ax_test, legend=True)
X_test['sales'].plot(ax=ax_test, legend=True)

In [None]:
# Error of the model output over the held-out weeks.
test_mae = metrics.mean_absolute_error(X_test, prediction)
test_mse = metrics.mean_squared_error(X_test, prediction)

print('Absolute Error:', test_mae)
print('MSE:', test_mse)
print('RMSE:', np.sqrt(test_mse))

In [None]:
# Forecast the 56 periods that follow the last observed week.
first_future = len(sales_weekly)
last_future = (len(sales_weekly) - 1) + 56
forecast = result.predict(first_future, last_future).rename('Forecast')

# One figure: actual training/test revenue, the model output over both, and the forecast.
fig_all, ax_all = plt.subplots(figsize=(10, 4))

X_train['sales'].plot(ax=ax_all, label="Training", color='green')
train_prediction.plot(ax=ax_all, legend=True)
X_test['sales'].plot(ax=ax_all, label="Test", color='blue')
prediction.plot(ax=ax_all, legend=True)
forecast.plot(ax=ax_all, label="Forecast", color="red")
plt.legend(loc="lower right")

# Inventory

In [None]:
# Inventory rows enriched with product and store attributes ...
inventory_full = (
    inventory
    .merge(products, on='Product_ID')
    .merge(stores, on='Store_ID')
)
# ... restricted to the rows with nothing left on hand.
out_of_stock = inventory_full[inventory_full['Stock_On_Hand'] == 0]
out_of_stock

In [None]:
# Units sold per (year, month, product, store location type) ...
units_by_month = (
    sales2
    .groupby([
        sales2['Date'].dt.year.rename('year'),
        sales2['Date'].dt.to_period("M").rename('month'),
        'Product_Name',
        'Store_Location',
    ])['Units']
    .sum()
    .reset_index()
)
# ... keeping only the rows for month 2018-09.
sale_for_out = units_by_month[units_by_month['month'] == '2018-09']
sale_for_out

In [None]:
# Convert the textual price columns of `out_of_stock` to floats.
# - regex=False: '$' must be removed as a literal character; as a regular
#   expression it is the end-of-string anchor, and the default of `regex`
#   differs between pandas versions.
# - astype(str) first, so re-running this cell on already-converted floats works.
# - assign() builds a new frame instead of writing columns into a filtered
#   slice, which avoids SettingWithCopyWarning.
out_of_stock = out_of_stock.assign(**{
    money_col: (
        out_of_stock[money_col]
        .astype(str)
        .str.replace('$', '', regex=False)
        .str.replace(' ', '', regex=False)
        .astype(float)
    )
    for money_col in ('Product_Cost', 'Product_Price')
})

out_of_stock

In [None]:
# Print column dtypes and non-null counts of the out-of-stock table.
out_of_stock.info()

In [None]:
# Estimate the margin lost on each out-of-stock (store, product) pair from the
# units that SAME store sold of that product in the month(s) kept in `sale_for_out`.
# Joining on (Product_Name, Store_Location) would instead give every out-of-stock
# store the combined units of all stores sharing its location type, overstating the loss.
loss_months = sale_for_out['month'].unique()
units_per_store = (
    sales2[sales2['Date'].dt.to_period('M').isin(loss_months)]
    .groupby(['Store_ID', 'Product_ID'], as_index=False)['Units']
    .sum()
)

sales_loss = pd.merge(out_of_stock, units_per_store, on=['Store_ID', 'Product_ID'])
# Lost margin = units sold in the reference month * (price - cost).
sales_loss['loss'] = sales_loss['Units'] * (sales_loss['Product_Price'] - sales_loss['Product_Cost'])
sales_loss

In [None]:
# Report the summed 'loss' column of sales_loss.
total_loss = sales_loss['loss'].sum()
print(
    'According to the last months data, The Loss from out of Stock Products for 1 month is',
    total_loss,
    'Dollars',
)

In [None]:
# Inventory rows enriched with product and store attributes.
in_stock = pd.merge(inventory, products, on='Product_ID')
in_stock = pd.merge(in_stock, stores, on='Store_ID')

# Strip '$' and spaces from the textual price columns, then cast to float.
# regex=False: '$' must be removed as a literal character; as a regular
# expression it is the end-of-string anchor, and the default of `regex`
# differs between pandas versions.
for money_col in ('Product_Cost', 'Product_Price'):
    in_stock[money_col] = (
        in_stock[money_col]
        .str.replace('$', '', regex=False)
        .str.replace(' ', '', regex=False)
        .astype(float)
    )

# Value of the stock on hand, priced at the selling price.
# NOTE(review): "money tied up in inventory" is often valued at Product_Cost
# rather than Product_Price - confirm which valuation is intended.
in_stock['Total'] = in_stock['Product_Price'] * in_stock['Stock_On_Hand']
print('According to the inventory data, Total money tied up in inventory at the toy stores is', in_stock['Total'].sum(), 'Dollars')