In [None]:
# Notebook-wide styling: loads the W3.CSS stylesheet and two Google Fonts, then
# defines the `.title-section` class used by the markdown headings.
# The display helpers are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML, Javascript

# Notes on the markup below:
#   * CSS colour values must not be quoted -- a quoted hex string is an invalid
#     declaration and is ignored, so the title colour would never be applied.
#   * The space in the "Open Sans" family name is written as "+" in the URL.
html_contents = """
<!DOCTYPE html>
<html lang="en">
    <head>
        <link rel="stylesheet" href="https://www.w3schools.com/w3css/4/w3.css">
        <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Benne">
        <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Open+Sans">
        <style>
        .title-section {
            font-family: "Benne", Arial, sans-serif;
            color: #6A8CAF;
            }
        body {
            font-family: "Open Sans", Arial, sans-serif;
            }
        </style>
    </head>
</html>
"""
# Last expression of the cell: rendered by the notebook as rich HTML output.
HTML(html_contents)

# <span class="title-section w3-xxxlarge">Store Sales Forecasting</span>

[Corporación Favorita](https://www.corporacionfavorita.com/) is a large Ecuadorian-based grocery retailer. It was founded in 1952 as La Favorita in the center of Quito, and in 1957 it opened its first supermarket, the first self-service store in the country.

**Context**

Forecasts aren’t just for meteorologists. Governments forecast economic growth. Scientists attempt to predict the future population. And businesses forecast product demand—a common task of professional data scientists. Forecasts are especially relevant to brick-and-mortar grocery stores, which must dance delicately with how much inventory to buy. Predict a little over, and grocers are stuck with overstocked, perishable goods. Guess a little under, and popular items quickly sell out, leading to lost revenue and upset customers. More accurate forecasting, thanks to machine learning, could help ensure retailers please customers by having just enough of the right products at the right time.

Current subjective forecasting methods for retail have little data to back them up and are unlikely to be automated. The problem becomes even more complex as retailers add new locations with unique needs, new products, ever-transitioning seasonal tastes, and unpredictable product marketing.

**Potential Impact**

If successful, you'll have flexed some new skills in a real world example. For grocery stores, more accurate forecasting can decrease food waste related to overstocking and improve customer satisfaction. The results of this ongoing competition, over time, might even ensure your local store has exactly what you need the next time you shop.

In [None]:
# import libraries
import pandas as pd
import numpy as np
import datetime as dt
import pandas_datareader as pdr
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import ticker
# Global matplotlib setting: use Verdana as the only entry in the sans-serif
# font list (assumes Verdana is installed in the runtime -- TODO confirm).
plt.rcParams['font.sans-serif'] = ['Verdana']

In [None]:
# Load the competition CSV files; they all live in the same input folder.
input_dir = "../input/store-sales-time-series-forecasting"

store = pd.read_csv(f"{input_dir}/stores.csv")
train = pd.read_csv(f"{input_dir}/train.csv")
test = pd.read_csv(f"{input_dir}/test.csv")
transactions = pd.read_csv(f"{input_dir}/transactions.csv")
oil = pd.read_csv(f"{input_dir}/oil.csv")
holidays_events = pd.read_csv(f"{input_dir}/holidays_events.csv")

## <span class="title-section w3-xxlarge">Store by State</span>

In [None]:
# Horizontal bar chart: number of stores in each state, largest first.
plt.rcParams['figure.dpi'] = 300
fig = plt.figure(figsize=(3, 9), facecolor='#F7F7F7')
gs = fig.add_gridspec(2, 1)
gs.update(wspace=1.5, hspace=1.1)

##########STORE-BY-STATE##########
# Count the rows (stores) per state. Taking `.first()` of each group would plot
# the store id of the first store in the state, not how many stores it has.
store_by_state = store.groupby('state')
stores_per_state = store_by_state.size().reset_index(name='store_count')
# Largest state first so that the highlighted palette entry (index 0) lands on
# it; ties are broken by state name to keep the order deterministic.
stores_per_state = stores_per_state.sort_values(
    ['store_count', 'state'], ascending=[False, True]
)

background_color = "#F7F7F7"
color_map = ["#E8F0F2" for _ in range(20)]
color_map[0] = "#A2DBFA"  # highlight colour for the first (largest) bar
sns.set_palette(sns.color_palette(color_map))

# Scale the value axis to the data, leaving ~10% head-room past the longest bar.
x_max = stores_per_state['store_count'].max() * 1.1

ax0 = fig.add_subplot(gs[0, 0])
ax0.set_facecolor(background_color)
ax0.set(xlim=(0, x_max))
ax0.set(ylim=(0, 20))
for s in ["left", "right", "top", "bottom"]:
    ax0.spines[s].set_visible(False)

#graph
ax0 = sns.barplot(ax=ax0, y=stores_per_state['state'], x=stores_per_state['store_count'],
                      zorder=2, linewidth=0.3, edgecolor="#7F7C82",
                      orient='h', saturation=0.9, alpha=0.7)

#format axis
ax0.axes.get_xaxis().set_visible(False)
ax0.set_ylabel("State",fontsize=3, weight='semibold')
ax0.tick_params(labelsize=3, width=0.2, length=1)
ax0.axvline(linewidth=1, color="#7F7C82")  # baseline at x=0

#title
x0, x1 = ax0.get_xlim()
y0, y1 = ax0.get_ylim()
ax0.text(x0, y1-1, 'Store by State', fontsize=4, ha='left', va='top', weight='semibold')

#data label
# Place each count just inside the left end of its bar; the offset is expressed
# as a fraction of the axis width so it stays readable whatever the data range.
label_offset = x_max / 60
for p in ax0.patches:
    value = f'{p.get_width():4,.0f}'
    x = p.get_x() + label_offset
    y = p.get_y() + p.get_height() / 2
    ax0.text(x, y, value, ha='center', va='center', fontsize=2, weight='semibold')

# Thousands-separator tick format; only visible if the x axis is re-enabled.
x_format = ticker.FuncFormatter(lambda x, p: format(int(x), ','))
ax0.xaxis.set_major_formatter(x_format)

plt.show()

## <span class="title-section w3-xxlarge">Yearly Transactions</span>

In [None]:
# Vertical bar chart: total transactions per calendar year (in thousands).
plt.rcParams['figure.dpi'] = 300
fig = plt.figure(figsize=(3, 5), facecolor='#F7F7F7')
gs = fig.add_gridspec(2, 1)
gs.update(wspace=1.5, hspace=1.1)

##########TRANSACTIONS##########
transactions['year'] = pd.DatetimeIndex(transactions['date']).year

# 2017 is only partially covered by the data, so it is excluded by value
# (rather than by a positional row label) to keep the year totals comparable.
incomplete_year = 2017

# Sum only the `transactions` column; a bare `.sum()` would also try to
# aggregate the string `date` column and the `store_nbr` ids.
yearly_transactions = transactions.groupby('year', as_index=False)['transactions'].sum()
yearly_transactions = yearly_transactions[yearly_transactions['year'] != incomplete_year]
yearly_transactions = yearly_transactions.sort_values('year')

background_color = "#F7F7F7"
color_map = ["#E8F0F2" for _ in range(20)]
color_map[2] = "#A2DBFA"  # highlight colour for the third bar
sns.set_palette(sns.color_palette(color_map))

ax0 = fig.add_subplot(gs[0, 0])
ax0.set_facecolor(background_color)
ax0.set(xlim=(0, 6))
ax0.set(ylim=(0, 35000))
for s in ["right", "top"]:
    ax0.spines[s].set_visible(False)

#graph
# Values are divided by 1,000 so the axis and bar labels read in thousands.
# `order` pins the bars to ascending years so the highlighted palette slot
# always refers to the same year.
ax0 = sns.barplot(ax=ax0, y=yearly_transactions['transactions']/1000, x=yearly_transactions['year'],
                      order=yearly_transactions['year'].tolist(),
                      zorder=2, linewidth=0.3, edgecolor="#7F7C82",
                      orient='v', saturation=0.9, alpha=0.7)

#format axis
# The plotted values are in thousands, so the axis label states the unit.
ax0.set_ylabel("Transactions (thousands)",fontsize=3, weight='semibold')
ax0.set_xlabel("Year",fontsize=3, weight='semibold')
ax0.tick_params(labelsize=3, width=0.2, length=1)

#title
x0, x1 = ax0.get_xlim()
y0, y1 = ax0.get_ylim()
ax0.text(x0, y1+5000, 'Transactions by Year', fontsize=4, ha='left', va='top', weight='semibold')

#data label: value (in thousands) centred just above each bar
for p in ax0.patches:
    value = f'{p.get_height():,.0f} K'
    x = p.get_x() + p.get_width() / 2
    y = p.get_y() + p.get_height() + 1000
    ax0.text(x, y, value, ha='center', va='center', fontsize=2.5, weight='semibold')

# Thousands separators on the y tick labels.
y_format = ticker.FuncFormatter(lambda x, p: format(int(x), ','))
ax0.yaxis.set_major_formatter(y_format)

plt.show()

The chart above shows the total number of transactions per year. Transactions peaked in 2015 and then dipped slightly in 2016. The 2017 data only runs until August, so that year is left out of the chart.

<span class="title-section w3-xlarge">It is not finished yet!</span>

This is just the beginning; the work has only just started. If you like this notebook, you know it is <span class="w3-tag w3-large"><b>FREE</b></span> to click the upvote button.

Thanks for reading this notebook. If you have any feedback or comments, please write them in the comment section below.