In [None]:
from pathlib import Path
import json
import numpy as np 
import pandas as pd 
from learntools.time_series.style import *
from wordcloud import WordCloud
import matplotlib.pyplot as plt

from urllib.request import urlopen
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio

# Plotly look and feel: the bare 'none' template for graph_objects as well as
# plotly.express figures, plus a common colour scale and size for express charts.
pio.templates.default = px.defaults.template = 'none'
px.defaults.color_continuous_scale = px.colors.sequential.Blackbody
px.defaults.width, px.defaults.height = 1000, 500






# Read the training data of the competition and index it by store, product
# family and date.
comp_dir = Path('../input/store-sales-time-series-forecasting')
store_sales = pd.read_csv(
    comp_dir / 'train.csv',
    dtype={
        'store_nbr': 'int32',
        'sales': 'float32',
        'onpromotion': 'float32',
    },
    # `infer_datetime_format` is deprecated since pandas 2.0 (format inference
    # is the default there), so only `parse_dates` is passed.
    parse_dates=['date'],
)
store_sales = store_sales.set_index(['store_nbr', 'family', 'date']).sort_index()

# Average sales of every (store, family) pair, broadcast back onto each row.
store_sales['sales_avg'] = (
    store_sales.groupby(level=['store_nbr', 'family'])['sales'].transform('mean')
)

# Relative sales: sales divided by the pair's average. An average of 0 is
# replaced by 1 so the division never produces inf/NaN.
store_sales['sales_rel'] = store_sales['sales'] / store_sales['sales_avg'].replace(0, 1)


# Read in store information
strs = pd.read_csv(comp_dir / 'stores.csv')

# Canton column used as the key of the choropleth maps: the upper-cased city
# name, except for the cities listed below, which are mapped to a different
# canton name (presumably the name the map data uses for them).
# Deriving the canton row by row (instead of joining a per-city lookup back on
# `city`) keeps exactly one row per store even if a city name were to appear
# in more than one state.
canton_overrides = {'Puyo': 'PASTAZA', 'Libertad': 'BALAO'}
strs = strs.assign(
    canton=strs['city'].map(canton_overrides).fillna(strs['city'].str.upper())
)

# Join stores data to sales data
store_sales = store_sales.join(strs.set_index('store_nbr'), on='store_nbr')


# Download the canton boundaries of Ecuador as geoJSON
with urlopen('https://raw.githubusercontent.com/pablocarreraest/Ecuador-geoJSON/master/geojson/cantons.geojson') as response:
    counties = json.load(response)

# Walk over the geoJSON features once: store an upper-cased canton name on each
# feature (the maps match on it via `featureidkey`) and collect one record per
# canton for a data frame listing all available cantons.
canton_records = []
for feature in counties["features"]:
    props = feature["properties"]
    canton_upper = props["canton"].upper()
    props["cantonup"] = canton_upper
    canton_records.append(
        {
            "canton": canton_upper,
            "iso_1": props["iso_1"],
            "iso_2": props["iso_2"],
            "province": props["province"],
        }
    )
cantons = pd.DataFrame(canton_records)



# Corporación Favorita Store Sales Dashboard
This notebook is designed to interactively explore store sales from Corporación Favorita, a large Ecuador-based grocery retailer.

The provided data contains the number of items sold per product family, store and day from 2013 to August 2017. Favorita has 54 stores in 22 cantons and groups its products into 33 different families. The visualizations below let you explore all of these dimensions.

In [None]:
# One choropleth map of Ecuador per year, showing the average (positive) sales
# of every canton.
fig = make_subplots(
    rows=2, cols=3,
    column_widths=[0.3, 0.3, 0.4],
    shared_yaxes=True,
    specs=[[{"type": "choropleth"}, {"type": "choropleth"}, {"type": "choropleth", "rowspan": 2}],
           [{"type": "choropleth"}, {"type": "choropleth"}, None]
          ])

# Grid cell (row, col) for each year in order of appearance: a 2x2 block on the
# left and the tall, row-spanning cell on the right for the fifth year.
positions = [(1, 1), (1, 2), (2, 1), (2, 2), (1, 3)]

tdf = store_sales.reset_index()
row_year = tdf.date.dt.year  # computed once instead of once per loop pass
years = row_year.unique()
if len(years) > len(positions):
    raise ValueError(
        f'The subplot grid holds {len(positions)} maps but the data spans {len(years)} years'
    )

# Mean of the positive sales per canton, one Series per year
canton_means = {
    year: tdf[(row_year == year) & (tdf.sales > 0)].groupby('canton').sales.mean()
    for year in years
}

# All maps share one colour range. Without it every trace auto-scales to its
# own data, so the single colorbar shown would only be valid for one map and
# colours could not be compared between years.
zmax = max((float(m.max()) for m in canton_means.values() if len(m)), default=0.0)

for i, (year, (rw, cl)) in enumerate(zip(years, positions)):
    # left join from cantons in order to display all cantons; cantons without
    # sales get the value 0
    tmp = cantons.join(canton_means[year], on='canton').fillna(0)

    # add map
    fig.add_trace(
        go.Choropleth(
            geojson=counties,
            locations=tmp['canton'], z=tmp['sales'],
            zmin=0, zmax=zmax,
            name=str(year),
            showscale=(i == len(years) - 1),  # one colorbar is enough: the range is shared
            featureidkey='properties.cantonup',
        ),
        row=rw, col=cl
    )

    # Add year text
    fig.add_trace(
        go.Scattergeo(
            showlegend=False,
            lon=[-78],
            lat=[1.5],
            text=f'{year}',
            mode='text',
            textfont={"size": 20}),
        row=rw, col=cl)

# set map range
fig.update_geos(
    scope='south america',
    lataxis_range=[-5, 2],
    lonaxis_range=[-81, -75])

fig.update_layout(height=1000, title='Average Sales per Canton and Year')

fig.show()

In [None]:

# Daily sales in 2017: one line per store, one animation frame per product
# family, and a dropdown to isolate a single store.
tdf = store_sales.loc[:, :, '2017'].reset_index()

fig = px.line(tdf,
              x='date',
              y='sales',
              line_group='store_nbr',
              animation_frame='family',
              markers=True,
              title='Sales 2017 per Day, Store and Product Family')

# One dropdown button per store. A trace is recognised as belonging to a store
# through the "store_nbr=<n>" label in its hover template. The label is matched
# together with the "<" of the tag that follows it ("<br>" or "<extra>"),
# because a plain substring test for "store_nbr=1" would also match the traces
# of stores 10-19.
fig.layout.updatemenus = [
    dict(
        active=0,
        buttons=[
            dict(
                label=f"{s}",
                method="update",
                args=[{"visible": [f"store_nbr={s}<" in e.hovertemplate for e in fig.data]}],
            )
            for s in tdf.store_nbr.unique()
        ],
    )
]

fig.show()

In [None]:
# Box plots of the 2017 sales per product family, animated over the stores.
# Rows without sales (sales <= 0) are left out of the distributions.
tdf = store_sales.loc[:, :, '2017'].reset_index()
has_sales = tdf.sales > 0

box_options = dict(
    y='sales',
    color='family',
    animation_frame='store_nbr',
    title='2017 Sales Boxplot per Store and Product Family',
)
fig = px.box(tdf[has_sales], **box_options)

fig.show()

In [None]:

# The ten stores with the highest total sales over the whole period
top10 = store_sales.groupby('store_nbr').sales.sum().nlargest(10)

# Yearly sales of those stores, split by product family
tdf = store_sales.loc[top10.index.values, :, :].reset_index()
tdf['year'] = tdf.date.dt.year
tdf['store'] = 'store ' + tdf.store_nbr.astype('str')
# Aggregate only the plotted `sales` column: summing the whole frame would also
# try to sum the datetime and text columns, which raises a TypeError on
# pandas >= 2.0.
tdf = tdf.groupby(['store', 'family', 'year'], as_index=False)['sales'].sum()

fig = px.bar(tdf,
             x='store',
             y='sales',
             color='family',
             animation_frame='year',
             title="Top 10 Stores by total sales")
fig.show()

In [None]:

# Two maps side by side: average sales per canton of the ten best selling
# stores (left, green) and of the ten worst selling stores (right, red).
# `top10` is the Series computed in the bar-chart cell of this notebook.
low10 = store_sales.groupby('store_nbr').sales.sum().nsmallest(10)
fig = make_subplots(
    rows=1, cols=2,
    shared_yaxes=True,
    specs=[[{"type": "choropleth"}, {"type": "choropleth"}]
          ])

# Only these columns are needed for the maps
tdf = store_sales.reset_index()[['store_nbr', 'canton', 'sales']]
for i, stores in enumerate([top10, low10]):
    q = tdf.store_nbr.isin(stores.index.values)

    # Average the sales per canton. Aggregating just `sales` avoids taking the
    # mean of text/date columns (a TypeError on pandas >= 2.0), and grouping by
    # canton alone yields exactly one value per map region even when several of
    # the selected stores lie in the same canton.
    canton_sales = tdf[q].groupby('canton').sales.mean()

    # left join from cantons in order to display all cantons; cantons without
    # a selected store get the value 0
    tmp = cantons.join(canton_sales, on='canton').fillna(0)

    # add map
    fig.add_trace(
        go.Choropleth(
            geojson=counties,
            locations=tmp['canton'], z=tmp['sales'],
            showscale=False,
            colorscale='Greens' if i == 0 else 'Reds',
            featureidkey='properties.cantonup'
        ),
        row=1, col=i+1
    )

    # Add text
    fig.add_trace(
        go.Scattergeo(
            showlegend=False,
            lon=[-78],
            lat=[1.5],
            text='Top' if i == 0 else 'Low',
            mode='text',
            textfont={"size": 20}),
        row=1, col=i+1)

# set map range
fig.update_geos(
    scope='south america',
    lataxis_range=[-5, 2],
    lonaxis_range=[-81, -75])

fig.update_layout(title='Top and Lowest 10 Selling Stores')

fig.show()

In [None]:
#top5 = store_sales.groupby('family').sales.sum().nlargest(5)
#tdf = store_sales.loc[:,top5.index.values,'2014':].reset_index()
#tdf['year'] = tdf.date.dt.year
##tdf['store'] = 'store ' + tdf.store_nbr.astype('str')
#tdf= tdf.groupby(['store_nbr','family','year']).sum().reset_index()
##tdf['familyy'] = tdf.family.astype('str')
#fig = px.scatter(tdf, 
#             x='sales',
#             y='store_nbr',
#             size='onpromotion', 
#             color='family',
#             animation_frame='year',
#             title="Top 10 Stores by total sales",
#             log_x=True,
#             size_max=60)
#fig.show()