## IMPORTS

In [241]:
import pandas as pd
import matplotlib.pyplot as plt

import numpy as np

from bokeh.plotting import figure
from bokeh.models import Slider, ColumnDataSource, CDSView, IndexFilter, Plot, VBar, HBar, HoverTool, CustomJS, Select, Legend
from bokeh.layouts import row, gridplot, layout, column
from bokeh.io import show, save, curdoc
from bokeh.themes import Theme
from bokeh.palettes import viridis as palette
from bokeh.transform import factor_cmap

## BOKEH THEME

In [242]:
# Load the project-wide Bokeh theme from YAML and attach it to the current
# document so every figure created afterwards picks it up.
theme = Theme(filename='templates/theme.yaml')
curdoc().theme = theme

## CREATING DF AND VARIABLES

In [243]:
# Raw sales data; `outlets` lists the outlet identifiers in the order they
# first appear in the file.
df = pd.read_csv('data/sales_predictions_clean.csv')
outlets = df['Outlet_Identifier'].unique().tolist()
df.head()

Unnamed: 0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,Outlet_Type,Item_Outlet_Sales
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,Supermarket Type1,3735.138
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,Supermarket Type2,443.4228
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,Supermarket Type1,2097.27
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,Small,Tier 3,Grocery Store,732.38
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,Supermarket Type1,994.7052


## OUTLETS TOTAL SALES

In [244]:
### CREATING NEW DF to make CDS
# One summary row per outlet: its descriptive attributes (taken from the first
# row seen for that outlet) plus summed item visibility and summed sales.
# A single groupby pass replaces six full-frame boolean-mask scans per outlet.
outlet_attr_cols = [
    'Outlet_Identifier',
    'Outlet_Establishment_Year',
    'Outlet_Size',
    'Outlet_Location_Type',
    'Outlet_Type',
]
outlet_totals = (
    df.groupby('Outlet_Identifier', sort=False)
    .agg(
        Outlet_Visibility_Size=('Item_Visibility', 'sum'),
        Outlet_Total_Sales=('Item_Outlet_Sales', 'sum'),
    )
    .reset_index()
)
outlets_df = (
    # drop_duplicates keeps the first row per outlet, in first-appearance order.
    df.drop_duplicates('Outlet_Identifier')[outlet_attr_cols]
    .merge(outlet_totals, on='Outlet_Identifier')
    .assign(
        Outlet_Visibility_Size=lambda d: d['Outlet_Visibility_Size'].round(5),
        # Truncate to whole currency units for display.
        Outlet_Total_Sales=lambda d: d['Outlet_Total_Sales'].astype(int),
    )
)
outlets_cds = ColumnDataSource(outlets_df)

# Bars are coloured by outlet type.
factor = 'Outlet_Type'
factors = outlets_df[factor].unique().tolist()

### STARTING BOKEH PLOT
fig = figure(
    title='Total Sales for Outlets',
    x_axis_label='Outlets',
    y_axis_label='USD',
    # Take the categorical range from the summary frame itself so the plot
    # does not depend on a separately maintained global list.
    x_range=outlets_df['Outlet_Identifier'].tolist(),
)
fig.vbar(
    x='Outlet_Identifier',
    top='Outlet_Total_Sales',
    source=outlets_cds,
    width=.9,
    fill_color=factor_cmap(factor, palette=palette(len(factors)), factors=factors),
    line_color='black',
    line_width=2,
    alpha=1,
    legend_field=factor,
)

fig.add_tools(HoverTool(
    tooltips=[
        ('Outlet', '@Outlet_Identifier'),
        ('Establishment Year', '@Outlet_Establishment_Year'),
        ('Outlet_Size', '@Outlet_Size'),
        ('Outlet_Type', '@Outlet_Type'),
        ('Total Sales', '@Outlet_Total_Sales USD'),
    ],
    attachment='vertical',
))

fig.legend.location = 'top_left'

# show(fig)
save(fig, 'plots/outlets_totalsales.html');

  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")
  warn("save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'")


## ITEM TYPE SALES BASED ON OUTLET TYPE (GRID PLOT)

In [271]:
f1 = 'Outlet_Type'
# Deliberately NOT named `outlets`: that name holds the outlet identifiers
# used by other cells, and rebinding it here would break them.
outlet_types = df[f1].unique().tolist()
Item_Types = df['Item_Type'].unique().tolist()

# Wide table: one row per item type, one column of summed sales per outlet
# type. Built with a single groupby so it works for any number of outlet
# types; combinations with no rows are filled with 0.
items_df = (
    df.groupby(['Item_Type', f1])['Item_Outlet_Sales']
    .sum()
    .unstack(fill_value=0)
    # Restore first-appearance ordering for both rows and columns.
    .reindex(index=Item_Types, columns=outlet_types, fill_value=0)
    .round(2)
    .rename_axis(index='Item_Type', columns=None)
    .reset_index()
)
items_cds = ColumnDataSource(items_df)

factor = 'Item_Type'
factors = items_df[factor].unique().tolist()
plots = []
for outlet_type in outlet_types:
    # Horizontal bars: item types run along y, sales (USD) along x.
    fig = figure(
        title=f'Total Item Sales for {outlet_type}',
        x_axis_label='USD',
        y_axis_label='Item Type',
        y_range=factors,
    )
    fig.hbar(
        y='Item_Type',
        right=outlet_type,
        left=0,
        source=items_cds,
        height=.8,
        fill_color=factor_cmap(factor, palette=palette(len(factors)), factors=factors),
        line_color='black',
        line_width=2,
        alpha=1,
    )
    plots.append(fig)

# `grid` rather than `layout`, which would shadow bokeh.layouts.layout.
grid = gridplot(plots, ncols=4, toolbar_location=None, plot_height=500)
save(grid, 'plots/item_types.html');
# show(grid)

## OUTLET ITEM SALES (GRID PLOT)

In [5]:
### CREATING NEW DF to make CDS
# Derive the outlet identifiers here instead of relying on a global `outlets`
# list that other cells rebind, and iterate over however many outlets exist.
outlet_ids = df['Outlet_Identifier'].unique().tolist()
# Use one factor list for every panel so an item type keeps the same colour
# across the whole grid.
item_type_factors = df['Item_Type'].unique().tolist()
outlet_level_cols = [
    'Outlet_Identifier',
    'Outlet_Establishment_Year',
    'Outlet_Size',
    'Outlet_Location_Type',
    'Outlet_Type',
]

plots = []
for outlet_id in outlet_ids:
    # Item-level rows for this outlet, without the outlet-level columns.
    outlet_Items = df[df['Outlet_Identifier'] == outlet_id].drop(columns=outlet_level_cols)
    outlet_Items['Item_Outlet_Sales'] = outlet_Items['Item_Outlet_Sales'].round()
    # Units sold is estimated as (rounded) sales divided by the item's MRP.
    outlet_Items['Units_Sold'] = (outlet_Items['Item_Outlet_Sales'] / outlet_Items['Item_MRP']).round()
    # Scaled copy of visibility drives marker size; computed before the
    # displayed visibility value is rounded.
    outlet_Items['Item_Visibility_Adjusted'] = outlet_Items['Item_Visibility'] * 100
    outlet_Items['Item_Visibility'] = outlet_Items['Item_Visibility'].round(5)
    outlet_Items_CDS = ColumnDataSource(outlet_Items)

    ### STARTING BOKEH PLOT
    fig_outlet_items_sales = figure(
        title=f'{outlet_id} Total Sales',
        x_axis_label='Item MRP',
        y_axis_label='Total Sales USD',
    )

    fig_outlet_items_sales.circle(
        x='Item_MRP',
        y='Item_Outlet_Sales',
        size='Item_Visibility_Adjusted',
        source=outlet_Items_CDS,
        color=factor_cmap(
            'Item_Type',
            palette=palette(len(item_type_factors)),
            factors=item_type_factors,
        ),
        line_color='black',
        alpha=.75,
        muted_alpha=.2,
    )

    fig_outlet_items_sales.add_tools(HoverTool(
        tooltips=[
            ('Item', '@Item_Identifier'),
            ('Item Type', '@Item_Type'),
            ('Item Visibility', "@Item_Visibility"),
            ('Item MRP', '@Item_MRP'),
            ('Total Units Sold', '@Units_Sold'),
            ('Total Sales', '@Item_Outlet_Sales USD'),
        ],
    ))
    plots.append(fig_outlet_items_sales)

# `grid` rather than `layout`, which would shadow bokeh.layouts.layout.
grid = gridplot(plots, ncols=2, toolbar_location=None, plot_height=300)
# show(grid)
save(grid, 'plots/outlets_items.html');

  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")
  warn("save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'")


## UNITS SOLD VS MRP

In [284]:
# Reload the data so this cell starts from the unmodified CSV and `outlets`
# holds outlet identifiers again.
df = pd.read_csv('data/sales_predictions_clean.csv')
outlets = df['Outlet_Identifier'].unique().tolist()
# Use one factor list for every panel so an item type keeps the same colour
# across the whole grid.
item_type_factors = df['Item_Type'].unique().tolist()
outlet_level_cols = [
    'Outlet_Identifier',
    'Outlet_Establishment_Year',
    'Outlet_Size',
    'Outlet_Location_Type',
    'Outlet_Type',
]

### CREATING NEW DF to make CDS
plots = []
for outlet_id in outlets:
    # Item-level rows for this outlet, ordered by MRP. sort_values returns a
    # new frame, so its result has to be kept for the sort to take effect.
    outlet_Items = (
        df[df['Outlet_Identifier'] == outlet_id]
        .drop(columns=outlet_level_cols)
        .sort_values('Item_MRP')
    )
    outlet_Items['Item_Outlet_Sales'] = outlet_Items['Item_Outlet_Sales'].round()
    # Units sold is estimated as (rounded) sales divided by the item's MRP.
    outlet_Items['Units_Sold'] = (outlet_Items['Item_Outlet_Sales'] / outlet_Items['Item_MRP']).round()
    # Scaled copy of visibility drives marker size; computed before the
    # displayed visibility value is rounded.
    outlet_Items['Item_Visibility_Adjusted'] = outlet_Items['Item_Visibility'] * 100
    outlet_Items['Item_Visibility'] = outlet_Items['Item_Visibility'].round(5)
    outlet_Items_CDS = ColumnDataSource(outlet_Items)

    ### STARTING BOKEH PLOT
    fig = figure(
        title=f'{outlet_id} Unit Sales',
        x_axis_label='Item MRP',
        y_axis_label='Units Sold',
    )

    fig.circle(
        x='Item_MRP',
        y='Units_Sold',
        size='Item_Visibility_Adjusted',
        source=outlet_Items_CDS,
        color=factor_cmap(
            'Item_Type',
            palette=palette(len(item_type_factors)),
            factors=item_type_factors,
        ),
        line_color='black',
        alpha=.75,
        muted_alpha=.2,
    )

    fig.add_tools(HoverTool(
        tooltips=[
            ('Item', '@Item_Identifier'),
            ('Item Type', '@Item_Type'),
            ('Item Visibility', "@Item_Visibility"),
            ('Item MRP', '@Item_MRP'),
            ('Total Units Sold', '@Units_Sold'),
            ('Total Sales', '@Item_Outlet_Sales USD'),
        ],
    ))
    plots.append(fig)

# `grid` rather than `layout`, which would shadow bokeh.layouts.layout.
grid = gridplot(plots, ncols=2, toolbar_location=None, plot_height=300)
# show(grid)
save(grid, 'plots/mrp_vs_unitssold.html');

  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")
  warn("save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'")


## ITEM VISIBILITY VS UNITS SOLD BY ITEM TYPE (GRID PLOT)

In [350]:
# Reload the data and add the derived columns on the full frame; `df` keeps
# these columns afterwards.
df = pd.read_csv('data/sales_predictions_clean.csv')
df['Item_Outlet_Sales'] = df['Item_Outlet_Sales'].round()
# Units sold is estimated as (rounded) sales divided by the item's MRP.
df['Units_Sold'] = (df['Item_Outlet_Sales'] / df['Item_MRP']).round()
df['Item_Visibility_Adjusted'] = df['Item_Visibility'] * 100
df['Item_Visibility'] = df['Item_Visibility'].round(5)

item_types = df['Item_Type'].unique().tolist()
vis_cols = ['Item_Identifier', 'Item_Type', 'Item_Visibility', 'Item_MRP', 'Units_Sold', 'Item_Outlet_Sales']

# Every panel shares the same fixed axis ranges so panels are comparable.
# NOTE(review): the x range starts just above 0, which leaves out items with
# zero visibility, and stops at 0.2 - confirm both limits are intentional.
visibility_range = (0.0001, .2)
units_sold_range = (0, 60)

plots = []
for item_type in item_types:
    df_vis = df[df['Item_Type'] == item_type].sort_values('Item_Visibility')[vis_cols]
    df_vis_CDS = ColumnDataSource(df_vis)
    fig = figure(
        title=f'{item_type} Unit Sales',
        x_axis_label='Item Visibility',
        y_axis_label='Units Sold',
        x_range=visibility_range,
        y_range=units_sold_range,
    )

    fig.circle(
        x='Item_Visibility',
        y='Units_Sold',
        size=10,
        source=df_vis_CDS,
        # Map colours from this cell's own item-type list instead of a
        # DataFrame left over from another cell's loop.
        color=factor_cmap('Item_Type', palette=palette(len(item_types)), factors=item_types),
        line_color='black',
        alpha=.75,
        muted_alpha=.2,
    )

    fig.add_tools(HoverTool(
        tooltips=[
            ('Item', '@Item_Identifier'),
            ('Item Type', '@Item_Type'),
            ('Item Visibility', "@Item_Visibility"),
            ('Item MRP', '@Item_MRP'),
            ('Total Units Sold', '@Units_Sold'),
            ('Total Sales', '@Item_Outlet_Sales USD'),
        ],
    ))
    plots.append(fig)

# `grid` rather than `layout`, which would shadow bokeh.layouts.layout.
grid = gridplot(plots, ncols=4, toolbar_location=None, plot_height=300)
show(grid)
save(grid, 'plots/item_vis_vs_units_sold.html');




In [323]:
# Item-level columns ordered from least to most visible item.
visibility_preview_cols = ['Item_Identifier','Item_Type','Item_Visibility', 'Item_MRP', 'Units_Sold', 'Item_Outlet_Sales']
df.sort_values(by='Item_Visibility').loc[:, visibility_preview_cols]


Unnamed: 0,Item_Identifier,Item_Type,Item_Visibility,Item_MRP,Units_Sold,Item_Outlet_Sales
3599,NCS30,Household,0.00000,128.9652,14.0,1808.0
386,FDC50,Canned,0.00000,96.4094,13.0,1238.0
2397,FDZ47,Baking Goods,0.00000,98.7042,19.0,1885.0
1113,FDB21,Fruits and Vegetables,0.00000,242.9854,28.0,6767.0
6183,DRN37,Soft Drinks,0.00000,166.5158,21.0,3509.0
...,...,...,...,...,...,...
2855,NCE31,Household,0.30939,33.2216,4.0,138.0
7250,FDT24,Baking Goods,0.31109,79.2328,2.0,154.0
1805,FDJ56,Fruits and Vegetables,0.32112,100.7700,2.0,200.0
3750,NCZ18,Household,0.32578,252.7698,3.0,761.0
