# In [1]:
from jupyter_dash import JupyterDash

from dash import Dash, dcc, html
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

# Application object for the dashboard. The page layout is assigned to
# `app.layout` further down, and the server is started from the
# `__main__` guard at the end of the script.
app = JupyterDash(__name__)

# Shared theme palette (hex colours): one background colour used for the page
# and every figure, and one colour used for all text.
colors = dict(
    background='#a8bf78',
    text='#78511f',
)


# Hard-coded summary table: lifetime customer value per promo-code group,
# written as one (promo code, value) record per row.
df = pd.DataFrame(
    [
        ("No Promo", 336.12),
        ("SOLEIL10", 365.15),
        ("BIENVENUE20", 394.42),
    ],
    columns=["Promo Code", "Lifetime Customer Value"],
)

# Hard-coded summary table: mean number of orders per promo-code group,
# written as one (promo code, mean orders) record per row.
df2 = pd.DataFrame(
    [
        ("No Promo", 2.80),
        ("SOLEIL10", 3.29),
        ("BIENVENUE20", 2.71),
    ],
    columns=["Promo Code", "Mean Number of Orders"],
)

# Hard-coded summary table: total value per product category (18 categories),
# written as one (category, total value) record per row so that each value
# sits next to the category it belongs to.
df3 = pd.DataFrame(
    [
        ('Vegetables', 525140),
        ('Baskets/Paniers', 369522),
        ('Meat and charcuterie', 287166),
        ('Fruits', 285775),
        ('Dairy and Cheese Products', 171993),
        ('Fish and Seafood', 126786),
        ('Pantry/Groceries', 120391),
        ('Bread and baked goods', 112346),
        ('Prepared Meals and Dips', 67242),
        ('Eggs', 35636),
        ('Drinks', 33817),
        ('Bulk', 28117),
        ('Plants, Flowers and Garden', 25683),
        ('Honey, Syrups, Jams, Nut Butters', 23310),
        ('Other products', 18124),
        ('Snacks', 16621),
        ('Beauty and Body Products', 4721),
        ('Household Products', 2241),
    ],
    columns=["Product Category", "Total Category Value"],
)

# Hard-coded summary table: total value per product category for 2022
# (17 categories), written as one (category, total value) record per row.
df4 = pd.DataFrame(
    [
        ('Vegetables', 97977),
        ('Meat and charcuterie', 50876),
        ('Fruits', 45081),
        ('Dairy and Cheese Products', 30006),
        ('Pantry/Groceries', 28280),
        ('Baskets/Paniers', 27916),
        ('Fish and Seafood', 20758),
        ('Bread and baked goods', 18747),
        ('Prepared Meals and Dips', 12887),
        ('Eggs', 5499),
        ('Drinks', 5165),
        ('Honey, Syrups, Jams, Nut Butters', 3368),
        ('Snacks', 2759),
        ('Plants, Flowers and Garden', 2567),
        ('Beauty and Body Products', 474),
        ('Household Products', 391),
        ('Other products', 231),
    ],
    columns=["Product Category", "Total Category Value - 2022"],
)

# Hard-coded regression output plotted in fig8: one coefficient (rounded to
# two decimals) per product category, written as (category, coefficient)
# records in descending coefficient order.
df5 = pd.DataFrame(
    [
        ('Fish and Seafood', 25.04),
        ('Meat and charcuterie', 24.06),
        ('Plants, Flowers and Garden', 23.57),
        ('Pantry/Groceries', 20.11),
        ('Prepared Meals and Dips', 19.39),
        ('Household Products', 18.56),
        ('Dairy and Cheese Products', 16.35),
        ('Baskets/Paniers', 15.40),
        ('Snacks', 14.32),
        ('Honey, Syrups, Jams, Nut Butters', 14.06),
        ('Fruits', 13.29),
        ('Beauty and Body Products', 12.13),
        ('Bread and baked goods', 9.52),
        ('Eggs', 8.64),
        ('Vegetables', 4.89),
    ],
    columns=[
        "Product Category - Excluding Bulk and Other",
        "Regression Coefficient Rounded to 2 decimal Places",
    ],
)

# Hard-coded regression output plotted in fig9: one signed coefficient per
# product category, written as (category, coefficient) records.
df6 = pd.DataFrame(
    [
        ('Vegetables', -0.4938),
        ('Fruits', -0.2449),
        ('Fish and Seafood', 0.3241),
        ('Meat and charcuterie', -0.3198),
        ('Bread and baked goods', 0.0823),
        ('Dairy and Cheese Products', 0.326),
        ('Prepared Meals and Dips', -0.0627),
        ('Plants, Flowers and Garden', 0.0023),
        ('Pantry/Groceries', -1.3469),
        ('Snacks', 0.0963),
        ('Drinks', -0.5361),
        ('Baskets/Paniers', 1.4985),
        ('Beauty and Body Products', 1.055),
        ('Eggs', 0.4829),
        ('Honey, Syrups, Jams, Nut Butters', -0.2275),
        ('Household Products', -0.1387),
    ],
    columns=[
        "Product Category - Excluding Bulk and Other",
        "Regression Coefficient",
    ],
)

# Bar chart of the df6 regression coefficients, one bar per product category.
# The category column drives both the x position and the colour; the themed
# background/text colours are applied and every bar trace is given width 1.
fig9 = (
    px.bar(
        df6,
        x="Product Category - Excluding Bulk and Other",
        y="Regression Coefficient",
        color="Product Category - Excluding Bulk and Other",
        barmode="group",
        color_discrete_sequence=['#2b3b00', '#f7b100', '#eb4200', '#2a6082', '#8a1e3b'],
    )
    .update_layout(
        plot_bgcolor=colors['background'],
        paper_bgcolor=colors['background'],
        font_color=colors['text'],
    )
    .update_traces(width=1)
)

# Bar chart of the df5 (rounded) regression coefficients, one bar per product
# category. The category column drives both the x position and the colour;
# the themed background/text colours are applied and every bar trace is
# given width 1.
fig8 = (
    px.bar(
        df5,
        x="Product Category - Excluding Bulk and Other",
        y="Regression Coefficient Rounded to 2 decimal Places",
        color="Product Category - Excluding Bulk and Other",
        barmode="group",
        color_discrete_sequence=['#2b3b00', '#f7b100', '#eb4200', '#2a6082', '#8a1e3b'],
    )
    .update_layout(
        plot_bgcolor=colors['background'],
        paper_bgcolor=colors['background'],
        font_color=colors['text'],
    )
    .update_traces(width=1)
)


# Bar chart of df3: total category value per product category, coloured by
# category, with the themed background/text colours and bar traces of width 1.
fig6 = (
    px.bar(
        df3,
        x="Product Category",
        y="Total Category Value",
        color="Product Category",
        barmode="group",
        color_discrete_sequence=['#2b3b00', '#f7b100', '#eb4200', '#2a6082', '#8a1e3b'],
    )
    .update_layout(
        plot_bgcolor=colors['background'],
        paper_bgcolor=colors['background'],
        font_color=colors['text'],
    )
    .update_traces(width=1)
)

# Bar chart of df4: 2022 total category value per product category, coloured
# by category, with the themed background/text colours and bar traces of
# width 1.
fig7 = (
    px.bar(
        df4,
        x="Product Category",
        y="Total Category Value - 2022",
        color="Product Category",
        barmode="group",
        color_discrete_sequence=['#2b3b00', '#f7b100', '#eb4200', '#2a6082', '#8a1e3b'],
    )
    .update_layout(
        plot_bgcolor=colors['background'],
        paper_bgcolor=colors['background'],
        font_color=colors['text'],
    )
    .update_traces(width=1)
)

# Heatmap of a hard-coded 2x2 count matrix, with each cell's value printed on
# it (text_auto). The x-axis label doubles as the accuracy caption:
# (154 + 585) / (154 + 435 + 135 + 585) is roughly 0.56.
fig10 = px.imshow(
    [
        [154, 435],
        [135, 585],
    ],
    labels={"x": "Accuracy = 0.56", "y": "Negative/Positive", "color": "Number"},
    text_auto=True,
    color_continuous_scale='RdBu_r',
).update_layout(
    plot_bgcolor=colors['background'],
    paper_bgcolor=colors['background'],
    font_color=colors['text'],
)

# Load the per-customer table and split it by the two promo-code flag
# columns ('soleil10' and 'bienvenue20', compared against 0 / 1):
#   *_nopromo - neither flag equals... i.e. both flags are 0
#   *_promo   - at least one flag is 1
#   *_10      - 'soleil10' flag is 1
#   *_20      - 'bienvenue20' flag is 1
orderspercustomer = pd.read_csv('orderspercustomer.csv')
orderspercustomer_nopromo = orderspercustomer[
    orderspercustomer['soleil10'].eq(0) & orderspercustomer['bienvenue20'].eq(0)
]
orderspercustomer_promo = orderspercustomer[
    orderspercustomer['soleil10'].eq(1) | orderspercustomer['bienvenue20'].eq(1)
]
orderspercustomer_10 = orderspercustomer[orderspercustomer['soleil10'].eq(1)]
orderspercustomer_20 = orderspercustomer[orderspercustomer['bienvenue20'].eq(1)]

# X-axis group labels for the repeat-order bar chart (fig3). The middle label
# was 'SOLEIL20', a code that does not exist: the promo code is named
# SOLEIL10 in df, df2 and the 'soleil10' data column.
Percentage = ['No Promo', 'SOLEIL10', 'BIENVENUE20']



# Bar chart of df: lifetime customer value per promo code, one colour per
# promo code, with the themed background/text colours applied.
fig = px.bar(
    df,
    x="Promo Code",
    y="Lifetime Customer Value",
    color="Promo Code",
    barmode="group",
    color_discrete_sequence=['#2b3b00', '#f7b100', '#eb4200'],
).update_layout(
    plot_bgcolor=colors['background'],
    paper_bgcolor=colors['background'],
    font_color=colors['text'],
)

# Bar chart of df2: mean number of orders per promo code, one colour per
# promo code, with the themed background/text colours applied.
fig2 = px.bar(
    df2,
    x="Promo Code",
    y="Mean Number of Orders",
    color="Promo Code",
    barmode="group",
    color_discrete_sequence=['#2b3b00', '#f7b100', '#eb4200'],
).update_layout(
    plot_bgcolor=colors['background'],
    paper_bgcolor=colors['background'],
    font_color=colors['text'],
)

# Two bar series over the promo-code labels in `Percentage`: the hard-coded
# shares of customers who ordered again and who did not (each pair of values
# sums to 100). Themed background/text colours are applied afterwards.
fig3 = go.Figure(data=[
    go.Bar(name=series_name, x=Percentage, y=shares, marker_color=bar_colour)
    for series_name, shares, bar_colour in (
        ('Ordered again', [45, 47, 41], '#2b3b00'),
        ('Did not order again', [55, 53, 59], '#eb4200'),
    )
]).update_layout(
    plot_bgcolor=colors['background'],
    paper_bgcolor=colors['background'],
    font_color=colors['text'],
)

# Histogram of the 'Quantité' column for customers who used neither promo
# code, drawn in a single colour with the themed background/text colours.
fig4 = px.histogram(
    orderspercustomer_nopromo,
    x='Quantité',
    title="No Promo Codes",
    color_discrete_sequence=['#2b3b00'],
).update_layout(
    plot_bgcolor=colors['background'],
    paper_bgcolor=colors['background'],
    font_color=colors['text'],
)

# Two histograms (20 requested bins each) of the 'Total payé' column: one for
# BIENVENUE20 customers and one for SOLEIL10 customers, on the same figure.
# NOTE(review): fig4 plots 'Quantité' while this figure plots 'Total payé',
# yet the page text describes both as order-count distributions - confirm the
# intended column against orderspercustomer.csv.
fig5 = go.Figure(
    data=[
        go.Histogram(
            x=orderspercustomer_20['Total payé'],
            nbinsx=20,
            name="Bienvenue 20",
            marker_color='#eb4200',
        ),
        go.Histogram(
            x=orderspercustomer_10['Total payé'],
            nbinsx=20,
            name="Soleil 10",
            marker_color='#f7b100',
        ),
    ]
).update_layout(
    plot_bgcolor=colors['background'],
    paper_bgcolor=colors['background'],
    font_color=colors['text'],
    title="Bienvenue 20 v.s. Soleil 10",
)

# --- Page layout -----------------------------------------------------------
# The page is a single themed Div containing, in reading order, headings,
# explanatory paragraphs and the figures built above.
#
# Changes relative to the previous version of this block:
#   * The repeated {'textAlign': ..., 'color': ...} style dict is produced by
#     small private helpers instead of being spelled out for every element.
#     The component tree (tags, nesting, ids, className, styles) is unchanged.
#   * Paragraph text uses implicit string concatenation instead of backslash
#     continuations, so the strings no longer carry runs of source
#     indentation between words (HTML rendering collapses such whitespace).
#   * Text fixes: the promo code was twice called "SOLEIL20" (it is SOLEIL10);
#     spelling ("whereas", "conclusive", "category", "endeavour", "across");
#     "SOLEIL 10" -> "SOLEIL10"; "number of order" -> "number of orders";
#     a missing full stop after "studied promo code"; "Associated More
#     Orders" -> "Associated with More Orders"; "$40.70 dollars" -> "$40.70".


def _text_style(align):
    """Return the inline style for themed text with the given alignment."""
    return {'textAlign': align, 'color': colors['text']}


def _heading(component, text, **props):
    """Build a centred, themed heading.

    `component` is the heading class to instantiate (html.H1 / H2 / H3);
    extra keyword props (e.g. className) are passed through unchanged.
    """
    return component(children=text, style=_text_style('center'), **props)


def _paragraph(text, align='left'):
    """Build a themed html.P; body text is left-aligned unless told otherwise."""
    return html.P(children=text, style=_text_style(align))


def _section(*children):
    """Wrap the given components in a plain html.Div, preserving their order."""
    return html.Div(children=list(children))


def _graph(graph_id, figure):
    """Build a dcc.Graph showing `figure` under the component id `graph_id`."""
    return dcc.Graph(id=graph_id, figure=figure)


app.layout = html.Div(style={'backgroundColor': colors['background']}, children=[
    _heading(html.H1, 'Panier Québécois Sales Analysis'),

    html.Div(
        children=(
            'An in-depth look at coupon use and product category as predictors '
            'of customer value and customer retention.'
        ),
        style=_text_style('center'),
    ),

    # ---- Part 1: coupon use ------------------------------------------------
    _section(
        _heading(html.H2, 'Coupon Use', className='heading'),
        _paragraph(
            'Are customers who use a promo code on their first order more valuable, '
            'and do they place more orders?',
            align='center',
        ),
        # NOTE(review): this paragraph says bienvenue10 had 3 uses, a later
        # paragraph says it was used 2 times - confirm which figure is right.
        _paragraph(
            'To answer this question we first compare the lifetime order value of '
            'those customers who used one of two promo codes, bienvenue20 and '
            'soleil10 between July 1st and December 31st 2021. Bienvenue20 allows '
            'for 20% off a first order over $100 and soleil10 allows for 10% off a '
            'first order. A third promo code, bienvenue10, was excluded due to low '
            'usage (3 uses during the selected period.) We cut off the orders on '
            'December 31st so as to be able to have sufficient subsequent data to '
            'compare after first orders'
        ),
    ),

    _heading(html.H2, 'Average Total Customer by Promo Code - Linear Regression'),
    _graph('example-graph-1', fig),
    _section(
        _paragraph(
            'Those customers in the sample period who did not use either '
            'BIENVENUE20 or SOLEIL10 spent on average 336.12 in total. Those who '
            'used SOLEIL10 spent 365.15, whereas those who used BIENVENUE20 spent '
            '394.42. However, these differences may be due to chance. We found '
            'p-values of 0.38 for BIENVENUE20 and 0.8 for SOLEIL10, meaning there '
            'is a 38% and 80% chance respectively of finding a more extreme value '
            'by random sampling if there were in fact no difference'
        ),
        _paragraph(
            'The high amount of uncertainty could be due to the relatively small '
            'sample sizes. BIENVENUE20 was used 58 times during the sample period '
            'whereas SOLEIL10 was used only 17 times. BIENVENUE10 was excluded '
            'because it was only used 2 times! With more data, it might be '
            'possible to increase our confidence in the results'
        ),
    ),

    _heading(html.H2, 'Average Number of Orders by Promo Code - Linear Regression'),
    _graph('-graph-2', fig2),
    _section(
        # NOTE(review): the text quotes 2.9 orders for the no-promo group while
        # the plotted table (df2) holds 2.80 - confirm which figure is right.
        _paragraph(
            'Those customers in the sample period who did not use either '
            'BIENVENUE20 or SOLEIL10 made 2.9 orders on average. Those who used '
            'SOLEIL10 made 3.29 orders on average (p-value of 0.54). Those who '
            'used BIENVENUE20 made 2.71 orders on average (p-value of 0.83). '
            'Overall, it is interesting that those who used SOLEIL10 seemed to '
            'make notably more orders, although there is still around a 50% '
            'chance that the observed difference is random'
        ),
    ),

    _heading(html.H2, 'Average Number of Orders by Promo Code - Descriptive Statistics'),
    _section(
        _paragraph(
            'To better understand the differences in ordering patterns, we can '
            'visualize them in different ways'
        ),
    ),

    _heading(html.H3, 'Frequency - Histograms'),
    _section(
        _paragraph(
            'Below are two histograms showing the distributions of number of '
            'orders per customer. The first shows the distribution for customers '
            'who did not use either studied promo code. The second compares '
            'BIENVENUE20 to SOLEIL10. These look quite different. Although it is '
            'not conclusive, SOLEIL10 seems to show less of a sharp drop-off '
            'between one order and more orders.'
        ),
    ),
    _graph('graph-4', fig4),
    _graph('graph-5', fig5),

    _heading(html.H3, 'Percentage of Customers Who Ordered More than Once by Promo Code'),
    _section(
        _paragraph(
            'Here we compare the amount of customers who ordered once in each '
            'category to those who ordered more than once. BIENVENUE20 has a '
            'higher proportion of single-order customers than SOLEIL10 or '
            'customers without these promo codes. Again we do not have enough '
            'data to be conclusive, but BIENVENUE20 appears to underperform for '
            'customer retention.'
        ),
    ),
    _graph('graph-3', fig3),

    # ---- Part 2: product categories ----------------------------------------
    _heading(html.H2, 'Part 2 - Product Categories'),
    _section(
        _paragraph(
            'For the second part of this analysis, I will endeavour to answer '
            'questions around product categories. Firstly, what categories of '
            'items are highest value? Secondly, what product categories predict '
            'high-value customers?'
        ),
        _paragraph(
            'The main challenge in this section was determining how to encode '
            'product categories. For instance, bananas and oranges are both in a '
            'fruits category but also in separate citrus and exotic fruits '
            'categories. I reduced the number of categories from ~200 to 17'
        ),
    ),

    _heading(html.H2, 'What product categories are highest value?'),
    _heading(html.H3, 'Descriptive Statistics'),
    _graph('6', fig6),
    _section(
        _paragraph(
            'The highest value category was unsurprisingly vegetables, followed '
            'by multi-item baskets and meat. Fruit, seafood, and dairy all ranked '
            'highly.'
        ),
    ),

    _heading(html.H3, 'Did this change in 2022?'),
    _graph('7', fig7),
    _section(
        _paragraph(
            'Examining 2022 data, we see that baskets have decreased in '
            'importance and grocery items have increased in importance. This may '
            'in part be due to bulk being declassified and included in grocery '
            'items.'
        ),
    ),

    _heading(html.H2, 'Which Product Categories are Associated with Higher Value Orders?'),
    _heading(html.H3, 'Linear Regression'),
    _section(
        _paragraph(
            'Each product category was assigned an equal weight (i.e. capped at 1 '
            'instance per order) and the presence or absence was used as a '
            'predictor of order value.'
        ),
    ),
    _section(
        _paragraph(
            'For this regression, there was an R-squared of 0.434 and a p-value '
            'of <0.05 across all categories. The constant was 40.70'
        ),
    ),
    _graph('8', fig8),
    _section(
        _paragraph(
            'To interpret this graph, we can say that the presence of any of '
            'these categories adds on average the amount shown compared to a '
            'baseline of $40.70. Fish/Seafood, Meat Products, and Plants/Garden '
            'Products performed the best. It is interesting to note that although '
            'vegetables are the overall most valuable category, they add the '
            'least to order value, perhaps because they are almost always present '
            'and therefore are similar to the baseline'
        ),
    ),

    _heading(html.H2, 'Which Product Categories are Associated with More Orders Per Customer?'),
    _heading(html.H3, 'Linear Regression'),
    _section(
        _paragraph(
            'For this regression, there was an R-squared of 0.02(!) and a p-value '
            'of <0.05 overall. The constant was 5.25'
        ),
    ),
    _graph('9', fig9),
    _section(
        _paragraph(
            'To interpret this graph, we can say that the presence of any of '
            'these product categories in a first order predicts on average the '
            'change shown in the number of orders a customer will make, compared '
            'to the constant of 5.25 orders. Baskets are associated with the '
            'biggest increase in orders, at 1.5. This is followed by beauty and '
            'body products. Pantry and grocery products predict the largest '
            'decrease in subsequent orders, at -1.35. In general, this data is '
            'less certain than the previous graph.'
        ),
    ),

    _heading(
        html.H2,
        'Can We Predict Whether A Customer Will Order Again Based on Product '
        'Categories in their First Order?',
    ),
    _heading(html.H3, 'Logistic Regression'),
    _graph('10', fig10),
    _section(
        _paragraph(
            'In short, no, we cannot accurately predict whether a customer will '
            'order again based on the products in their first order. We get '
            'slightly better than random results of 0.56, which could be used for '
            'targeted advertising. In particular, we get a lot of false '
            'positives. Neural nets did not perform significantly better for this '
            'task. It does not predict a large proportion of the variation in '
            'number of orders'
        ),
    ),
])
# Start the app's server only when this file is executed as the main module
# (not when it is imported); debug=True is passed through to run_server.
if __name__ == '__main__':
    app.run_server(debug=True)

# Output: Dash app running on http://127.0.0.1:8050/
