# KEY QUESTIONS ANSWERED

<a id='0'></a> <br>
 # Table of Contents  
 
[Import data and libraries](#0.1)     
[Key Questions](#0.2) 

1. [Busiest Days and Times](#1)
    1. [Busiest days of the week](#2)
    1. [Busiest hours of the day](#3)
1. [Highest Revenue Times](#4)
1. [Price Range Groupings](#5)
1. [Department Popularity](#6)
1. [Ordering Behaviors](#7)
    1. [Brand Loyalty Distribution](#8)
    1. [Brand Loyalty Ordering Habits](#9)
        1. [Loyalty and days_since_prior_order](#10)
        1. [Loyalty and ave. price per product](#11)
        1. [Loyalty and department](#12)
        1. [Loyalty and hour_of_day/day_of_week](#13)
    1. [Regional Ordering Habits](#14)
        1. [orders_day_of_week](#15)
        1. [order_hour_of_day](#16)
        1. [prior_order_median](#17)
        1. [most_ordered_product](#18)
        1. [order_total](#19)
        1. [max_order](#20)
        1. [Price_range](#21)
        1. [Loyalty_flag](#22)
        1. [Spending_flag](#23)
        1. [Order_frequency_flag](#24)
    1. [Age and Family Status](#25)
        1. [Most Frequent Day](#26)
        1. [Most Frequent Hour](#27)
        1. [Frequency of Orders](#28)
        1. [Avg. Number of Orders](#29)
        1. [Avg. Order Total](#30)
        1. [Avg. Price Per Item](#31)
    1. [Demographics](#32)
    1. [Customer Profile Ordering Habits](#33)
        1. [Price per Item](#34)
        1. [Products](#35)
        1. [Department](#36)
        1. [Orders_day_of_week](#37)
        1. [Order_hour_of_day](#38)
        1. [Days between orders](#39)
        1. [Order Total](#40)
        1. [Max_order](#41)

<a id='0.1'></a> <br>
# Import data and libraries

In [None]:
# Import libraries

import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import scipy
import plotly.express as px
import plotly.offline as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot, plot
from plotly import tools
from plotly.subplots import make_subplots

In [None]:
# Create path
# The project root can be overridden with the INSTACART_PROJECT_DIR environment
# variable, so the notebook also runs on machines without this drive layout.
# Without the variable the original location is used unchanged.

path = os.environ.get(
    'INSTACART_PROJECT_DIR',
    r'G:\My Drive\CareerFoundry\Python Projects\2023-10 Instacart Basket Analysis',
)

In [None]:
# Import data
# Read the prepared dataframe from the project's '02 Data/Prepared Data' folder.

prepared_pickle = os.path.join(path, '02 Data', 'Prepared Data', 'ict_final_df.pkl')
ict_final_df = pd.read_pickle(prepared_pickle)

In [None]:
ict_final_df.shape

In [None]:
# Show up to 100 columns before pandas truncates the display.
# An unlimited setting makes the order_id crosstab further down (one column per
# order) try to render every column, which can freeze the notebook.
# assumes the analysis frame has fewer than 100 columns — TODO confirm

pd.set_option('display.max_columns', 100)

In [None]:
# Show up to 100 rows before pandas truncates the display.
# This still prints the summary tables in full (the largest is sliced as rows 0-62
# below), while whole-frame "check output" cells show head and tail only
# instead of rendering every row.

pd.set_option('display.max_rows', 100)

In [None]:
ict_final_df.head()

In [None]:
# Treat prices above 100 as outliers: overwrite them with NaN so they do not skew the distribution

price_is_outlier = ict_final_df['prices'] > 100
ict_final_df.loc[price_is_outlier, 'prices'] = np.nan

<a id='0.2'></a> <br>
# Key Questions

<a id='1'></a> <br>
# 1. Busiest Days and Times
# Days and times with most orders?
<a id='2'></a> <br>
> ## A. Busiest days of the week

In [None]:
# cross-tabulate day of week (rows) against order_id (columns);
# each cell counts the rows of that order placed on that day

dow_values = ict_final_df['orders_day_of_week']
order_ids = ict_final_df['order_id']
ct_busiest_dow = pd.crosstab(index=dow_values, columns=order_ids)

In [None]:
# check output

ct_busiest_dow

In [None]:
# cap every count at 1 - only one instance of each order_id is needed per dow
# (crosstab counts are never negative, so capping at 1 leaves a 0/1 flag)

ct_busiest_dow = ct_busiest_dow.clip(upper=1)

In [None]:
# check output

ct_busiest_dow

In [None]:
# create Total column: row-wise sum of the per-order flags for each day of week
# An existing 'Total' column is left out of the sum, so re-running this cell
# does not add the previous total on top of the flags.

order_flags = ct_busiest_dow.drop(columns='Total', errors='ignore')
ct_busiest_dow['Total'] = order_flags.sum(axis=1)

In [None]:
# check output

ct_busiest_dow

In [None]:
# Remove all columns except 'Total' (result stays a one-column dataframe)

ct_busiest_dow = ct_busiest_dow.loc[:, ['Total']]

In [None]:
# check output

ct_busiest_dow

In [None]:
# create df for easy manipulation
# create data: one order count per day of the week
# The counts are read from ct_busiest_dow['Total'] instead of being typed in by
# hand, so the chart always reflects the data that was actually loaded.

# assumes orders_day_of_week 0-6 runs Saturday..Friday (the order in which the
# day labels were originally listed) — TODO confirm
day_names = ['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']

dow_totals = ct_busiest_dow['Total'].sort_index()
assert len(dow_totals) == len(day_names), 'expected one order total per day of the week'

data = {'Day': day_names,
        'Count': dow_totals.tolist()}

In [None]:
# create df

df_opm_for_bar = pd.DataFrame(data)

In [None]:
# check output

df_opm_for_bar

In [None]:
# Bar chart of order counts per day of the week, busiest day first.

# Sort first, then derive the colours from the sorted counts: colours are applied to the
# bars by position, so computing them from the unsorted frame puts them on the wrong bars.
days_by_count = df_opm_for_bar.sort_values(by='Count', ascending=False)

norm = plt.Normalize(0, days_by_count['Count'].values.max()) # scales the counts to 0-1 for the colormap
colors = plt.cm.Blues(norm(days_by_count['Count'].values)) # one shade of blue per bar

bar_busiest_days_of_week = days_by_count.plot.bar(x='Day',
                                                  y='Count',
                                                  figsize=(8,6),
                                                  legend=False,
                                                  color=colors)

# label every bar with its count, using a thousands separator
# (a single pass over the containers; nesting the loop would label each container repeatedly)
for container in bar_busiest_days_of_week.containers:
    bar_busiest_days_of_week.bar_label(container, fmt='{:,.0f}',
                                       fontweight='bold',
                                       fontsize=10)

plt.yticks([]) # removes yticks
plt.xticks(rotation=0) # rotates xtick labels to neutral position
plt.ylabel('Number of Orders')
plt.xlabel('') # removes x axis label
plt.margins(0.2) # adds 20% whitespace to margins to keep labels inside the box
plt.title('Busiest Days of the Week', fontsize=14, fontweight='bold')

# Export chart to the visualizations folder using savefig() function

bar_busiest_days_of_week.figure.savefig(os.path.join(path, '04 Analysis','Visualizations', 'bar_busiest_days_of_week.png'),
                                        bbox_inches='tight') # keeps entire fig in the png

<a id='3'></a> <br>
> ## B. Busiest hours of the day

In [None]:
# two-column frame: each row's order_id together with the hour of day it was placed

hod_columns = ['order_id', 'order_hour_of_day']
df_busiest_hod = ict_final_df[hod_columns].copy() # independent copy, avoids the 'copy of a slice' warning on later edits

In [None]:
# check output

df_busiest_hod

In [None]:
# Keep a single row per order_id (the first one seen) - the hour of each order is needed only once

df_busiest_hod = df_busiest_hod.drop_duplicates(subset=['order_id'], keep='first')

In [None]:
# check output

df_busiest_hod

In [None]:
# keep only the hour column; selecting it (rather than dropping 'order_id')
# means re-running this cell does not fail once 'order_id' is already gone
df_busiest_hod = df_busiest_hod[['order_hour_of_day']]

In [None]:
# check output

df_busiest_hod

In [None]:
# create histogram of busiest hod: number of orders placed in each hour of the day

fig, ax = plt.subplots(figsize=(10,4), facecolor='w')
cnts, values, bars = ax.hist(df_busiest_hod, edgecolor='k', bins=np.arange(-0.5, 24), # one bin per hour
                             rwidth=0.85)

cmap = plt.cm.PuBu # purple/blue colormap

# shade each bar by its height relative to the busiest hour
busiest_count = cnts.max()
for cnt, bar in zip(cnts, bars):
    bar.set_facecolor(cmap(cnt / busiest_count))

ax.set_xticks(np.arange(0, 24)) # makes all ticks visible
ax.set_xlabel('Time of Day \n(0=midnight, 23=11pm)') # \n wraps text
ax.set_ylabel('Number of Orders')
ax.set_title('Busiest Hour of the Day', fontsize=18)

# Export chart to the visualizations folder using savefig() function

fig.savefig(os.path.join(path, '04 Analysis','Visualizations', 'hist_busiest_hour_of_day.png'),
            bbox_inches='tight') # keeps entire fig in the png

### Analysis shows that most users place orders between 9am and 5pm, while the fewest orders are placed between 12am and 6am.

<a id='4'></a> <br>
# 2. Highest Revenue Times
# What time of day do people spend the most money?

In [None]:
# First change outliers to NaN to correct skew of distribution
# (same replacement as in the import section; when the notebook is run top to
# bottom no price above 100 is left, so this repeat changes nothing)

ict_final_df.loc[ict_final_df['prices'] >100, 'prices'] = np.nan

In [None]:
# create sample data to save time and memory

In [None]:
# Create random boolean mask: one draw per row, True when the draw is <= 0.7
# (fixed seed so the same rows are selected on every run)

np.random.seed(4)
row_count = len(ict_final_df)
dev = np.random.rand(row_count) <= 0.7

In [None]:
# check output

dev

In [None]:
# Split the rows with the mask: about 70% (mask True) go to big, the remaining ~30% to small

big, small = ict_final_df[dev], ict_final_df[~dev]

In [None]:
# Check results by adding big and small df to see if their combined length = the entire df length

len(ict_final_df)

In [None]:
len(big) + len(small)

In [None]:
# Keep only the two columns the revenue-by-hour analysis needs

hod_price_columns = ['order_hour_of_day', 'prices']
small = small[hod_price_columns]
big = big[hod_price_columns]

In [None]:
# check output

small

In [None]:
big

In [None]:
# total spend per hour of day on both dfs: sum of 'prices' within each order_hour_of_day

small = small.groupby('order_hour_of_day')[['prices']].sum()
big = big.groupby('order_hour_of_day')[['prices']].sum()

In [None]:
# check output

small

In [None]:
big

In [None]:
# create simple line chart of small and big to confirm they are a match

sns.lineplot(data = small, x = 'order_hour_of_day', y = 'prices')

In [None]:
sns.lineplot(data = big, x = 'order_hour_of_day', y = 'prices')

## - The line charts for the small and big samples show matching distributions

In [None]:
# create more readable line chart of summed prices per hour (drawn from the 'small' sample)

sns.set(rc={'figure.figsize':(10, 6)}) # changes size of figure

# a dot on every data point: marker shape, size and fill colour
marker_style = dict(marker='o', markersize=5, markerfacecolor='darkblue')

line_revenue_hod = sns.lineplot(data=small, x='order_hour_of_day', y='prices', **marker_style)

line_revenue_hod.set_title('Total Revenue by Hour of Day')
line_revenue_hod.set_xlabel('Time of Day \n(0=midnight, 23=11pm)') # \n wraps text
line_revenue_hod.set_xticks(np.arange(0, 24)) # makes all ticks visible
line_revenue_hod.set_ylabel('Revenue')

# Export chart to the visualizations folder using savefig() function

revenue_png = os.path.join(path, '04 Analysis','Visualizations', 'line_rev_hod.png')
line_revenue_hod.figure.savefig(revenue_png, bbox_inches='tight') # keeps entire fig in the png

## -- Note: This chart is based on the 30% sample, so the revenue totals are understated; the distribution of spend across the hours of the day is representative.

<a id='5'></a> <br>
# 3. Price Range Groupings
# Create simpler price range groupings.

In [None]:
# Change outliers to NaN to correct skew of distribution
# (same replacement as in the import section; when the notebook is run top to
# bottom no price above 100 is left, so this repeat changes nothing)

ict_final_df.loc[ict_final_df['prices'] >100, 'prices'] = np.nan

In [None]:
# Check if replacement was successful

ict_final_df['prices'].max()

In [None]:
# histogram of 'prices' split into 70 bins for finer granularity

hist_prices = ict_final_df['prices'].plot.hist(bins=70)
hist_prices.set_title('Prices Frequency')

# Export chart to the visualizations folder using savefig() function

hist_prices_png = os.path.join(path, '04 Analysis','Visualizations', 'hist_prices.png')
hist_prices.figure.savefig(hist_prices_png, bbox_inches='tight') # keeps entire fig in the png

In [None]:
# rename the 'price_range_loc' column to 'price_range'

ict_final_df = ict_final_df.rename(columns={'price_range_loc': 'price_range'})

In [None]:
# check output

ict_final_df.head()

#### -- Price Range variable previously created
       - High-range product: >15
       - Mid-range product: >=5, <15
       - Low-range product: <5

In [None]:
# creates horizontal bar chart showing how many purchased items fall in each price range

barh_price_range = ict_final_df['price_range'].value_counts(ascending=True).plot.barh(legend=None, edgecolor='black')

# label each bar with its count, using a thousands separator
# (a single pass over the containers; nesting the loop would label each container repeatedly)
for container in barh_price_range.containers:
    barh_price_range.bar_label(container, fmt=' {:,.0f}', fontstyle='italic')

# adds 25% whitespace to the x margins to keep labels inside the box
barh_price_range.margins(x=0.25)

plt.xlabel('Count', fontsize=12, fontweight='bold')
plt.ylabel('Price Ranges', fontsize=12, fontweight='bold')
plt.title('Total Items Purchased by Price Range', fontsize=14, fontweight='bold')

# Export chart to the visualizations folder using savefig() function

barh_price_range.figure.savefig(os.path.join(path, '04 Analysis','Visualizations', 'barh_price_range.png'),
                                bbox_inches='tight') # keeps entire fig in the png

<a id='6'></a> <br>
# 4. Department Popularity
# Which departments have the highest frequency of product orders?

In [None]:
# creates horizontal bar graph of the number of product rows per department, smallest at the bottom

barh_dep_distr = ict_final_df['department'].value_counts().sort_values().plot.barh(figsize=(9, 9), linewidth=1, edgecolor='black')

# label each bar with its count, using a thousands separator
# (a single pass over the containers; nesting the loop would label each container repeatedly)
for container in barh_dep_distr.containers:
    barh_dep_distr.bar_label(container, fmt=' {:,.0f}', fontweight='medium', fontsize=12)

# adds 20% (x) and 30% (y) whitespace to margins to keep labels inside the box
barh_dep_distr.margins(x=0.2, y=0.3)

plt.xlabel('Units Sold (Millions)', fontsize=12, fontweight='bold')
plt.ylabel('Departments', fontsize=12, fontweight='bold')
plt.title('Total Number of Products Sold by Department', fontsize=14, fontweight='bold') # fixed typo 'Producst'

plt.xticks(rotation=0) # keeps tick labels horizontal (a 360 degree rotation is the same orientation)

# Export chart to the visualizations folder using savefig() function

barh_dep_distr.figure.savefig(os.path.join(path, '04 Analysis','Visualizations', 'barh_dep_distr.png'),
                                bbox_inches='tight') # keeps entire fig in the png

<a id='7'></a> <br>
# 5. Ordering Behaviors

In [None]:
# customer-level frame: user id plus profile, region, age, income, family status, loyalty and order-frequency flags

customer_columns = ['user_id', 'customer_profile', 'region', 'age', 'income',
                    'fam_status', 'loyalty_flag', 'order_freq_flag']
ic_df = ict_final_df[customer_columns].copy()

In [None]:
ic_df.shape

In [None]:
ic_df.head()

In [None]:
ic_df.tail()

In [None]:
# remove all dupes

ic_df = ic_df.drop_duplicates()

ic_df.shape

<a id='8'></a> <br>
> ## A. Brand Loyalty Distribution
> ## Distribution of users by brand loyalty level

In [None]:
# creates bar chart of loyalty_flag: number of rows in ic_df per loyalty level

loyal_patch = mpatches.Patch(color='lightgray', label='Loyal (>40)')
new_patch = mpatches.Patch(color='cornflowerblue', label='New (<=10)')
regular_patch = mpatches.Patch(color='orangered', label='Regular (11-40)')

loyalty_counts = ic_df['loyalty_flag'].value_counts(ascending=True)

# The bars are ordered by count, so colours are looked up by label instead of by position;
# positional colours only match the legend patches for one particular ranking of the counts.
# assumes the three flag labels sort alphabetically as Loyal, New, Regular — TODO confirm
color_by_flag = dict(zip(sorted(loyalty_counts.index), ['lightgray', 'cornflowerblue', 'orangered']))
bar_colors = [color_by_flag.get(flag, 'white') for flag in loyalty_counts.index]

bar_loyalty_flag = loyalty_counts.plot.bar(edgecolor='black', color=bar_colors)

# label each bar with its count, using a thousands separator
# (a single pass over the containers; nesting the loop would label each container repeatedly)
for container in bar_loyalty_flag.containers:
    bar_loyalty_flag.bar_label(container, fmt=' {:,.0f}', fontstyle='italic')

# adds 25% whitespace to the x margins to keep labels inside the box
bar_loyalty_flag.margins(x=0.25)

plt.xlabel('Brand Loyalty Level', fontsize=12, fontweight='bold')
plt.xticks(rotation=0)
plt.ylabel('Count', fontsize=12, fontweight='bold')
plt.title('Distribution of Customer Loyalty \nby Number of Orders', fontsize=14, fontweight='bold')
plt.legend(handles=[loyal_patch, new_patch, regular_patch])

# Export chart to the visualizations folder using savefig() function

bar_loyalty_flag.figure.savefig(os.path.join(path, '04 Analysis','Visualizations', 'bar_loyalty_flag.png'),
                                bbox_inches='tight') # keeps entire fig in the png

<a id='9'></a> <br>
> ## B. Brand Loyalty Ordering Habits
> ## Are there differences in ordering habits based on a customer's loyalty status?

In [None]:
# pivot table per loyalty_flag: mean days_since_prior_order and mean price

loyalty_aggregations = {'days_since_prior_order': 'mean', 'prices': 'mean'}

pv_final_df = ict_final_df.pivot_table(
    index='loyalty_flag',
    values=['days_since_prior_order', 'prices'],
    aggfunc=loyalty_aggregations,
)

In [None]:
pv_final_df

In [None]:
pv_final_df = pv_final_df.reset_index()

In [None]:
# check output

pv_final_df

<a id='10'></a> <br>
>> ## a. Loyalty and days_since_prior_order

In [None]:
# creates horizontal bar chart of the average days since prior order for each loyalty level

barh_loyalty_prior_ave = pv_final_df.plot.barh(y='days_since_prior_order', x='loyalty_flag', legend=False, edgecolor='black', color=['lightgray', 'cornflowerblue', 'orangered'])

# label each bar with its average, rounded to whole days
# (a single pass over the containers; nesting the loop would label each container repeatedly)
for container in barh_loyalty_prior_ave.containers:
    barh_loyalty_prior_ave.bar_label(container, fmt=' {:,.0f} days', fontsize=14, fontstyle='italic')

# adds 25% whitespace to the x margins to keep labels inside the box
barh_loyalty_prior_ave.margins(x=0.25)

plt.xlabel('')
plt.ylabel('Brand Loyalty Level', fontsize=12, fontweight='bold')
plt.title('Average Days Between Orders \nby Loyalty Level', fontsize=14, fontweight='bold')

# Export chart to the visualizations folder using savefig() function

barh_loyalty_prior_ave.figure.savefig(os.path.join(path, '04 Analysis','Visualizations', 'barh_loyalty_prior_ave.png'),
                                bbox_inches='tight') # keeps entire fig in the png

<a id='11'></a> <br>
>> ## b. Loyalty and ave. price per product

In [None]:
# creates horizontal bar chart of the average price per product for each loyalty level

barh_loyalty_ppprod_ave = pv_final_df.plot.barh(y='prices', x='loyalty_flag', legend=False, edgecolor='black', color=['lightgray', 'cornflowerblue', 'orangered'])

# label each bar with its average price, formatted as dollars with two decimals
# (a single pass over the containers; nesting the loop would label each container repeatedly)
for container in barh_loyalty_ppprod_ave.containers:
    barh_loyalty_ppprod_ave.bar_label(container, fmt=' ${:,.2f} per item', fontsize=14, fontstyle='italic')

# adds 25% whitespace to the x margins to keep labels inside the box
barh_loyalty_ppprod_ave.margins(x=0.25)

plt.xlabel('')
plt.ylabel('Brand Loyalty Level', fontsize=12, fontweight='bold')
plt.title('Average Price Per Item \nby Loyalty Level', fontsize=14, fontweight='bold')

# Export chart to the visualizations folder using savefig() function

barh_loyalty_ppprod_ave.figure.savefig(os.path.join(path, '04 Analysis','Visualizations', 'barh_loyalty_ppprod_ave.png'),
                                bbox_inches='tight') # keeps entire fig in the png

<a id='12'></a> <br>
>> ## c. Loyalty and department

In [None]:
# First change outliers to NaN to correct skew of distribution
# (same replacement as in the import section; when the notebook is run top to
# bottom no price above 100 is left, so this repeat changes nothing)

ict_final_df.loc[ict_final_df['prices'] >100, 'prices'] = np.nan

In [None]:
# pivot table of summed prices: one row per (loyalty_flag, department), one column per region

pv_loyalty_dept_reg_prices = ict_final_df.pivot_table(
    values='prices',
    index=['loyalty_flag', 'department'],
    columns=['region'],
    aggfunc='sum',
)

In [None]:
# check output

pv_loyalty_dept_reg_prices

In [None]:
# reset index

pv_loyalty_dept_reg_prices = pv_loyalty_dept_reg_prices.reset_index()

In [None]:
# check output

pv_loyalty_dept_reg_prices

In [None]:
# create dropdown menu graph: for every loyalty level a group of 4 pies (1 per region),
# with a dropdown that switches which group is visible

# position of each region's pie on the canvas
region_domains = {
    'Midwest': dict(x=[0.0, 0.5], y=[0.5, 1]),     # top-left quadrant
    'Northeast': dict(x=[0.5, 1], y=[0.5, 1]),     # top-right quadrant
    'South': dict(x=[0.0, 0.5], y=[0.0, 0.5]),     # bottom-left quadrant
    'West': dict(x=[0.5, 1], y=[0, 0.5]),          # bottom-right quadrant
}

fig = make_subplots(
    rows=2, cols=2,
    specs=[[{'type': 'pie'}, {'type': 'pie'}],
           [{'type': 'pie'}, {'type': 'pie'}]]
) # creates a 2 x 2 subplot and specifies type of charts

loyalty_levels = list(pv_loyalty_dept_reg_prices['loyalty_flag'].unique()) # one group of pies per level

for level_pos, t in enumerate(loyalty_levels):
    dff = pv_loyalty_dept_reg_prices[pv_loyalty_dept_reg_prices['loyalty_flag'] == t]

    # explodes the 'produce' wedge; derived from the labels so the list always has
    # one entry per wedge and follows 'produce' wherever it sits in the department order
    mypull = [0.1 if str(dept).lower() == 'produce' else 0 for dept in dff['department']]

    for region, domain in region_domains.items():
        fig.add_trace(go.Pie(
            labels=dff['department'],
            values=dff[region],
            title=region,
            title_font_size=20,
            pull=mypull,
            # only the first level is drawn initially; with every trace visible the
            # groups are stacked on top of each other until a selection is made
            visible=(level_pos == 0),
            domain=domain
        ))

# creates the dropdown selections: each button shows the traces of exactly one loyalty level
traces_per_level = len(region_domains)
trace_total = len(loyalty_levels) * traces_per_level

dropdown_buttons = [
    dict(label='Select a Profile',
         method='update',
         args=[{'visible': [False] * trace_total}]) # hides every pie
]
for level_pos, level in enumerate(loyalty_levels):
    level_visibility = [trace_pos // traces_per_level == level_pos for trace_pos in range(trace_total)]
    dropdown_buttons.append(
        dict(label=str(level), # label comes from the data, so it always names the traces it shows
             method='update',
             args=[{'visible': level_visibility}])
    )

fig.update_layout(
    updatemenus=[
        dict(
            active=1 if loyalty_levels else 0, # matches the group that is visible initially
            buttons=dropdown_buttons,
        )
    ])

fig.update_layout(
    legend=dict(
    orientation='v',
    yanchor='auto',
    y=0.9,
    xanchor='right',
    x=-0.1), # sets legend on left and below dropdown
    autosize=True, # keeps charts/layout evenly displayed
    uniformtext_minsize=13.5,
    uniformtext_mode='hide', # sets inside labels to at least fontsize 13.5 and hides the ones that don't fit
    height=900,
    width=1050, # height and width adjusted to increase chart sizes
    title='Customer Loyalty Expenditure by Department & Region',
    font_size=16,
    title_x=0.5,
    title_xanchor='center', # centers the title
    legend_title='Department'
)

fig.update_traces(textposition='inside', # moves all text/labels inside the wedges
                  marker=dict(line=dict(color='#000000', width=1)), # gives outline to charts
                  hoverinfo='label+percent',
                  textinfo='label+value',
                  texttemplate = '%{label}:<br>(%{value:$,.2s})' # changes percent to value and truncates
)

# The pie titles are sized per trace (title_font_size above); the figure has no
# subplot-title annotations, so no annotation update is needed.

# save.fig saved a blank screen, used screenshot instead to save to 'Visualizations' folder

fig.show()

In [None]:
# create 3 individual pie charts if needed

In [None]:
# create 3 subsets (1 for each loyalty profile)
# Rows are selected by their loyalty_flag value instead of fixed row positions, so each
# subset stays complete even when a profile does not have exactly 21 department rows.

flag_column = pv_loyalty_dept_reg_prices['loyalty_flag']

# presumably Loyal, New, Regular in that order (the order the row slices 0:21 / 21:42 / 42:63
# relied on) — TODO confirm; unpacking fails loudly if there are not exactly 3 levels
loyal_flag, new_flag, reg_flag = flag_column.unique()

loyal_subset = pv_loyalty_dept_reg_prices[flag_column == loyal_flag].copy() # Loyal customer - copy() to avoid 'slice' error

new_subset = pv_loyalty_dept_reg_prices[flag_column == new_flag].copy() # New customer - copy() to avoid 'slice' error

reg_subset = pv_loyalty_dept_reg_prices[flag_column == reg_flag].copy() # Regular customer - copy() to avoid 'slice' error

In [None]:
# check outputs 

loyal_subset

In [None]:
# check outputs 

new_subset

In [None]:
# check outputs 

reg_subset

In [None]:
# create list of dfs

df_list = [loyal_subset, new_subset, reg_subset]

In [None]:
# remove 'loyalty_flag' from all subsets

for subset in df_list:
    # flatten the header to plain labels (drops the 'region' axis name left by the pivot);
    # the labels themselves are already loyalty_flag, department, Midwest, Northeast, South, West
    subset.columns = list(subset.columns)
    # dropping by name with errors='ignore' keeps this cell re-runnable once the column is gone
    subset.drop(columns='loyalty_flag', errors='ignore', inplace=True)

In [None]:
# check output

loyal_subset

In [None]:
# set the index of all new subsets to department

for df in df_list:
    # frames already indexed by department are skipped, so the cell can be re-run
    if 'department' in df.columns:
        df.set_index('department', inplace=True)

In [None]:
# check output

new_subset

In [None]:
# blank out the index name on all subsets so no 'department' header is shown

for df in df_list:
    # an empty string replaces the index name
    df.index.rename('', inplace=True)

In [None]:
# check output

reg_subset

In [None]:
# create 3 different pie charts (4 subsets each)

In [None]:
# create loyal customer pie charts: a 2 x 2 figure with one pie per region

# Explode only the last wedge. Values are sorted ascending below, so the last wedge is the
# largest department (presumably 'produce' — TODO confirm). The list is sized from the data
# because pie() needs exactly one explode entry per wedge.
myexplode = [0] * (len(loyal_subset) - 1) + [0.1]
mywedge_props = {'linewidth' : 1, 'edgecolor' : 'black'} # wedge outline
mytext_props = {'fontsize': 11, 'fontstyle': 'normal'} # label text properties
threshold = 6 # wedges below this percentage get neither a label nor a percentage

# Create a figure and a set of subplots
fig1, axes = plt.subplots(2, 2, figsize=(12, 12))

# Plot each column (region) of the dataframe as a pie chart in its own subplot
for ax, column in zip(axes.flat, loyal_subset.columns):
    # sort once and take the labels from the same sorted series, so every label
    # belongs to its wedge even when two departments have equal values
    region_values = loyal_subset[column].sort_values()
    wedges, texts, autotexts = ax.pie(region_values,
                                      labels=region_values.index,
                                      autopct='%1.2f%%',
                                      wedgeprops=mywedge_props,
                                      textprops=mytext_props,
                                      explode=myexplode,
                                      startangle=90) # starts the first wedge at 90 degrees

    # hide the label and the percentage of every wedge below the threshold
    for label, pct_label in zip(texts, autotexts):
        pct_value = float(pct_label.get_text().rstrip('%'))
        if pct_value < threshold:
            label.set_text('')
            pct_label.set_text('')

    ax.set_title(column, fontsize=12, fontweight='bold') # adds title to each subplot

# get handles and labels from the last subplot - this must be done AFTER plotting
# NOTE(review): each pie is sorted by its own region, so wedge colours only match this
# legend where a region ranks the departments like the last subplot does — confirm
legend_ax = axes[-1, -1]
current_handles, current_labels = legend_ax.get_legend_handles_labels()

# reverse the order so the largest department is listed first
reversed_handles = current_handles[::-1]
reversed_labels = current_labels[::-1]

legend_ax.legend(reversed_handles, reversed_labels, title='Departments', bbox_to_anchor=(1.5, 1.8))

fig1.suptitle('Customer Loyalty Expenditure by Department & Region \n\nLOYAL CUSTOMER', fontsize='20', fontweight='bold')

# Export chart to the visualizations folder using savefig() function

fig1.savefig(os.path.join(path, '04 Analysis','Visualizations', 'pie_loyal_subset.png'), bbox_inches='tight') # saves png w/out cutting off x,yaxis labels
plt.show()

In [None]:
# create new customer pie charts: a 2 x 2 figure with one pie per region

# Explode only the last wedge. Values are sorted ascending below, so the last wedge is the
# largest department (presumably 'produce' — TODO confirm). The list is sized from the data
# because pie() needs exactly one explode entry per wedge.
myexplode = [0] * (len(new_subset) - 1) + [0.1]
mywedge_props = {'linewidth' : 1, 'edgecolor' : 'black'} # wedge outline
mytext_props = {'fontsize': 11, 'fontstyle': 'normal'} # label text properties
threshold = 6 # wedges below this percentage get neither a label nor a percentage

# Create a figure and a set of subplots
fig1, axes = plt.subplots(2, 2, figsize=(12, 12))

# Plot each column (region) of the dataframe as a pie chart in its own subplot
for ax, column in zip(axes.flat, new_subset.columns):
    # sort once and take the labels from the same sorted series, so every label
    # belongs to its wedge even when two departments have equal values
    region_values = new_subset[column].sort_values()
    wedges, texts, autotexts = ax.pie(region_values,
                                      labels=region_values.index,
                                      autopct='%1.2f%%',
                                      wedgeprops=mywedge_props,
                                      textprops=mytext_props,
                                      explode=myexplode,
                                      startangle=90) # starts the first wedge at 90 degrees

    # hide the label and the percentage of every wedge below the threshold
    for label, pct_label in zip(texts, autotexts):
        pct_value = float(pct_label.get_text().rstrip('%'))
        if pct_value < threshold:
            label.set_text('')
            pct_label.set_text('')

    ax.set_title(column, fontsize=12, fontweight='bold') # adds title to each subplot

# get handles and labels from the last subplot - this must be done AFTER plotting
# NOTE(review): each pie is sorted by its own region, so wedge colours only match this
# legend where a region ranks the departments like the last subplot does — confirm
legend_ax = axes[-1, -1]
current_handles, current_labels = legend_ax.get_legend_handles_labels()

# reverse the order so the largest department is listed first
reversed_handles = current_handles[::-1]
reversed_labels = current_labels[::-1]

legend_ax.legend(reversed_handles, reversed_labels, title='Departments', bbox_to_anchor=(1.5, 1.8))

fig1.suptitle('Customer Loyalty Expenditure by Department & Region \n\nNEW CUSTOMER', fontsize='20', fontweight='bold')

# Export chart to the visualizations folder using savefig() function

fig1.savefig(os.path.join(path, '04 Analysis','Visualizations', 'pie_new_subset.png'), bbox_inches='tight') # saves png w/out cutting off x,yaxis labels
plt.show()

In [None]:
# create reg customer pie charts: one pie per column of reg_subset on a 2x2 grid

mywedge_props = {'linewidth' : 1, 'edgecolor' : 'black'}
mytext_props = {'fontsize': 11, 'fontstyle': 'normal'} # sets wedges outline and text properties

# Pop out only the last wedge. Values are sorted ascending below, so the last wedge is the
# largest one ('produce' according to the original note). The list is sized from the data
# rather than typed out as 21 entries, so pie() still accepts it if the row count changes.
myexplode = [0] * (len(reg_subset.index) - 1) + [0.1]

threshold = 6 # wedges whose share is below this percentage get no text at all

# Create a figure and a set of subplots
fig1, axes = plt.subplots(2, 2, figsize=(12, 12))

# Plot each column of the dataframe as a pie chart
for i, column in enumerate(reg_subset.columns): # iterates following commands over each column of subset
    # sort once and take values and labels from the same Series so they cannot get out of step
    sorted_shares = reg_subset[column].sort_values()
    wedges, texts, autotexts = axes[i // 2, i % 2].pie(sorted_shares,
                                           labels=sorted_shares.index,
                                           autopct='%1.2f%%',
                                           wedgeprops=mywedge_props,
                                           textprops=mytext_props,
                                           explode=myexplode,
                                           startangle=90) # startangle sets where the first wedge begins

    # autopct has already formatted every percentage; blank out both texts of the small wedges
    for label, pct_label in zip(texts, autotexts):
        pct_value = pct_label.get_text().rstrip('%')
        if float(pct_value) < threshold:
            label.set_text('')
            pct_label.set_text('')

    axes[i // 2, i % 2].set_title(column, fontsize='12', fontweight='bold') # adds title to each subplot

# get current handles and labels
# this must be done AFTER plotting (they are read from the current axes)
current_handles, current_labels = plt.gca().get_legend_handles_labels()

# reverse so the legend lists the largest wedge first
reversed_handles = list(reversed(current_handles))
reversed_labels = list(reversed(current_labels))

# call plt.legend() with the new values; bbox_to_anchor places it outside the axes
plt.legend(reversed_handles,reversed_labels, title='Departments', bbox_to_anchor=(1.5, 1.8))

fig1.suptitle('Customer Loyalty Expenditure by Department & Region \n\nREGULAR CUSTOMER', fontsize='20', fontweight='bold')

# Export chart to the visualizations folder using savefig() function

plt.savefig(os.path.join(path, '04 Analysis','Visualizations', 'pie_regular_subset.png'), bbox_inches='tight') # saves png w/out cutting off x,yaxis labels
plt.show()

<a id='13'></a> <br>
>> ## d. Loyalty and hour_of_day/day_of_week

In [None]:
# Build four working frames for the loyalty-by-time analysis. Each one keeps order_id and
# loyalty_flag plus a single time column; the *_prices frames also keep the item price.
# reindex(columns=...) selects the listed columns, and copy() is kept so later edits
# never write back into ict_final_df.

loyalty_key_cols = ['order_id', 'loyalty_flag']

df_dow_valcnts = ict_final_df.reindex(columns=loyalty_key_cols + ['orders_day_of_week']).copy()
df_hod_valcnts = ict_final_df.reindex(columns=loyalty_key_cols + ['order_hour_of_day']).copy()
df_dow_prices = ict_final_df.reindex(columns=loyalty_key_cols + ['orders_day_of_week', 'prices']).copy()
df_hod_prices = ict_final_df.reindex(columns=loyalty_key_cols + ['order_hour_of_day', 'prices']).copy()

In [None]:
df_dow_valcnts

In [None]:
df_hod_valcnts

In [None]:
df_dow_prices

In [None]:
df_hod_prices

In [None]:
# create df list to filter and clean faster
# (the list holds the two order-count frames themselves, not copies of them)

dfs_vals = [df_dow_valcnts, df_hod_valcnts]

In [None]:
# print (rows, columns) of every frame in dfs_vals

for df in dfs_vals:
    print(df.shape)

In [None]:
# count the rows in each frame that repeat an earlier row in every column

for df in dfs_vals:
    n_duplicate_rows = df.duplicated().sum()
    print(n_duplicate_rows)

In [None]:
# drop dupes
# The frames in dfs_vals are the same objects as df_dow_valcnts / df_hod_valcnts, so dropping
# in place here is what updates those two names; rebinding `df` inside the loop would not.

for df in dfs_vals:
    df.drop_duplicates(inplace=True) # removes rows that repeat an earlier row in every column

In [None]:
# print (rows, columns) of every frame in dfs_vals again, after the duplicate rows were dropped

for df in dfs_vals:
    print(df.shape)

In [None]:
# print the first rows of every frame in dfs_vals

for df in dfs_vals:
    print(df.head())

In [None]:
# print the last rows of every frame in dfs_vals

for df in dfs_vals:
    print(df.tail())

In [None]:
# order_id is no longer needed in the day-of-week counts frame
# (the hour-of-day frame is handled in its own cell)

df_dow_valcnts = df_dow_valcnts.drop(columns='order_id')

In [None]:
df_dow_valcnts

In [None]:
# order_id is no longer needed in the hour-of-day counts frame
df_hod_valcnts = df_hod_valcnts.drop(columns='order_id')

In [None]:
df_hod_valcnts

In [None]:
# count rows per (loyalty_flag, day) and per (loyalty_flag, hour) for charting;
# the result column keeps the time column's name until it is renamed in a later cell

df_dow_valcnts_grouped = (
    df_dow_valcnts
    .groupby(['loyalty_flag', 'orders_day_of_week'])
    .agg({'orders_day_of_week': 'count'})
)
df_hod_valcnts_grouped = (
    df_hod_valcnts
    .groupby(['loyalty_flag', 'order_hour_of_day'])
    .agg({'order_hour_of_day': 'count'})
)

In [None]:
df_dow_valcnts_grouped

In [None]:
df_hod_valcnts_grouped

In [None]:
# the aggregated day-of-week column holds row counts, so call it 'count'
# (the hour-of-day frame is renamed in its own cell)

df_dow_valcnts_grouped = df_dow_valcnts_grouped.rename(columns={'orders_day_of_week': 'count'})

In [None]:
df_dow_valcnts_grouped.head()

In [None]:
# the aggregated hour-of-day column holds row counts, so call it 'count'
df_hod_valcnts_grouped = df_hod_valcnts_grouped.rename(columns={'order_hour_of_day': 'count'})

In [None]:
df_hod_valcnts_grouped.head()

In [None]:
# move the (loyalty_flag, time) group keys out of the index and back into ordinary columns;
# the grouped results are already DataFrames, so reset_index() is all that is needed

df_dow_valcnts_final = df_dow_valcnts_grouped.reset_index()
df_hod_valcnts_final = df_hod_valcnts_grouped.reset_index()

In [None]:
df_dow_valcnts_final

In [None]:
df_hod_valcnts_final

In [None]:
# same process for prices dfs
# (except that no duplicate rows are dropped from these frames in the cells below;
# every row's price goes into the sums at the groupby step)

# create df list to filter and clean faster

dfs_prices = [df_dow_prices, df_hod_prices]

In [None]:
# print (rows, columns) of every frame in dfs_prices

for df in dfs_prices:
    print(df.shape)

In [None]:
# print the first rows of every frame in dfs_prices
# head must be called: print(df.head) prints the bound method (and with it the whole
# frame repr) instead of the first five rows

for df in dfs_prices:
    print(df.head())

In [None]:
# print the last rows of every frame in dfs_prices
# tail must be called: print(df.tail) prints the bound method (and with it the whole
# frame repr) instead of the last five rows

for df in dfs_prices:
    print(df.tail())

In [None]:
# order_id is not needed for the day-of-week spend totals
# (the hour-of-day frame is handled in its own cell)

df_dow_prices = df_dow_prices.drop(columns='order_id')

In [None]:
df_dow_prices

In [None]:
# order_id is not needed for the hour-of-day spend totals
df_hod_prices = df_hod_prices.drop(columns='order_id')

In [None]:
df_hod_prices

In [None]:
# total of `prices` per (loyalty_flag, day) and per (loyalty_flag, hour) for charting

df_dow_prices_grouped = (
    df_dow_prices
    .groupby(['loyalty_flag', 'orders_day_of_week'])
    .agg({'prices': 'sum'})
)
df_hod_prices_grouped = (
    df_hod_prices
    .groupby(['loyalty_flag', 'order_hour_of_day'])
    .agg({'prices': 'sum'})
)

In [None]:
# check results

df_dow_prices_grouped

In [None]:
df_hod_prices_grouped

In [None]:
# move the (loyalty_flag, day) group keys out of the index and back into ordinary columns;
# the grouped result is already a DataFrame

df_dow_prices_final = df_dow_prices_grouped.reset_index()

In [None]:
df_dow_prices_final

In [None]:
# move the (loyalty_flag, hour) group keys out of the index and back into ordinary columns
df_hod_prices_final = df_hod_prices_grouped.reset_index()

In [None]:
df_hod_prices_final

In [None]:
# Dropdown figure: four stacked bar panels (orders and spend by day of week, orders and spend
# by hour of day). Every panel gets one bar trace per loyalty flag, and the dropdown switches
# which loyalty flag's traces are visible.
# use 4 different dfs:
    # df_dow_valcnts_final
    # df_hod_valcnts_final
    # df_dow_prices_final
    # df_hod_prices_final

my_tickvals=[0, 1, 2, 3, 4, 5, 6]
my_ticktext=['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']

# 4 x 1 grid of xy subplots; the placeholder titles are swapped for real ones further down
fig = make_subplots(
    rows=4, cols=1,
    specs=[[{'type':'xy'}] for _ in range(4)],
    subplot_titles=('Plot 1', 'Plot 2', 'Plot 3', 'Plot 4')
)

# (frame, x column, y column, subplot row) for each panel, top to bottom
panel_specs = [
    (df_dow_valcnts_final, 'orders_day_of_week', 'count', 1),
    (df_dow_prices_final, 'orders_day_of_week', 'prices', 2),
    (df_hod_valcnts_final, 'order_hour_of_day', 'count', 3),
    (df_hod_prices_final, 'order_hour_of_day', 'prices', 4),
]

# traces are added panel by panel, and within a panel in the order unique() yields the flags
for panel_df, x_col, y_col, panel_row in panel_specs:
    for t in panel_df['loyalty_flag'].unique():
        flag_rows = panel_df[panel_df['loyalty_flag'] == t]
        fig.add_trace(
            go.Bar(x=flag_rows[x_col].unique(), y=flag_rows[y_col], visible=True, name=''),
            row=panel_row, col=1
        )


def show_only(position, n_flags=3, n_traces=12):
    """Visibility list that shows, in every panel, only the trace at `position`."""
    return [trace_idx % n_flags == position for trace_idx in range(n_traces)]


# creates the dropdown selections
# NOTE(review): the positions assume each panel's traces come out as
# Loyal (0), New (1), Regular (2) -- confirm against loyalty_flag.unique().
dropdown_buttons = [
    dict(label='Select a Profile', method='update', args=[{'visible': [False]*12}]), # hides every trace
    dict(label='New customer', method='update', args=[{'visible': show_only(1)}]),
    dict(label='Regular customer', method='update', args=[{'visible': show_only(2)}]),
    dict(label='Loyal customer', method='update', args=[{'visible': show_only(0)}]),
]
fig.update_layout(dragmode=False,
    updatemenus=[dict(active=0, buttons=dropdown_buttons)])

fig.update_layout(barmode='group',
    showlegend=False, # removes legend
    autosize=True,
    uniformtext_minsize=22,
    uniformtext_mode='hide', # text that cannot be drawn at the minimum size is hidden
    height=1200,
    width=1000, # height and width adjusted to increase chart sizes
    title='Customer Loyalty Expenditure by Day & Hour of Day',
    font_size=16,
    title_x=0.5,
    title_xanchor='center' # centers the title
)

fig.update_xaxes(tick0=0, dtick=1) # a tick at every integer
for day_row in (1, 2): # day-of-week panels: show day names at the tick positions
    fig.update_xaxes(tickvals=my_tickvals, ticktext=my_ticktext, row=day_row, col=1)
for hour_row in (3, 4): # hour-of-day panels: explain the 0-23 scale in the axis title
    fig.update_xaxes(title='(0 = 12am, 23 = 11pm)', row=hour_row, col=1)

fig.update_traces(textposition='inside', # any bar text is drawn inside the bar
                  marker=dict(line=dict(color='#000000', width=1)), # black outline around each bar
                  hoverinfo='all'
)

# swap the placeholder subplot titles for the real ones
names={'Plot 1':'ORDERS: Day of Week', 'Plot 2':'SPEND: Day of Week', 'Plot 3':'ORDERS: Hour of Day', 'Plot 4':'SPEND: Hour of Day'}


def retitle_subplot(annotation):
    """Replace a placeholder subplot title with its entry in `names`."""
    annotation.update(text=names[annotation.text])


fig.for_each_annotation(retitle_subplot)
fig.update_annotations(font_size=20) # increases the subplot title fontsize

# save.fig saved a blank screen, used screenshot instead to save to 'Visualizations' folder

fig.show()

<a id='14'></a> <br>
> ## C. Regional Ordering Habits

In [None]:
ict_final_df.head()

In [None]:
# treat any price above 100 as an outlier and blank it out (NaN) so it no longer skews the distribution

is_price_outlier = ict_final_df['prices'] > 100
ict_final_df.loc[is_price_outlier, 'prices'] = np.nan

In [None]:
# 'order_total': the sum of prices over all rows sharing an order_id, repeated on each of those rows

prices_by_order = ict_final_df.groupby('order_id')['prices']
ict_final_df['order_total'] = prices_by_order.transform('sum')

In [None]:
ict_final_df['order_total'].describe()

In [None]:
# check df

ict_final_df.head()

In [None]:
# save the frame (now including order_total) as a pickle in the prepared-data folder

ict_final_pkl_out = os.path.join(path, '02 Data','Prepared Data', 'ict_final_df.pkl')
ict_final_df.to_pickle(ict_final_pkl_out)

## --------------------IMPORT ICT_FINAL_DF

In [None]:
# reload the prepared frame from the pickle in the prepared-data folder

ict_final_pkl_in = os.path.join(path, '02 Data', 'Prepared Data', 'ict_final_df.pkl')
ict_final_df = pd.read_pickle(ict_final_pkl_in)

In [None]:
# working frame for the regional analysis: ids, region and the order-habit columns;
# copy() keeps later edits from writing back into ict_final_df

reg_order_habit_cols = [
    'order_id', 'user_id', 'region',
    'orders_day_of_week', 'order_hour_of_day',
    'prior_order_median', 'most_ordered_product',
    'max_order', 'order_total',
]
df_reg_order_habits = ict_final_df.reindex(columns=reg_order_habit_cols).copy()

In [None]:
# check df

df_reg_order_habits.shape

In [None]:
df_reg_order_habits.head()

In [None]:
# keep the first of any rows that are identical in every selected column, then show the new size

df_reg_order_habits = df_reg_order_habits.drop_duplicates(keep='first')

df_reg_order_habits.shape

In [None]:
df_reg_order_habits

In [None]:
# create separate dfs for each col, one row per region

def first_mode(values):
    """Return the most frequent value of a Series as a single scalar.

    mode() returns every tied value, which is not one value per region; taking the
    first entry keeps the aggregate scalar. A group with no values at all gives NaN.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else np.nan


regional_habits = df_reg_order_habits.groupby('region')

# most frequent order day
df_reg_order_dow = regional_habits.agg({'orders_day_of_week': first_mode})

# most frequent order hour
df_reg_order_hod = regional_habits.agg({'order_hour_of_day': first_mode})

# mean of prior_order_median
df_reg_prior = regional_habits.agg({'prior_order_median': 'mean'})

# most frequent most_ordered_product
df_reg_prod = regional_habits.agg({'most_ordered_product': first_mode})

# mean and sum of order_total
df_reg_prices = regional_habits.agg({'order_total': ['mean', 'sum']})

# mean of max_order
df_reg_max_order = regional_habits.agg({'max_order': 'mean'})

<a id='15'></a> <br>
>> ## a. orders_day_of_week

In [None]:
# check dfs

df_reg_order_dow

<a id='16'></a> <br>
>> ## b. order_hour_of_day

In [None]:
df_reg_order_hod

<a id='17'></a> <br>
>> ## c. prior_order_median

In [None]:
df_reg_prior

<a id='18'></a> <br>
>> ## d. most_ordered_product

In [None]:
df_reg_prod

<a id='19'></a> <br>
>> ## e. order_total

In [None]:
df_reg_prices

<a id='20'></a> <br>
>> ## f. max_order

In [None]:
df_reg_max_order

### --- NO BIG DIFFERENCE

In [None]:
# second regional working frame: user, region and the categorical flag columns;
# copy() keeps later edits from writing back into ict_final_df

reg_flag_cols = ['user_id', 'region', 'price_range', 'loyalty_flag', 'spending_flag', 'order_freq_flag']
df_reg_order_habits2 = ict_final_df.reindex(columns=reg_flag_cols).copy()

In [None]:
# check df

df_reg_order_habits2.shape

In [None]:
df_reg_order_habits2.head()

In [None]:
# row counts of each flag value (table rows) against region (table columns), one table per flag

region_col = ict_final_df['region']

ct_reg_price_range = pd.crosstab(ict_final_df['price_range'], region_col)
ct_reg_loyalty_flag = pd.crosstab(ict_final_df['loyalty_flag'], region_col)
ct_reg_spending_flag = pd.crosstab(ict_final_df['spending_flag'], region_col)
ct_reg_order_freq_flag = pd.crosstab(ict_final_df['order_freq_flag'], region_col)

In [None]:
# check results

ct_reg_price_range.shape

In [None]:
ct_reg_price_range.head()

In [None]:
ct_reg_loyalty_flag.shape

In [None]:
ct_reg_loyalty_flag.head()

In [None]:
ct_reg_spending_flag.shape

In [None]:
ct_reg_spending_flag.head()

In [None]:
ct_reg_order_freq_flag.shape

In [None]:
ct_reg_order_freq_flag.head()

<a id='21'></a> <br>
>> ## g. Price_range

In [None]:
# create pie chart of region and price_range (one pie per region on a 2x2 grid)

mywedge_props = {'linewidth' : 1, 'edgecolor' : 'black'}
mytext_props = {'fontsize': 16, 'fontstyle': 'normal', 'fontweight': 'bold'} # sets wedges outline and text properties

# create patches for legend
# NOTE(review): colours are handed out in wedge order, so these patches assume the ascending
# order of the categories is high-range, low-range, mid-range -- confirm against ct_reg_price_range.
# The raw string keeps the backslashes in front of '$' without an invalid '\$' escape.
mid_patch = mpatches.Patch(color='orangered', label=r'Mid-range (\$6-\$15)')
high_patch = mpatches.Patch(color='lightgray', label='High-range (>$15)')
low_patch = mpatches.Patch(color='cornflowerblue', label='Low-range (<$6)')

# Use one category order (ascending by total count over all regions) for every pie. Sorting
# each region on its own would let a colour stand for different categories in different
# pies, while the legend is shared by all four.
price_range_order = ct_reg_price_range.sum(axis=1).sort_values().index

# Create a figure and a set of subplots
fig, axes = plt.subplots(2, 2, figsize=(12, 12))

# Plot each column of the dataframe as a pie chart
for i, column in enumerate(ct_reg_price_range.columns): # iterates following commands over each region column
    wedges, texts, autotexts = axes[i // 2, i % 2].pie(ct_reg_price_range.loc[price_range_order, column],
                                           autopct='%1.2f%%',
                                           wedgeprops=mywedge_props,
                                           colors=('lightgray', 'cornflowerblue', 'orangered'),
                                           textprops=mytext_props,
                                           startangle=180) # startangle sets where the first wedge begins

    for patch, txt in zip(wedges, autotexts): # moves pct txt too small for wedge to outside of chart
        if (patch.theta2 - patch.theta1) <= 5: # wedge spans 5 degrees or less
            # the angle at which the text is normally located
            angle = (patch.theta2 + patch.theta1) / 2.
            # new distance to the pie center
            x = patch.r * 1.2 * np.cos(angle * np.pi / 180)
            y = patch.r * 1.2 * np.sin(angle * np.pi / 180)
            # move text to new position
            txt.set_position((x, y))

    axes[i // 2, i % 2].set_title(column, fontsize='14', fontweight='normal') # adds title to each subplot

# one shared legend, attached to the current axes and placed outside with bbox_to_anchor
plt.legend(handles=[mid_patch, low_patch, high_patch], title='Product Pricing', bbox_to_anchor=(1.5, 1.8))

fig.suptitle('Proportion of Product Price-range \nby Customer Region', fontsize='20', fontweight='bold')

# Export chart to the visualizations folder using savefig() function

plt.savefig(os.path.join(path, '04 Analysis','Visualizations', 'pie_reg_price_range.png'), bbox_inches='tight') # saves png w/out cutting off x,yaxis labels
plt.show()

<a id='22'></a> <br>
>> ## h. Loyalty_flag

In [None]:
# create pie chart of region and loyalty flag (one pie per region on a 2x2 grid)

mywedge_props = {'linewidth' : 1, 'edgecolor' : 'black'}
mytext_props = {'fontsize': 16, 'fontstyle': 'normal', 'fontweight': 'bold'} # sets wedges outline and text properties

# create patches for legend
# NOTE(review): colours are handed out in wedge order, so these patches assume the ascending
# order of the categories is loyal, new, regular -- confirm against ct_reg_loyalty_flag
# (if new customers are the smallest group, the loyal/new patches are swapped).
loyal_patch = mpatches.Patch(color='lightgray', label='Loyal (>40 orders)')
new_patch = mpatches.Patch(color='cornflowerblue', label='New (<=10 orders)')
regular_patch = mpatches.Patch(color='orangered', label='Regular (11-40 orders)')

# Use one category order (ascending by total count over all regions) for every pie. Sorting
# each region on its own would let a colour stand for different categories in different
# pies, while the legend is shared by all four.
loyalty_order = ct_reg_loyalty_flag.sum(axis=1).sort_values().index

# Create a figure and a set of subplots
fig, axes = plt.subplots(2, 2, figsize=(12, 12))

# Plot each column of the dataframe as a pie chart
for i, column in enumerate(ct_reg_loyalty_flag.columns): # iterates following commands over each region column
    wedges, texts, autotexts = axes[i // 2, i % 2].pie(ct_reg_loyalty_flag.loc[loyalty_order, column],
                                           autopct='%1.2f%%',
                                           wedgeprops=mywedge_props,
                                           colors=('lightgray', 'cornflowerblue', 'orangered'),
                                           textprops=mytext_props,
                                           startangle=180) # startangle sets where the first wedge begins

    for patch, txt in zip(wedges, autotexts): # moves pct txt too small for wedge to outside of chart
        if (patch.theta2 - patch.theta1) <= 5: # wedge spans 5 degrees or less
            # the angle at which the text is normally located
            angle = (patch.theta2 + patch.theta1) / 2.
            # new distance to the pie center
            x = patch.r * 1.2 * np.cos(angle * np.pi / 180)
            y = patch.r * 1.2 * np.sin(angle * np.pi / 180)
            # move text to new position
            txt.set_position((x, y))

    axes[i // 2, i % 2].set_title(column, fontsize='14', fontweight='normal') # adds title to each subplot

# one shared legend, attached to the current axes and placed outside with bbox_to_anchor
plt.legend(handles=[regular_patch, new_patch, loyal_patch], title='Loyalty Level', bbox_to_anchor=(1.5, 1.8))

fig.suptitle('Proportion of Customer Loyalty \nby Region', fontsize='20', fontweight='bold')

# Export chart to the visualizations folder using savefig() function

plt.savefig(os.path.join(path, '04 Analysis','Visualizations', 'pie_reg_loyalty_flag.png'), bbox_inches='tight') # saves png w/out cutting off x,yaxis labels
plt.show()

<a id='23'></a> <br>
>> ## i. Spending_flag

In [None]:
# create pie chart of region and spending flag (one pie per region on a 2x2 grid)

mywedge_props = {'linewidth' : 1, 'edgecolor' : 'black'}
mytext_props = {'fontsize': 16, 'fontstyle': 'normal', 'fontweight': 'bold'} # sets wedges outline and text properties

# create patches for legend
# NOTE(review): colours are handed out in wedge order, so these patches assume high spenders
# are the smaller group and low spenders the larger -- confirm against ct_reg_spending_flag.
low_patch = mpatches.Patch(color='orangered', label='Low Spender (<$10 avg.)')
high_patch = mpatches.Patch(color='cornflowerblue', label='High Spender (>$10 avg.)')

# Use one category order (ascending by total count over all regions) for every pie. Sorting
# each region on its own would let a colour stand for different categories in different
# pies, while the legend is shared by all four.
spending_order = ct_reg_spending_flag.sum(axis=1).sort_values().index

# Create a figure and a set of subplots
fig, axes = plt.subplots(2, 2, figsize=(12, 12))

# Plot each column of the dataframe as a pie chart
for i, column in enumerate(ct_reg_spending_flag.columns): # iterates following commands over each region column
    wedges, texts, autotexts = axes[i // 2, i % 2].pie(ct_reg_spending_flag.loc[spending_order, column],
                                           autopct='%1.2f%%',
                                           wedgeprops=mywedge_props,
                                           colors=('cornflowerblue', 'orangered'),
                                           textprops=mytext_props,
                                           startangle=180) # startangle sets where the first wedge begins

    for patch, txt in zip(wedges, autotexts): # moves pct txt too small for wedge to outside of chart
        if (patch.theta2 - patch.theta1) <= 10: # wedge spans 10 degrees or less
            # the angle at which the text is normally located
            angle = (patch.theta2 + patch.theta1) / 2.
            # new distance to the pie center
            x = patch.r * 1.2 * np.cos(angle * np.pi / 180)
            y = patch.r * 1.2 * np.sin(angle * np.pi / 180)
            # move text to new position
            txt.set_position((x, y))

    axes[i // 2, i % 2].set_title(column, fontsize='14', fontweight='normal') # adds title to each subplot

# one shared legend, attached to the current axes and placed outside with bbox_to_anchor
plt.legend(handles=[low_patch, high_patch], title='Spending Level \n(avg. price per item)', bbox_to_anchor=(1.5, 1.8))

fig.suptitle('Proportion of Customer Spending Level \nby Region', fontsize='20', fontweight='bold')

# Export chart to the visualizations folder using savefig() function

plt.savefig(os.path.join(path, '04 Analysis','Visualizations', 'pie_reg_spending_flag.png'), bbox_inches='tight') # saves png w/out cutting off x,yaxis labels
plt.show()

<a id='24'></a> <br>
>> ## j. Order_frequency_flag

In [None]:
# create pie chart of region and order_frequency_flag (one pie per region on a 2x2 grid)

mywedge_props = {'linewidth' : 1, 'edgecolor' : 'black'}
mytext_props = {'fontsize': 16, 'fontstyle': 'normal', 'fontweight': 'bold'} # sets wedges outline and text properties

# create patches for legend
# NOTE(review): colours are handed out in wedge order, so these patches assume the ascending
# order of the categories is non-frequent, regular, frequent -- confirm against ct_reg_order_freq_flag.
non_freq_patch = mpatches.Patch(color='lightgray', label='Non-frequent (>20 days)')
regular_patch = mpatches.Patch(color='cornflowerblue', label='Regular (11-20 days)')
freq_patch = mpatches.Patch(color='orangered', label='Frequent (<=10 days)')

# Use one category order (ascending by total count over all regions) for every pie. Sorting
# each region on its own would let a colour stand for different categories in different
# pies, while the legend is shared by all four.
order_freq_order = ct_reg_order_freq_flag.sum(axis=1).sort_values().index

# Create a figure and a set of subplots
fig, axes = plt.subplots(2, 2, figsize=(12, 12))

def my_autopct(pct):
    """Format a percentage with two decimals.

    Not used by this cell (pie() is given the format string directly); retained in
    case a later cell calls it -- TODO confirm and delete.
    """
    return ('%1.2f%%' % pct)

# Plot each column of the dataframe as a pie chart
for i, column in enumerate(ct_reg_order_freq_flag.columns): # iterates following commands over each region column
    wedges, texts, autotexts = axes[i // 2, i % 2].pie(ct_reg_order_freq_flag.loc[order_freq_order, column],
                                           autopct='%1.2f%%',
                                           wedgeprops=mywedge_props,
                                           colors=('lightgray', 'cornflowerblue', 'orangered'),
                                           textprops=mytext_props,
                                           startangle=180) # startangle sets where the first wedge begins

    for patch, txt in zip(wedges, autotexts): # moves pct txt too small for wedge to outside of chart
        if (patch.theta2 - patch.theta1) <= 10: # wedge spans 10 degrees or less
            # the angle at which the text is normally located
            angle = (patch.theta2 + patch.theta1) / 2.
            # new distance to the pie center
            x = patch.r * 1.2 * np.cos(angle * np.pi / 180)
            y = patch.r * 1.2 * np.sin(angle * np.pi / 180)
            # move text to new position
            txt.set_position((x, y))

    axes[i // 2, i % 2].set_title(column, fontsize='14', fontweight='normal') # adds title to each subplot

# one shared legend, attached to the current axes and placed outside with bbox_to_anchor
plt.legend(handles=[freq_patch, regular_patch, non_freq_patch], title='Order Frequency \n(avg. days between orders)', bbox_to_anchor=(1.5, 1.8))

fig.suptitle('Customer Order Frequency \nby Region', fontsize='20', fontweight='bold')

# Export chart to the visualizations folder using savefig() function

plt.savefig(os.path.join(path, '04 Analysis','Visualizations', 'pie_reg_freq_flag.png'), bbox_inches='tight') # saves png w/out cutting off x,yaxis labels
plt.show()

### --- STILL NO DIFFERENCE BETWEEN REGION AND ORDERING HABITS

<a id='25'></a> <br>
> ## D. Age and Family Status

In [None]:
ict_final_df.head()

In [None]:
# One working frame per ordering habit, each carrying dependants and age plus the habit column.
# The first four also keep order_id; the last two are keyed by user_id only.
# copy() keeps later edits from writing back into ict_final_df.

order_level_keys = ['order_id', 'user_id', 'dependants', 'age']
user_level_keys = ['user_id', 'dependants', 'age']

df_deps_age_dow = ict_final_df.reindex(columns=order_level_keys + ['orders_day_of_week']).copy()
df_deps_age_hod = ict_final_df.reindex(columns=order_level_keys + ['order_hour_of_day']).copy()
df_deps_age_prices = ict_final_df.reindex(columns=order_level_keys + ['prices']).copy()
df_deps_age_total = ict_final_df.reindex(columns=order_level_keys + ['order_total']).copy()
df_deps_age_prior = ict_final_df.reindex(columns=user_level_keys + ['prior_order_median']).copy()
df_deps_age_max = ict_final_df.reindex(columns=user_level_keys + ['max_order']).copy()

In [None]:
# check result

df_deps_age_dow.shape

In [None]:
df_deps_age_dow.head()

In [None]:
# keep the first of any rows that are identical in every column, then show the new size

df_deps_age_dow = df_deps_age_dow.drop_duplicates(keep='first')

df_deps_age_dow.shape

In [None]:
# most frequent order day within each (dependants, age) group, written onto every row of the group;
# mode() can return several values on a tie, so the first one is taken

dow_by_deps_age = df_deps_age_dow.groupby(['dependants', 'age'])['orders_day_of_week']
df_deps_age_dow['most_ordered_day'] = dow_by_deps_age.transform(lambda days: days.mode().iloc[0])

In [None]:
# check result

df_deps_age_dow.head()

In [None]:
# most_ordered_day is the same on every row of a (dependants, age) group,
# so keep only the first row of each group, then show the new size

df_deps_age_dow = df_deps_age_dow.drop_duplicates(subset=['dependants', 'age'], keep='first')

df_deps_age_dow.shape

In [None]:
df_deps_age_hod.shape

In [None]:
df_deps_age_hod.head()

In [None]:
# keep the first of any rows that are identical in every column, then show the new size

df_deps_age_hod = df_deps_age_hod.drop_duplicates(keep='first')

df_deps_age_hod.shape

In [None]:
# most frequent order hour within each (dependants, age) group, written onto every row of the group;
# mode() can return several values on a tie, so the first one is taken

hod_by_deps_age = df_deps_age_hod.groupby(['dependants', 'age'])['order_hour_of_day']
df_deps_age_hod['most_ordered_hour'] = hod_by_deps_age.transform(lambda hours: hours.mode().iloc[0])

In [None]:
# check result

df_deps_age_hod.head()

In [None]:
# most_ordered_hour is the same on every row of a (dependants, age) group,
# so keep only the first row of each group, then show the new size

df_deps_age_hod = df_deps_age_hod.drop_duplicates(subset=['dependants', 'age'], keep='first')

df_deps_age_hod.shape

In [None]:
df_deps_age_prices.shape

In [None]:
df_deps_age_prices.head()

In [None]:
# mean of `prices` over all rows of each (dependants, age) group, written onto every row of the group

prices_by_deps_age = df_deps_age_prices.groupby(['dependants', 'age'])['prices']
df_deps_age_prices['avg_price_per_item'] = prices_by_deps_age.transform('mean')

In [None]:
df_deps_age_prices.head()

In [None]:
# avg_price_per_item is the same on every row of a (dependants, age) group,
# so keep only the first row of each group, then show the new size

df_deps_age_prices = df_deps_age_prices.drop_duplicates(subset=['dependants', 'age'], keep='first')

df_deps_age_prices.shape

In [None]:
df_deps_age_total.shape

In [None]:
df_deps_age_total.head()

In [None]:
# keep the first of any rows that are identical in every column, then show the new size

df_deps_age_total = df_deps_age_total.drop_duplicates(keep='first')

df_deps_age_total.shape

In [None]:
# mean order_total within each (dependants, age) group, written onto every row of the group

totals_by_deps_age = df_deps_age_total.groupby(['dependants', 'age'])['order_total']
df_deps_age_total['avg_order'] = totals_by_deps_age.transform('mean')

In [None]:
df_deps_age_total.head()

In [None]:
# avg_order is the same on every row of a (dependants, age) group,
# so keep only the first row of each group, then show the new size

df_deps_age_total = df_deps_age_total.drop_duplicates(subset=['dependants', 'age'], keep='first')

df_deps_age_total.shape

In [None]:
df_deps_age_prior.shape

In [None]:
df_deps_age_prior.head()

In [None]:
# keep the first of any rows that are identical in every column, then show the new size

df_deps_age_prior = df_deps_age_prior.drop_duplicates(keep='first')

df_deps_age_prior.shape

In [None]:
# mean of prior_order_median within each (dependants, age) group, written onto every row of the group
# (a mean of the stored medians, not a median itself)

prior_by_deps_age = df_deps_age_prior.groupby(['dependants', 'age'])['prior_order_median']
df_deps_age_prior['avg_days_between_orders'] = prior_by_deps_age.transform('mean')

In [None]:
df_deps_age_prior.head()

In [None]:
# avg_days_between_orders is the same on every row of a (dependants, age) group,
# so keep only the first row of each group, then show the new size

df_deps_age_prior = df_deps_age_prior.drop_duplicates(subset=['dependants', 'age'], keep='first')

df_deps_age_prior.shape

In [None]:
df_deps_age_max.shape

In [None]:
df_deps_age_max.head()

In [None]:
# keep the first of any rows that are identical in every column, then show the new size

df_deps_age_max = df_deps_age_max.drop_duplicates(keep='first')

df_deps_age_max.shape

In [None]:
# mean max_order within each (dependants, age) group, written onto every row of the group

max_order_by_deps_age = df_deps_age_max.groupby(['dependants', 'age'])['max_order']
df_deps_age_max['avg_max_order'] = max_order_by_deps_age.transform('mean')

In [None]:
df_deps_age_max.head()

In [None]:
# avg_max_order is the same on every row of a (dependants, age) group,
# so keep only the first row of each group, then show the new size

df_deps_age_max = df_deps_age_max.drop_duplicates(subset=['dependants', 'age'], keep='first')

df_deps_age_max.shape

In [None]:
# create df_list
# (the six per-group summary frames, collected so the next cells can inspect them in one loop)

df_list=[df_deps_age_dow, df_deps_age_hod, df_deps_age_prior, df_deps_age_max, df_deps_age_total, df_deps_age_prices]

In [None]:
# print (rows, columns) of every frame in df_list

for df in df_list:
    print(df.shape)

In [None]:
# print the first rows of every frame in df_list

for df in df_list:
    print(df.head())

In [None]:
# drop the id columns and the raw habit column from each frame; what is left is
# dependants, age and the per-group summary column added earlier

order_id_cols = ['order_id', 'user_id']

df_deps_age_dow = df_deps_age_dow.drop(columns=order_id_cols + ['orders_day_of_week'])
df_deps_age_hod = df_deps_age_hod.drop(columns=order_id_cols + ['order_hour_of_day'])
df_deps_age_prior = df_deps_age_prior.drop(columns=['user_id', 'prior_order_median'])
df_deps_age_max = df_deps_age_max.drop(columns=['user_id', 'max_order'])
df_deps_age_total = df_deps_age_total.drop(columns=order_id_cols + ['order_total'])
df_deps_age_prices = df_deps_age_prices.drop(columns=order_id_cols + ['prices'])

<a id='26'></a> <br>
>> ## a. Most Frequent Day

In [None]:
# scatter of most frequent order day against age, coloured by dependants (fit_reg=False: no regression line)
sc_deps_age_dow = sns.lmplot(
    data=df_deps_age_dow,
    x='age',
    y='most_ordered_day',
    hue='dependants',
    fit_reg=False,
)

# axis labels and title are set through pyplot, i.e. on the current axes
plt.xlabel('Age', fontsize=14)
plt.ylabel('Order Day', fontsize=14)
plt.title('Age/Family Status Ordering Habits \n\nMOST FREQUENT DAY')

# save the figure as png; bbox_inches='tight' keeps the axis labels from being cut off
sc_deps_age_dow_png = os.path.join(path, '04 Analysis','Visualizations', 'sc_deps_age_dow.png')
sc_deps_age_dow.figure.savefig(sc_deps_age_dow_png, bbox_inches='tight')

<a id='27'></a> <br>
>> ## b. Most Frequent Hour

In [None]:
# scatter of most frequent order hour against age, coloured by dependants (fit_reg=False: no regression line)
sc_deps_age_hod = sns.lmplot(
    data=df_deps_age_hod,
    x='age',
    y='most_ordered_hour',
    hue='dependants',
    fit_reg=False,
)

# axis labels and title are set through pyplot, i.e. on the current axes
plt.xlabel('Age', fontsize=14)
plt.ylabel('Hour of Day', fontsize=14)
plt.title('Age/Family Status Ordering Habits \n\nMOST FREQUENT HOUR')

# save the figure as png; bbox_inches='tight' keeps the axis labels from being cut off
sc_deps_age_hod_png = os.path.join(path, '04 Analysis','Visualizations', 'sc_deps_age_hod.png')
sc_deps_age_hod.figure.savefig(sc_deps_age_hod_png, bbox_inches='tight')

<a id='28'></a> <br>
>> ## c. Frequency of Orders

In [None]:
# scatter of average days between orders against age, coloured by dependants (fit_reg=False: no regression line)
sc_deps_age_prior = sns.lmplot(
    data=df_deps_age_prior,
    x='age',
    y='avg_days_between_orders',
    hue='dependants',
    fit_reg=False,
)

# axis labels and title are set through pyplot, i.e. on the current axes
plt.xlabel('Age', fontsize=14)
plt.ylabel('Avg. Days b/w Orders', fontsize=14)
plt.title('Age/Family Status Ordering Habits \n\nFREQUENCY OF ORDERS')

# save the figure as png; bbox_inches='tight' keeps the axis labels from being cut off
sc_deps_age_prior_png = os.path.join(path, '04 Analysis','Visualizations', 'sc_deps_age_prior.png')
sc_deps_age_prior.figure.savefig(sc_deps_age_prior_png, bbox_inches='tight')

<a id='29'></a> <br>
>> ## d. Avg. Number of Orders

In [None]:
# scatter of average max_order against age, coloured by dependants (fit_reg=False: no regression line)

sc_age_deps_max = sns.lmplot(
    data=df_deps_age_max,
    x='age',
    y='avg_max_order',
    hue='dependants',
    fit_reg=False,
)

# axis labels and title are set through pyplot, i.e. on the current axes
plt.xlabel('Age', fontsize=14)
plt.ylabel('Avg. # of Orders', fontsize=14)
plt.title('Age/Family Status Ordering Habits \n\nAVG # ORDERS')

# save the figure as png; bbox_inches='tight' keeps the axis labels from being cut off
sc_age_deps_max_png = os.path.join(path, '04 Analysis','Visualizations', 'sc_age_deps_max.png')
sc_age_deps_max.figure.savefig(sc_age_deps_max_png, bbox_inches='tight')

<a id='30'></a> <br>
>> ## e. Avg. Order Total

In [None]:
# Scatter (regression line disabled) of age vs. average order total, colored by 'dependants'
sc_deps_age_total = sns.lmplot(
    data=df_deps_age_total,
    x='age',
    y='avg_order',
    hue='dependants',
    fit_reg=False,
)

# Title and axis labels
plt.title('Age/Family Status Ordering Habits \n\nAVG. ORDER TOTAL')
plt.ylabel('Order Total', fontsize=14)
plt.xlabel('Age', fontsize=14)

# Write the chart to the visualizations folder; bbox_inches='tight' keeps the x/y axis labels from being cut off
sc_deps_age_total.figure.savefig(
    os.path.join(path, '04 Analysis', 'Visualizations', 'sc_deps_age_total.png'),
    bbox_inches='tight',
)

<a id='31'></a> <br>
>> ## f. Avg. Price Per Item

In [None]:
# Scatter (regression line disabled) of age vs. average price per item, colored by 'dependants'
sc_deps_age_prices = sns.lmplot(
    data=df_deps_age_prices,
    x='age',
    y='avg_price_per_item',
    hue='dependants',
    fit_reg=False,
)

# Title and axis labels
plt.title('Age/Family Status Ordering Habits \n\nAVG. PRICE PER ITEM')
plt.ylabel('Price Per Item', fontsize=14)
plt.xlabel('Age', fontsize=14)

# Write the chart to the visualizations folder; bbox_inches='tight' keeps the x/y axis labels from being cut off
sc_deps_age_prices.figure.savefig(
    os.path.join(path, '04 Analysis', 'Visualizations', 'sc_deps_age_prices.png'),
    bbox_inches='tight',
)

### - There is NO connection between Age and Fam_Status in terms of ordering habits.

<a id='32'></a> <br>
> ## E. Demographics
> ## What different classifications does the demographic information suggest?

### - Age
- 18-81   
### - State
- 50 US States + D.C.
### - Gender
- Male or Female
### - Family Status
- single, divorced/widowed, living with parents and siblings, married
### - Income
- 25,903-593,901
### - Region
- Northeast, South, Midwest, West
### - Number of Dependants
- 0-3

<a id='33'></a> <br>
> ## F. Customer Profile Ordering Habits

In [None]:
# Replace outlier prices (anything above 100) with NaN so they no longer skew the price distribution

is_price_outlier = ict_final_df['prices'] > 100
ict_final_df.loc[is_price_outlier, 'prices'] = np.nan

In [None]:
# Build a separate frame with the customer profile and the ordering columns analysed in this section

profile_habit_cols = [
    'order_id',
    'user_id',
    'customer_profile',
    'prices',
    'product_name',
    'department',
    'orders_day_of_week',
    'order_hour_of_day',
    'days_since_prior_order',
    'max_order',
    'order_total',
]

# .copy() gives an independent frame, which avoids the 'copy of a slice' error later on
df_prof_order_habits = pd.DataFrame(ict_final_df, columns=profile_habit_cols).copy()

In [None]:
# check result: (rows, columns) of the new profile / ordering-habits frame

df_prof_order_habits.shape

<a id='34'></a> <br>
>> ## a. Price per Item

In [None]:
# Mean item price for each customer profile

avg_price = df_prof_order_habits.groupby('customer_profile')['prices'].mean()

# One horizontal bar per profile
plt.barh(avg_price.index, avg_price.values, edgecolor='black')

# Print the dollar amount so that it ends at the tip of each bar
for bar in plt.gca().patches:
    bar_len = bar.get_width()
    text_y = bar.get_y() + bar.get_height() / 4
    plt.text(bar_len, text_y, f'${bar_len:.2f}', ha='right', va='center', fontsize=14)

# Title and axis labels
plt.title('Average Price Per Item \nby Customer Profile', fontsize=14, fontweight='bold')
plt.ylabel('Customer Profile', fontsize=12, fontweight='bold')
plt.xlabel('Average Price Per Item')

# Write the chart to the visualizations folder; bbox_inches='tight' keeps the entire figure in the png
plt.savefig(
    os.path.join(path, '04 Analysis', 'Visualizations', 'barh_prof_ppprod_avg.png'),
    bbox_inches='tight',
)

<a id='35'></a> <br>
>> ## b. Products

In [None]:
# Cross-tabulate product_name (rows) against customer_profile (columns)

ct_prod_val_cnts = pd.crosstab(
    index=df_prof_order_habits['product_name'],
    columns=df_prof_order_habits['customer_profile'],
)

In [None]:
# check results: (number of products, number of customer profiles)

ct_prod_val_cnts.shape

In [None]:
# Order the products from highest to lowest count in the 'Middle aged married parent' column
ct_prod_val_cnts = ct_prod_val_cnts.sort_values(
    'Middle aged married parent',
    ascending=False,
)

In [None]:
# check result: after the descending sort, the first rows hold the highest 'Middle aged married parent' counts

ct_prod_val_cnts.head()

In [None]:
# Keep only the first 10 rows, i.e. the 10 highest 'Middle aged married parent' counts after the sort
ct_prod_val_cnts = ct_prod_val_cnts.iloc[:10]

In [None]:
# check result: at most 10 product rows should remain
ct_prod_val_cnts.shape

In [None]:
# Draw one horizontal bar chart per customer profile from the 10-row product crosstab.
#
# Fixes compared with the previous version of this cell:
#   * the color scale was built from `column` before the loop defined it (NameError on a fresh kernel,
#     or a stale value from an earlier cell); it is now built inside the loop for the profile being drawn
#   * colors are derived from the same sorted values that are plotted, so color tracks bar length
#   * every profile is saved to its own png instead of all of them overwriting one file
#   * every figure gets the suptitle, and tight_layout() runs before the figure is saved
#
# NOTE(review): the 10 rows were picked by sorting on 'Middle aged married parent' only, so for the other
# profiles these are not necessarily their own top 10 products - confirm this is intended.
# NOTE(review): the reversed map draws the longest bar lightest; switch to plt.cm.Blues if darker should mean more.

for column in ct_prod_val_cnts.columns:
    # Sort once and reuse for both the bars and their colors
    counts = ct_prod_val_cnts[column].sort_values(ascending=True)

    # Color range runs from 0 to this profile's largest count, using the reversed Blues map
    norm = plt.Normalize(0, counts.values.max())
    colors = plt.cm.Blues_r(norm(counts.values))

    fig, ax = plt.subplots()
    fig.suptitle('Top 10 Products Purchased by Customer Profile', fontsize=18, fontweight='bold')
    counts.plot.barh(ax=ax, linewidth=1, edgecolor='black', color=colors)

    # Label each bar with its count, formatted with thousands separators
    for c in ax.containers:
        ax.bar_label(c, fmt='  {:,.0f} \n units sold', fontweight='medium', fontsize=10)

    # Profile name as the chart title, plus the y-axis label
    ax.set_title(column, fontsize=14)
    ax.set_ylabel('Products')

    # adds n% whitespace to margins to keep labels inside the box
    ax.margins(x=0.3)

    fig.tight_layout()

    # Export chart to the visualizations folder, one file per profile (spaces become underscores)
    profile_slug = str(column).lower().replace(' ', '_')
    fig.savefig(
        os.path.join(path, '04 Analysis', 'Visualizations', f'barh_prof_fav_prod_{profile_slug}.png'),
        bbox_inches='tight',  # keeps entire fig in the png
    )
    plt.show()
    plt.close(fig)  # release the figure before drawing the next profile

<a id='36'></a> <br>
>> ## c. Department

In [None]:
# Cross-tabulate department (rows) against customer_profile (columns)

ct_dept_val_cnts = pd.crosstab(
    index=df_prof_order_habits['department'],
    columns=df_prof_order_habits['customer_profile'],
)

In [None]:
# check result: (number of departments, number of customer profiles)

ct_dept_val_cnts.shape

In [None]:
# preview the department x profile counts before sorting
ct_dept_val_cnts.head()

In [None]:
# Order the departments from highest to lowest count in the 'Middle aged married parent' column
ct_dept_val_cnts = ct_dept_val_cnts.sort_values(
    'Middle aged married parent',
    ascending=False,
)

In [None]:
# check result: after the descending sort, the first rows hold the highest 'Middle aged married parent' counts
ct_dept_val_cnts.head()

In [None]:
# Keep only the first 10 rows, i.e. the 10 highest 'Middle aged married parent' counts after the sort
ct_dept_val_cnts = ct_dept_val_cnts.iloc[:10]

In [None]:
# display the remaining department rows (at most 10)
ct_dept_val_cnts

In [None]:
# Draw one horizontal bar chart per customer profile from the 10-row department crosstab.
#
# Fixes compared with the previous version of this cell:
#   * the color scale was built from `column` before the loop defined it (NameError on a fresh kernel,
#     or a stale value from an earlier cell); it is now built inside the loop for the profile being drawn
#   * colors are derived from the same sorted values that are plotted, so color tracks bar length
#   * every profile is saved to its own png instead of all of them overwriting one file
#   * every figure gets the suptitle, and tight_layout() runs before the figure is saved
#
# NOTE(review): the 10 rows were picked by sorting on 'Middle aged married parent' only, so for the other
# profiles these are not necessarily their own top 10 departments - confirm this is intended.
# NOTE(review): the reversed map draws the longest bar lightest; switch to plt.cm.Reds if darker should mean more.

for column in ct_dept_val_cnts.columns:
    # Sort once and reuse for both the bars and their colors
    counts = ct_dept_val_cnts[column].sort_values(ascending=True)

    # Color range runs from 0 to this profile's largest count, using the reversed Reds map
    norm = plt.Normalize(0, counts.values.max())
    colors = plt.cm.Reds_r(norm(counts.values))

    fig, ax = plt.subplots()
    fig.suptitle('Top 10 Departments by Customer Profile', fontsize=18, fontweight='bold')
    counts.plot.barh(ax=ax, linewidth=1, edgecolor='black', color=colors)

    # Label each bar with its count, formatted with thousands separators
    for c in ax.containers:
        ax.bar_label(c, fmt='  {:,.0f} \n  visits', fontweight='medium', fontsize=10)

    # Profile name as the chart title, plus the y-axis label
    ax.set_title(column, fontsize=14)
    ax.set_ylabel('Departments')

    # adds n% whitespace to margins to keep labels inside the box
    ax.margins(x=0.3)

    fig.tight_layout()

    # Export chart to the visualizations folder, one file per profile (spaces become underscores)
    profile_slug = str(column).lower().replace(' ', '_')
    fig.savefig(
        os.path.join(path, '04 Analysis', 'Visualizations', f'barh_prof_fav_dept_{profile_slug}.png'),
        bbox_inches='tight',  # keeps entire fig in the png
    )
    plt.show()
    plt.close(fig)  # release the figure before drawing the next profile

<a id='37'></a> <br>
>> ## d. Orders_day_of_week

In [None]:
# Keep one row per order_id (the first occurrence) so the day-of-week, hour-of-day,
# days-since-prior-order and order-total figures below count each order once.
# NOTE: this overwrites df_prof_order_habits, so re-running the earlier product/department
# crosstab cells after this one would operate on the de-duplicated frame.

df_prof_order_habits = df_prof_order_habits.drop_duplicates(subset=['order_id'], keep='first')

df_prof_order_habits.shape

In [None]:
# Cross-tabulate orders_day_of_week (rows) against customer_profile (columns)

ct_dow_val_cnts = pd.crosstab(
    index=df_prof_order_habits['orders_day_of_week'],
    columns=df_prof_order_habits['customer_profile'],
)

In [None]:
# check result: (number of day-of-week values, number of customer profiles)

ct_dow_val_cnts.shape

In [None]:
# display the full day-of-week x profile table
ct_dow_val_cnts

In [None]:
# Move orders_day_of_week out of the index into a regular column so it can be mapped to names
ct_dow_val_cnts = ct_dow_val_cnts.reset_index()

In [None]:
# Lookup from numeric day code to weekday name; code 0 maps to Saturday and code 6 to Friday

days = dict(enumerate(['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']))

# Translate the day codes into a new 'weekday_name' column

ct_dow_val_cnts['weekday_name'] = ct_dow_val_cnts['orders_day_of_week'].map(days)

In [None]:
# check result: every day code should now have a matching weekday_name
ct_dow_val_cnts

In [None]:
# The numeric day code is no longer needed once weekday_name exists, so remove that column

ct_dow_val_cnts = ct_dow_val_cnts.drop(columns='orders_day_of_week')

ct_dow_val_cnts.head()

In [None]:
# Reorder the columns: weekday_name first, followed by the seven customer profiles

ct_dow_val_cnts = ct_dow_val_cnts.reindex(
    columns=[
        'weekday_name',
        'Middle aged married parent',
        'Middle aged single adult',
        'Older married parent',
        'Older single adult',
        'Young married parent',
        'Young single adult',
        'Young single parent',
    ]
)

ct_dow_val_cnts.head()

In [None]:
# Use the weekday names as the row index so they become the bar labels when plotting

ct_dow_val_cnts = ct_dow_val_cnts.set_index(keys='weekday_name')

In [None]:
# check result: weekday_name should now be the index
ct_dow_val_cnts.head()

In [None]:
# Draw one horizontal bar chart of orders per weekday for each customer profile.
#
# Fixes compared with the previous version of this cell:
#   * the color scale was built from `column` before the loop defined it (NameError on a fresh kernel,
#     or a stale value from an earlier cell); it is now built inside the loop for the profile being drawn
#   * colors are derived from the same sorted values that are plotted, so color tracks bar length
#   * every profile is saved to its own png instead of all of them overwriting one file
#   * every figure gets the suptitle, and tight_layout() runs before the figure is saved
#
# NOTE(review): the reversed map draws the longest bar lightest; switch to plt.cm.Greens if darker should mean more.

for column in ct_dow_val_cnts.columns:
    # Sort once and reuse for both the bars and their colors
    counts = ct_dow_val_cnts[column].sort_values(ascending=True)

    # Color range runs from 0 to this profile's largest count, using the reversed Greens map
    norm = plt.Normalize(0, counts.values.max())
    colors = plt.cm.Greens_r(norm(counts.values))

    fig, ax = plt.subplots()
    fig.suptitle('Most Ordered Day of Week by Customer Profile', fontsize=18, fontweight='bold')
    counts.plot.barh(ax=ax, linewidth=1, edgecolor='black', color=colors)

    # Label each bar with its count, formatted with thousands separators
    for c in ax.containers:
        ax.bar_label(c, fmt='  {:,.0f} \n orders', fontweight='medium', fontsize=10)

    # Profile name as the chart title, plus the y-axis label
    ax.set_title(column, fontsize=14)
    ax.set_ylabel('Day of Week')

    # adds n% whitespace to margins to keep labels inside the box
    ax.margins(x=0.3)

    fig.tight_layout()

    # Export chart to the visualizations folder, one file per profile (spaces become underscores)
    profile_slug = str(column).lower().replace(' ', '_')
    fig.savefig(
        os.path.join(path, '04 Analysis', 'Visualizations', f'barh_prof_freq_dow_{profile_slug}.png'),
        bbox_inches='tight',  # keeps entire fig in the png
    )
    plt.show()
    plt.close(fig)  # release the figure before drawing the next profile

<a id='38'></a> <br>
>> ## e. Order_hour_of_day

In [None]:
# Cross-tabulate order_hour_of_day (rows) against customer_profile (columns)

ct_hod_val_cnts = pd.crosstab(
    index=df_prof_order_habits['order_hour_of_day'],
    columns=df_prof_order_habits['customer_profile'],
)

In [None]:
# check result: (number of hour-of-day values, number of customer profiles)

ct_hod_val_cnts.shape

In [None]:
# display the full hour-of-day x profile table
ct_hod_val_cnts

In [None]:
# Move order_hour_of_day out of the index into a regular column so it can be mapped to labels
ct_hod_val_cnts = ct_hod_val_cnts.reset_index()

In [None]:
# Lookup from 24-hour clock value (0-23) to a 12-hour label: 0 -> '12am', 12 -> '12pm', 13 -> '1pm', ...

hours = {
    hour: f"{hour % 12 or 12}{'am' if hour < 12 else 'pm'}"
    for hour in range(24)
}

# Translate the hour values into a new 'hour_of_day' column

ct_hod_val_cnts['hour_of_day'] = ct_hod_val_cnts['order_hour_of_day'].map(hours)

In [None]:
# check result: every hour value should now have a matching hour_of_day label
ct_hod_val_cnts

In [None]:
# The numeric hour is no longer needed once hour_of_day exists, so remove that column

ct_hod_val_cnts = ct_hod_val_cnts.drop(columns='order_hour_of_day')

ct_hod_val_cnts.head()

In [None]:
# Reorder the columns: hour_of_day first, followed by the seven customer profiles

ct_hod_val_cnts = ct_hod_val_cnts.reindex(
    columns=[
        'hour_of_day',
        'Middle aged married parent',
        'Middle aged single adult',
        'Older married parent',
        'Older single adult',
        'Young married parent',
        'Young single adult',
        'Young single parent',
    ]
)

ct_hod_val_cnts.head()

In [None]:
# Use the hour labels as the row index so they become the bar labels when plotting

ct_hod_val_cnts = ct_hod_val_cnts.set_index(keys='hour_of_day')

In [None]:
# check result: hour_of_day should now be the index
ct_hod_val_cnts.head()

In [None]:
# Draw one horizontal bar chart of orders per hour of day for each customer profile.
#
# Fixes compared with the previous version of this cell:
#   * the color scale was built from `column` before the loop defined it (NameError on a fresh kernel,
#     or a stale value from an earlier cell); it is now built inside the loop for the profile being drawn
#   * colors are derived from the same sorted values that are plotted, so color tracks bar length
#   * every profile is saved to its own png instead of all of them overwriting one file
#   * every figure gets the suptitle, and tight_layout() runs before the figure is saved
#
# NOTE(review): the reversed map draws the longest bar lightest; switch to plt.cm.Oranges if darker should mean more.

for column in ct_hod_val_cnts.columns:
    # Sort once and reuse for both the bars and their colors
    counts = ct_hod_val_cnts[column].sort_values(ascending=True)

    # Color range runs from 0 to this profile's largest count, using the reversed Oranges map
    norm = plt.Normalize(0, counts.values.max())
    colors = plt.cm.Oranges_r(norm(counts.values))

    fig, ax = plt.subplots()
    fig.suptitle('Most Ordered Hour of Day by Customer Profile', fontsize=18, fontweight='bold')
    counts.plot.barh(ax=ax, linewidth=1, edgecolor='black', color=colors)

    # Label each bar with its count, formatted with thousands separators
    for c in ax.containers:
        ax.bar_label(c, fmt=' {:,.0f} orders', fontweight='medium', fontsize=10)

    # Profile name as the chart title, plus the y-axis label
    ax.set_title(column, fontsize=14)
    ax.set_ylabel('Hour of Day')

    # adds n% whitespace to margins to keep labels inside the box
    ax.margins(x=0.3)

    fig.tight_layout()

    # Export chart to the visualizations folder, one file per profile (spaces become underscores)
    profile_slug = str(column).lower().replace(' ', '_')
    fig.savefig(
        os.path.join(path, '04 Analysis', 'Visualizations', f'barh_prof_freq_hod_{profile_slug}.png'),
        bbox_inches='tight',  # keeps entire fig in the png
    )
    plt.show()
    plt.close(fig)  # release the figure before drawing the next profile

<a id='39'></a> <br>
>> ## f. Days between orders

In [None]:
# Median days_since_prior_order for each customer profile

med_prior = df_prof_order_habits.groupby('customer_profile')['days_since_prior_order'].median()

# One horizontal bar per profile
plt.barh(med_prior.index, med_prior.values, edgecolor='black')

# Print the number of days so that it ends at the tip of each bar
for bar in plt.gca().patches:
    plt.text(bar.get_width(), bar.get_y() + bar.get_height() / 4, f'{bar.get_width():.0f} days', ha='right', va='center', fontsize=14)

plt.xlabel('Days Between Orders')
plt.ylabel('Customer Profile', fontsize=12, fontweight='bold')
# The statistic plotted is the median, so the title says "Median" (it previously said "Average")
plt.title('Median Days Between Orders \nby Customer Profile', fontsize=14, fontweight='bold')

# Export chart to the visualizations folder using savefig() function

plt.savefig(os.path.join(path, '04 Analysis','Visualizations', 'barh_prof_prior_median.png'), 
                                bbox_inches='tight') # keeps entire fig in the png

<a id='40'></a> <br>
>> ## g. Order Total

In [None]:
# Mean order_total for each customer profile

avg_order = df_prof_order_habits.groupby('customer_profile')['order_total'].mean()

# One horizontal bar per profile
plt.barh(avg_order.index, avg_order.values, edgecolor='black')

# Print the dollar amount so that it ends at the tip of each bar
for bar in plt.gca().patches:
    bar_len = bar.get_width()
    text_y = bar.get_y() + bar.get_height() / 4
    plt.text(bar_len, text_y, f'${bar_len:.2f}', ha='right', va='center', fontsize=14)

# Title and axis labels
plt.title('Average Order Total \nby Customer Profile', fontsize=14, fontweight='bold')
plt.ylabel('Customer Profile', fontsize=12, fontweight='bold')
plt.xlabel('Order Total')

# Write the chart to the visualizations folder; bbox_inches='tight' keeps the entire figure in the png
plt.savefig(
    os.path.join(path, '04 Analysis', 'Visualizations', 'barh_prof_avg_order.png'),
    bbox_inches='tight',
)

<a id='41'></a> <br>
>> ## h. Max_order

In [None]:
# Keep one row per user_id (the first occurrence) so the average max_order counts each customer once.
# NOTE: this overwrites df_prof_order_habits again, so the order-level cells above would see
# one row per customer if they were re-run after this cell.

df_prof_order_habits = df_prof_order_habits.drop_duplicates(subset=['user_id'], keep='first')

df_prof_order_habits.shape

In [None]:
# Mean max_order for each customer profile

avg_max_order = df_prof_order_habits.groupby('customer_profile')['max_order'].mean()

# One horizontal bar per profile
plt.barh(avg_max_order.index, avg_max_order.values, edgecolor='black')

# Print the rounded order count so that it ends at the tip of each bar
for bar in plt.gca().patches:
    bar_len = bar.get_width()
    text_y = bar.get_y() + bar.get_height() / 4
    plt.text(bar_len, text_y, f'{bar_len:.0f} orders', ha='right', va='center', fontsize=14)

# Title and axis labels
plt.title('Average Number of Orders \nby Customer Profile', fontsize=14, fontweight='bold')
plt.ylabel('Customer Profile', fontsize=12, fontweight='bold')
plt.xlabel('Total Orders')

# Write the chart to the visualizations folder; bbox_inches='tight' keeps the entire figure in the png
plt.savefig(
    os.path.join(path, '04 Analysis', 'Visualizations', 'barh_prof_avg_max_order.png'),
    bbox_inches='tight',
)