In [1]:
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt

In [2]:
## load cleaned data
segment_revenue_and_count = pd.read_pickle("segment_revenue_and_count.pickle")
segment_count = pd.read_pickle("segment_count.pickle")

In [3]:
segment_count.sort_values("count", ascending = False)

Unnamed: 0,Age,Gender,Income,Children,Housing,Relationship,count
636,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,64538
634,Unknown,Female,Unknown,Unknown,Unknown,Unknown,34395
734,Unknown,Female,Middle,,House,Married,22247
1438,65+,Female,Low,,House,Married,19944
2239,Unknown,Female,Unknown,Unknown,House,Unknown,18974
...,...,...,...,...,...,...,...
3450,65+,Male,High,Baby,House,Single,1
3463,55-64,Unknown,VeryHigh,Youth,Flat,Unknown,1
3480,18-24,Male,VeryHigh,,Flat,Married,1
3481,18-24,Male,Middle,,Unknown,Unknown,1


In [4]:
segment_revenue_and_count.head()

Unnamed: 0,Age,Gender,Income,Children,Housing,Relationship,PurchaseCategoryA,PurchaseCategoryB,revenue,total_purchases
0,35-44,Male,Middle,Youth,Flat,Married,Home,Beds & Mattresses,733.45,6
1,Unknown,Female,Low,Youth,House,Unknown,Home,Beds & Mattresses,10745.972,67
2,45-54,Unknown,VeryHigh,,House,Separated,Home,Beds & Mattresses,2204.83,16
3,25-34,Unknown,Middle,Teenager,House,Married,Home,Beds & Mattresses,25627.18,145
4,35-44,Unknown,Middle,,Flat,Unknown,Home,Beds & Mattresses,3405.24,21


In [5]:
## avg revenue per purchase
segment_revenue_and_count["avg_spend"] = segment_revenue_and_count.revenue/segment_revenue_and_count.total_purchases

In [6]:
segment_revenue_and_count.head()

Unnamed: 0,Age,Gender,Income,Children,Housing,Relationship,PurchaseCategoryA,PurchaseCategoryB,revenue,total_purchases,avg_spend
0,35-44,Male,Middle,Youth,Flat,Married,Home,Beds & Mattresses,733.45,6,122.241667
1,Unknown,Female,Low,Youth,House,Unknown,Home,Beds & Mattresses,10745.972,67,160.387642
2,45-54,Unknown,VeryHigh,,House,Separated,Home,Beds & Mattresses,2204.83,16,137.801875
3,25-34,Unknown,Middle,Teenager,House,Married,Home,Beds & Mattresses,25627.18,145,176.739172
4,35-44,Unknown,Middle,,Flat,Unknown,Home,Beds & Mattresses,3405.24,21,162.154286


In [7]:
def get_spend_stats_by_col(spend_df,
                           col_name, 
                           filter_col = None, 
                           filter_value = None, 
                           remove_unknowns = False, 
                           include_segment_count = False,
                           sort_order = ["revenue","avg_spend"]):
    
    """
    This function takes the spend data and generates revenue and total purchase stats based on a field
    We determine:
        1. Grouped total revenue for categories in that field
        2. Grouped total purchases for categories in that field
        3. The percentage share of revenue for a group
        4. The percentage share of total purchases for a group
        5. The total number (and %) of customers in a group
        6. The revenue generated per customers in that group
    
    Parameters:
        spend_df: spend data (segment_revenue_and_count)
        col_name: the column to generate stats on (eg.: Age, Income, Gender etc.)
        filter_col and filter_value: To filter the spend data on a purchase subc4ategory and generated some fine grained analysis
        remove_unknowns: Boolean to consider unknowns in the dataset or not
        include_segment_count: Boolean to include segment counts from segment_count dataframe
        sort_order: return the dataframe sorted in some specific order
    
    """
    
    if filter_col:
        spend_df = spend_df[spend_df[filter_col] == filter_value]
        
    if remove_unknowns:
        spend_df = spend_df[spend_df[col_name] != 'Unknown']
        spend_df = spend_df[spend_df[col_name] != 'Other']
    
    ## group spend data on column and find total revenue and puchases
    col_groups = spend_df.groupby(col_name).agg({'revenue': 'sum', 
                                                 'total_purchases': 'sum'})
    
    col_groups = col_groups[col_groups.revenue > 0]
    
    ## column of avg spend per group per transaction
    col_groups["avg_spend"] = col_groups.revenue/col_groups.total_purchases

    if sort_order != "index":
        col_groups = col_groups.sort_values(sort_order, ascending = False)
    
    ## percentage of revenue share and total purchases share for each group
    col_groups["pct_rev"] = 100*col_groups["revenue"]/col_groups["revenue"].sum()   
    col_groups["pct_total_purchases"] = 100*col_groups["total_purchases"]/col_groups["total_purchases"].sum()
    
    col_groups = col_groups.round(2)
    
    ## add count of customers in a group
    if include_segment_count:
        col_count = segment_count.groupby(col_name).size().reset_index().rename(columns = {0:"cust_count"})
        col_groups = pd.merge(col_groups, col_count, left_index=True, right_on=col_name)
        col_groups.set_index(col_name, inplace = True)
        
        col_groups["pct_cust_count"] = 100*col_groups["cust_count"]/col_groups["cust_count"].sum()
        
        col_groups["rev_per_customer"] = col_groups["revenue"]/col_groups["cust_count"]
    
    display(col_groups.style.background_gradient(cmap="Blues"))

In [8]:
def spending_category_stats_by_col(spend_df, 
                                   col_name, 
                                   transpose = True):
    """
    This function takes the spend data and generates top purchase categories for individual groups in a field
    We create a pivot table which depicts a cross tabulation of groups in a field with all spend categories:

    Parameters:
        spend_df: spend data (segment_revenue_and_count)
        col_name: the column to generate stats on (eg.: Age, Income, Gender etc.)
        
    """
    
    ## change data type to str  
    spend_df[col_name] = spend_df[col_name].astype(str)
    spend_df["PurchaseCategoryA"] = spend_df["PurchaseCategoryA"].astype(str)
    
    ## remove unknowns
    filtered_segment_revenue_and_count = spend_df[(spend_df[col_name] != 'Unknown') & (spend_df["PurchaseCategoryA"] != 'Unknown')]
    
    ## create pivot table 
    col_name_purchase_pivot = pd.pivot_table(filtered_segment_revenue_and_count, columns = col_name, index = "PurchaseCategoryA", aggfunc={"revenue":np.sum, "total_purchases":np.sum})
    
    ## add revenue share for Gender
    if col_name == "Gender":
        col_name_purchase_pivot["female/male rev"] = col_name_purchase_pivot.revenue.Female / col_name_purchase_pivot.revenue.Male
    
    ## add revenue share for Housing catagories
    if col_name == "Housing":
        col_name_purchase_pivot["House/Flat rev"] = col_name_purchase_pivot.revenue.House / col_name_purchase_pivot.revenue.Flat
    
    
    display(col_name_purchase_pivot.style.background_gradient(cmap="Blues"))
    
    return col_name_purchase_pivot

In [9]:
def spending_category_stats_by_col_transpose(spend_df, 
                                             col_name):
    
    """
    This function serves the same object as the spending_category_stats_by_col but inverts the pivot index and columns

    Parameters:
        spend_df: spend data (segment_revenue_and_count)
        col_name: the column to generate stats on (eg.: Age, Income, Gender etc.)
        
    """
    
    ## change data type to str  
    spend_df[col_name] = spend_df[col_name].astype(str)
    spend_df["PurchaseCategoryA"] = spend_df["PurchaseCategoryA"].astype(str)
    
    ## remove unknowns
    filtered_segment_revenue_and_count = spend_df[(spend_df[col_name] != 'Unknown') & (spend_df["PurchaseCategoryA"] != 'Unknown')]
    
    ## create pivot table 
    col_name_purchase_pivot = pd.pivot_table(filtered_segment_revenue_and_count, index = col_name, columns = "PurchaseCategoryA", aggfunc={"revenue":np.sum, "total_purchases":np.sum})

    display(col_name_purchase_pivot.style.background_gradient(cmap="Blues"))

In [10]:
def spending_category_stats_by_col_rev_per_cust(spending_category_pivot, 
                                                col_name):
    """
    This function manipulates the pivot table generated by spending_category_stats_by_col method
    and determines the revenue per customer for each field-group * spend category.
    
    Parameters:
        spending_category_pivot: pivot table genearted by spending_category_stats_by_col function
        col_name: the column to generate stats on (eg.: Age, Income, Gender etc.)
        
    """
    
    ## find count of customers in segment
    col_count = segment_count.groupby(col_name).size().reset_index().rename(columns = {0:"cust_count"})
    
    ## merge count with groups in the spending pivot 
    spending_category_pivot = pd.merge(spending_category_pivot.revenue.T, col_count, left_index=True, right_on=col_name)
    spending_category_pivot.set_index(col_name, inplace = True)
    
    ## the percentage of cystomers in a category
    spending_category_pivot["pct_cust_count"] = 100*spending_category_pivot["cust_count"]/spending_category_pivot["cust_count"].sum()
    
    ## revenue per customer in a category
    spending_category_pivot[spending_category_pivot.columns[:-2]] = spending_category_pivot.iloc[:, :-2]/np.array(spending_category_pivot[["cust_count"]])
    spending_category_pivot = spending_category_pivot.iloc[:, :-2]
    spending_category_pivot = spending_category_pivot.T
    
    display(spending_category_pivot.style.background_gradient(cmap="Blues"))

# EDA

## Highest revenue generating product categories

In [11]:
get_spend_stats_by_col(segment_revenue_and_count, "PurchaseCategoryA")

## The first five spend categories (Home, Garden, Beauty, Electronics and Spas & Country House) 
## account for over 60% of the total revenue.

## Home, Garden, Beauty and Electronics are also the top four areas of footfall for consumers. 
## If we don't consider "Other" category, the next most transacted category is Fashion 
## (which is another common cusumer spend in general)

## The average ticket size is highest in Beach Holidays and Wedding, which also makes sense.
## But both of them combined account for less than 1% revenue and transactions.

## Another high ticket size and revenue generating categories are UK City Breaks and European City Breaks.

## In fact, every category that has over 1% of total revenue and pct_total_purchases are all commonly transacted areas.

Unnamed: 0_level_0,revenue,total_purchases,avg_spend,pct_rev,pct_total_purchases
PurchaseCategoryA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Home,46606606.3,698896,66.69,23.01,13.48
Garden,31731150.9,549247,57.77,15.67,10.6
Beauty,18362390.62,466295,39.38,9.07,9.0
Electronics,17152439.75,478333,35.86,8.47,9.23
Spas & Country House,13089396.26,93329,140.25,6.46,1.8
UK City Breaks,12750782.25,111729,114.12,6.3,2.16
European City Breaks,9224984.61,70656,130.56,4.56,1.36
Restaurants & Bars,7657227.81,241948,31.65,3.78,4.67
Activities,6765654.63,273882,24.7,3.34,5.28
Fashion,5772682.84,346211,16.67,2.85,6.68


### Top sub-categories in Home Spending

In [12]:
get_spend_stats_by_col(segment_revenue_and_count, "PurchaseCategoryB", filter_col="PurchaseCategoryA", filter_value="Home")

## Beds & Mattresses primarily dominates Home spending. The most revenue and footfall comes from this sub-category.

## Bedding is another area many purchases take place, however, the average ticket size is small.

## Relatively less transactions take place to purchase Sofas & Futons, but this area has highest ticket size.

## Dining Furniture, Bedroom Furniture and Chairs & Stools & Beanbags are three more areas of high ticket size

## Canvas/Art/Accessories and Cleaning & Home Maintenance also receive high footfall for Home spending

Unnamed: 0_level_0,revenue,total_purchases,avg_spend,pct_rev,pct_total_purchases
PurchaseCategoryB,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Beds & Mattresses,27158579.37,163993,165.61,58.27,23.46
Sofas & Futons,3523261.25,8283,425.36,7.56,1.19
Bedding,3375152.25,149480,22.58,7.24,21.39
Chairs & Stools & Beanbags,2295070.56,28598,80.25,4.92,4.09
Bedroom Furniture,1788195.51,18731,95.47,3.84,2.68
Cleaning & Home Maintenance,1775733.41,60601,29.3,3.81,8.67
Storage Solutions,1268211.23,30797,41.18,2.72,4.41
Canvas/Art/Accessories,870657.23,74581,11.67,1.87,10.67
Soft Furnishings,727407.25,36309,20.03,1.56,5.2
Laundry,711271.96,16396,43.38,1.53,2.35


### Top sub-categories in Garden Spending

In [13]:
get_spend_stats_by_col(segment_revenue_and_count, "PurchaseCategoryB", filter_col="PurchaseCategoryA", filter_value="Garden")

## Garden Furniture provides the highest revenue in Garden Spending, over 50% of total revenue

## Plants & Flowers has the highest footfall though, which is also explained by its avg ticket size,
## indicating more affordability

Unnamed: 0_level_0,revenue,total_purchases,avg_spend,pct_rev,pct_total_purchases
PurchaseCategoryB,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Garden Furniture,17029006.97,68573,248.33,53.67,12.48
Garden Leisure,5117384.32,118608,43.15,16.13,21.59
Plants & Flowers,3782637.12,159318,23.74,11.92,29.01
Storage Solutions,1688849.62,31335,53.9,5.32,5.71
Garden Lighting,1490336.85,74345,20.05,4.7,13.54
Garden Tools,1365630.15,65641,20.8,4.3,11.95
BBQ & Picnic,1017973.72,19862,51.25,3.21,3.62
Indoor Plants,205457.24,9681,21.22,0.65,1.76
Other,33874.9,1884,17.98,0.11,0.34


### Top sub-categories in Electronics Spending

In [14]:
get_spend_stats_by_col(segment_revenue_and_count, "PurchaseCategoryB", remove_unknowns=True, filter_col="PurchaseCategoryA", filter_value="Electronics")

Unnamed: 0_level_0,revenue,total_purchases,avg_spend,pct_rev,pct_total_purchases
PurchaseCategoryB,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Accessories,4229225.9,182606,23.16,24.67,38.18
Computing,3271715.57,32162,101.73,19.09,6.72
Tablets,2651540.72,29797,88.99,15.47,6.23
Camera & Photo,1357962.2,42034,32.31,7.92,8.79
Software,1253550.18,41274,30.37,7.31,8.63
Headphones,1033343.92,48291,21.4,6.03,10.1
White Goods,946423.9,27420,34.52,5.52,5.73
Gaming,735983.27,11125,66.16,4.29,2.33
Mobile Phones,633218.94,4665,135.74,3.69,0.98
Phone Accessories,410593.74,41185,9.97,2.4,8.61


## Does Age Influence Revenue and Total Transactions?

### Without unknown

In [15]:
get_spend_stats_by_col(segment_revenue_and_count, "Age", remove_unknowns=True, sort_order="index", include_segment_count = True)

## 1) average revenue per transaction is roughly the same in each age group.
## This shows that customers are equally engaged irrespective of the age group.

## 2) The total revenue, as well as number of transactions are the highest for 45-55 (not considering unknown)
## These are the most actively engaged customers

## 3) Despite point 2), roughly 65% of the total revenue is coming from 25-65+ group.

## 4) Overall, there is no major indication that Age has any influence on generating revenue except for kids
## and all age-groups are equally important for revenue.

## 5) The average revenue per customer of Age 18-14 is also the lowest

## 6) Although 45-55 age group has the highest revenue, the rev_per_customer is the highest for 65+, which also has
## the least number of customers

Unnamed: 0_level_0,revenue,total_purchases,avg_spend,pct_rev,pct_total_purchases,cust_count,pct_cust_count,rev_per_customer
Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
18-24,5791287.28,145401,39.83,4.17,4.09,714,16.331199,8111.046611
25-34,27680683.21,707211,39.14,19.94,19.88,833,19.053065,33230.111897
35-44,28377308.78,727407,39.01,20.44,20.45,818,18.709973,34691.086528
45-54,29592209.31,747204,39.6,21.32,21.01,748,17.108875,39561.777152
55-64,23644179.32,604612,39.11,17.03,17.0,663,15.164684,35662.412247
65+,23729479.17,624715,37.98,17.09,17.57,596,13.632205,39814.562366


### Top Spending Categories by Age

In [16]:
col_groups = spending_category_stats_by_col(segment_revenue_and_count, "Age")

## Irrespective of age group, Home spending remains the dominating category for revenue and total purchases

## However, home spending typically decreases with age

## Instead, there is a transitional shift in spending towards Garden.

## Beauty spending also decreases with increasing Age

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,revenue,revenue,revenue,revenue,revenue,revenue,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases
Age,18-24,25-34,35-44,45-54,55-64,65+,18-24,25-34,35-44,45-54,55-64,65+
PurchaseCategoryA,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Activities,199582.262812,948392.575514,1032243.402178,1017368.788901,752224.121107,749683.367317,7796,39472,43475,40577,29532,29638
Adult,16313.782497,89663.467153,83433.51782,82024.466163,59151.016126,65010.710154,954,5272,4936,4694,3521,3854
Beach Holidays,42628.294998,231079.289977,247814.097637,220727.166654,158427.124986,151130.096651,252,1386,1500,1335,941,941
Beauty,571655.166765,2727339.614331,2882635.959776,2734494.11054,1969740.131393,1824134.731267,14060,67401,71717,68521,51337,49566
Children and Baby,151903.161139,819192.59284,813831.615023,734482.416479,616023.182817,623563.930721,7795,43212,43314,39442,33076,34152
Electronics,501611.20204,2486699.518114,2554553.294086,2384285.100315,1864150.270206,1900697.595781,13517,68116,69348,66651,53068,55617
Entertainment,101822.145558,492835.405503,560715.397791,544040.510821,414340.211625,396293.467695,6540,32488,35618,33675,25708,24714
European City Breaks,332395.117618,1348086.626421,1240520.915126,1336181.38314,1013262.257514,908357.935181,2611,10467,9290,10234,7712,6811
Fashion,154485.123971,728066.362911,768513.169615,835413.994114,691600.58207,744250.312176,9218,43596,46041,50132,41614,45007
Fitness,129643.499994,610396.099286,633122.876099,656462.531859,503634.135997,531584.149668,2595,12560,12632,12790,10127,10187


In [17]:
spending_category_stats_by_col_transpose(segment_revenue_and_count, "Age")

## With Age, Spending decreases in almost all categories

## It reaches the peak at 35-44 and 45-54 age grooups

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases
PurchaseCategoryA,Activities,Adult,Beach Holidays,Beauty,Children and Baby,Electronics,Entertainment,European City Breaks,Fashion,Fitness,Food and Drink,Garden,Gifts,Healthcare,Holiday Extras,Home,Home Essentials,Ireland,Jewellery,Learning,Legal Services,Leisure,Long Haul & Cruises,Motoring,Other,Personalised Gifts,Pets,Restaurants & Bars,Spas & Country House,Sports & Adventure,Tradesmen,UK City Breaks,UK Seaside,Wedding,Activities,Adult,Beach Holidays,Beauty,Children and Baby,Electronics,Entertainment,European City Breaks,Fashion,Fitness,Food and Drink,Garden,Gifts,Healthcare,Holiday Extras,Home,Home Essentials,Ireland,Jewellery,Learning,Legal Services,Leisure,Long Haul & Cruises,Motoring,Other,Personalised Gifts,Pets,Restaurants & Bars,Spas & Country House,Sports & Adventure,Tradesmen,UK City Breaks,UK Seaside,Wedding
Age,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2
18-24,199582.262812,16313.782497,42628.294998,571655.166765,151903.161139,501611.20204,101822.145558,332395.117618,154485.123971,129643.499994,78113.929114,759445.078524,42.491667,56617.911512,3133.69612,1377698.061548,19801.221333,590.151666,134740.470336,37942.435055,2133.468335,3876.257334,13569.918333,4225.415999,42808.784155,22534.690001,11697.598378,219261.56231,352382.694751,11462.098326,4109.766674,379877.200824,51820.438301,1362.185,7796,954,252,14060,7795,13517,6540,2611,9218,2595,4834,13462,1,2695,834,20014,1218,6,3367,3158,152,371,501,408,12298,2165,881,6974,2583,103,187,3395,448,8
25-34,948392.575514,89663.467153,231079.289977,2727339.614331,819192.59284,2486699.518114,492835.405503,1348086.626421,728066.362911,610396.099286,353441.068914,3604759.733737,23.97,267998.845743,11910.937225,6859596.054384,114812.981828,3660.113329,649654.625752,180596.279971,9589.790025,18224.297171,66596.414995,21746.204334,212848.168914,114821.662472,58604.130577,999179.190197,1655772.225523,48614.82998,23945.396047,1682879.728184,238145.52983,1509.478334,39472,5272,1386,67401,43212,68116,32488,10467,43596,12560,22243,62691,2,13175,3586,100195,6782,36,16077,15015,679,1711,2348,2012,58686,11514,4329,31486,12180,455,1068,14899,2064,8
35-44,1032243.402178,83433.51782,247814.097637,2882635.959776,813831.615023,2554553.294086,560715.397791,1240520.915126,768513.169615,633122.876099,368412.550193,3670512.607949,7.0,291159.850906,13612.807263,6943222.272806,110278.40816,3831.126664,662757.209342,184786.770801,11696.897694,18521.423842,51643.720995,21336.274668,215608.421352,115123.9798,57527.604196,1129626.632771,1715364.323228,43577.12664,24806.229218,1675674.178188,229263.642528,1573.48,43475,4936,1500,71717,43314,69348,35618,9290,46041,12632,22743,63437,1,13789,3716,100702,6553,35,15981,15268,822,1808,2370,2015,59660,11346,4131,34591,12422,414,1098,14651,1969,14
45-54,1017368.788901,82024.466163,220727.166654,2734494.11054,734482.416479,2384285.100315,544040.510821,1336181.38314,835413.994114,656462.531859,457250.375294,4785793.307737,82.248334,297967.060395,17015.963767,6558487.988271,108385.559661,6455.449998,604453.624683,170040.387626,14420.276707,22596.90218,63435.54933,26598.369663,226155.588903,121564.710631,67921.377121,1192475.875767,1934054.054491,48527.306637,27265.285052,2023232.371349,272362.553169,186.656667,40577,4694,1335,68521,39442,66651,33675,10234,50132,12790,26448,79694,3,14835,4247,98218,6319,48,14652,14009,973,2008,2397,2533,63036,12233,4889,37073,13892,460,1182,17653,2338,13
55-64,752224.121107,59151.016126,158427.124986,1969740.131393,616023.182817,1864150.270206,414340.211625,1013262.257514,691600.58207,503634.135997,384568.344296,4263698.33785,42.491667,232465.542889,17543.463779,5058082.147631,94960.041994,2080.115,431309.662181,120180.19289,11956.708694,17735.611672,43786.643327,23595.071833,187341.794267,99650.342498,56143.253702,929525.876532,1707435.298387,47519.223305,22035.462711,1584357.030573,263470.683157,2142.945,29532,3521,941,51337,33076,53068,25708,7712,41614,10127,22250,73749,1,11728,3487,78190,5523,28,10920,9984,826,1559,2021,2274,51967,10008,4033,29907,11957,410,952,14004,2188,10
65+,749683.367317,65010.710154,151130.096651,1824134.731267,623563.930721,1900697.595781,396293.467695,908357.935181,744250.312176,531584.149668,398085.779361,4522173.294036,43.325,228286.656661,13727.544701,5108653.617573,106888.389592,4210.63,442054.758002,118284.24636,12854.090032,18233.101172,46522.531995,27960.904001,195690.41218,100974.069332,58563.588414,841730.369473,1727122.432801,43731.131642,24249.965045,1530084.538219,264618.3125,29.186667,29638,3854,941,49566,34152,55617,24714,6811,45007,10187,23012,83231,1,12172,3309,81753,6225,36,11778,9803,878,1627,1905,2758,54184,10314,4155,27747,12288,425,1046,13355,2223,3


### Top Spending Categories for different Age groups in terms of Revenue per customer

In [18]:
spending_category_stats_by_col_rev_per_cust(col_groups, "Age")

## Irrespective of age group, Home spending generated the highest rev per customer

## Avg rev increases in Garden activities for people in higher age groupds

<IPython.core.display.Javascript object>

Age,18-24,25-34,35-44,45-54,55-64,65+
Activities,279.526979,1138.526501,1261.91125,1360.118702,1134.576352,1257.857999
Adult,22.848435,107.639216,101.996966,109.658377,89.217219,109.078373
Beach Holidays,59.703494,277.40611,302.95122,295.089795,238.95494,253.573988
Beauty,800.637488,3274.117184,3524.004841,3655.740789,2970.950424,3060.628744
Children and Baby,212.749525,983.424481,994.904175,981.928364,929.145072,1046.248206
Electronics,702.536698,2985.233515,3122.925787,3187.546926,2811.689699,3189.089926
Entertainment,142.608047,591.639142,685.471146,727.326886,624.947529,664.921926
European City Breaks,465.53938,1618.351292,1516.529236,1786.338748,1528.299031,1524.090495
Fashion,216.36572,874.029247,939.502652,1116.863628,1043.138133,1248.742135
Fitness,181.573529,732.768427,773.988846,877.623706,759.629164,891.919714


## What is the Gender-wise share on Revenue and Total Transactions

### Without Unknowns

In [19]:
get_spend_stats_by_col(segment_revenue_and_count, "Gender", remove_unknowns=True, sort_order="index", include_segment_count = True)

## Females spend less per transaction but they have significantly higher revenue share and footfall

## The difference between in the total female and male customers is just 200. However, the difference in revenue
## generated is dispropotionately higher. This requirs further investigation into what categories female
## tend to spend the most

## The average revenue for females is roughly 2.5 times that of Males

Unnamed: 0_level_0,revenue,total_purchases,avg_spend,pct_rev,pct_total_purchases,cust_count,pct_cust_count,rev_per_customer
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Female,97184006.5,2623816,37.04,74.22,77.13,1783,52.986627,54505.892597
Male,33748503.75,777886,43.38,25.78,22.87,1582,47.013373,21332.808944


### Top Spending Categories by Gender

In [20]:
col_groups = spending_category_stats_by_col(segment_revenue_and_count, "Gender")

## Males spend most towards Electronics, Garden and Home

## Females spend most towards Home, over 3 times as such as Males

## Other top categories are Beauty, Garden, Fashion, Electronics.

## Females spend 7.5 times more in weddings than Males
## Females also have a stronger liking for Pets than Males
## They spend more in Learning than Males

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,revenue,revenue,total_purchases,total_purchases,female/male rev
Gender,Female,Male,Female,Male,Unnamed: 5_level_1
PurchaseCategoryA,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Activities,3394040.478109,899206.556615,143603,31927,3.774484
Adult,198920.354817,199184.358807,11442,11838,0.998675
Beach Holidays,713529.956591,303621.720985,4482,1667,2.350062
Beauty,9363768.923966,2160166.934989,250249,52628,4.334743
Children and Baby,2873771.766524,700081.0098,159093,33247,4.104913
Electronics,7229769.533058,3835696.462735,206986,103519,1.884865
Entertainment,1839810.618086,504878.989767,115224,33188,3.644063
European City Breaks,3965633.889327,1574257.548063,29826,11880,2.51905
Fashion,3172444.115224,671999.583007,192878,38319,4.720902
Fitness,2005762.871645,879743.904808,43795,14447,2.279939


In [21]:
spending_category_stats_by_col_transpose(segment_revenue_and_count, "Gender")

## For all categories except Adult, female spending and footfall is higher

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases
PurchaseCategoryA,Activities,Adult,Beach Holidays,Beauty,Children and Baby,Electronics,Entertainment,European City Breaks,Fashion,Fitness,Food and Drink,Garden,Gifts,Healthcare,Holiday Extras,Home,Home Essentials,Ireland,Jewellery,Learning,Legal Services,Leisure,Long Haul & Cruises,Motoring,Other,Personalised Gifts,Pets,Restaurants & Bars,Spas & Country House,Sports & Adventure,Tradesmen,UK City Breaks,UK Seaside,Wedding,Activities,Adult,Beach Holidays,Beauty,Children and Baby,Electronics,Entertainment,European City Breaks,Fashion,Fitness,Food and Drink,Garden,Gifts,Healthcare,Holiday Extras,Home,Home Essentials,Ireland,Jewellery,Learning,Legal Services,Leisure,Long Haul & Cruises,Motoring,Other,Personalised Gifts,Pets,Restaurants & Bars,Spas & Country House,Sports & Adventure,Tradesmen,UK City Breaks,UK Seaside,Wedding
Gender,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2
Female,3394040.478109,198920.354817,713529.956591,9363768.923966,2873771.766524,7229769.533058,1839810.618086,3965633.889327,3172444.115224,2005762.871645,1580505.521822,14854577.843752,112.286667,1012460.786265,56716.756927,22884967.391408,384784.413739,168197.945463,1871459.592881,639455.916044,42896.343108,71050.18953,193224.034315,86769.976167,763593.95264,440716.370397,259590.617566,3926328.110333,6076269.76528,156118.030911,78034.457157,5939765.661416,929355.951723,9602.076001,143603,11442,4482,250249,159093,206986,115224,29826,192878,43795,96956,273376,6,52040,14383,360936,22976,1443,50476,53401,3002,6714,8431,8517,211273,45065,18573,126552,44958,1628,3401,54039,8057,35
Male,899206.556615,199184.358807,303621.720985,2160166.934989,700081.0098,3835696.462735,504878.989767,1574257.548063,671999.583007,879743.904808,410206.185519,5643908.438351,97.963334,313450.54049,16907.713847,7562520.869495,177692.142493,67583.746595,822735.519349,123857.231369,15731.356039,25733.398005,79181.810997,35759.407,253832.190415,121675.694832,46116.582916,1079622.903542,2517363.544093,67923.797632,50283.398938,2243234.905568,342968.279793,1279.056667,31927,11838,1667,52628,33247,103519,33188,11880,38319,14447,22371,92428,3,14769,4205,103798,10055,564,17779,9993,1071,2287,2814,3117,70256,11311,3556,33342,17323,561,2160,18645,2815,3


### Top Spending Categories for different Genders in terms of Revenue per customer

In [22]:
spending_category_stats_by_col_rev_per_cust(col_groups, "Gender")

## Irrespective of the Gender, Home and Garden spending generated the highest rev per customer

## Females spend much more in Beauty than Males

<IPython.core.display.Javascript object>

Gender,Female,Male
Activities,1903.556073,568.398582
Adult,111.564977,125.906674
Beach Holidays,400.185057,191.922706
Beauty,5251.693171,1365.465825
Children and Baby,1611.762068,442.529083
Electronics,4054.834287,2424.586892
Entertainment,1031.862377,319.13969
European City Breaks,2224.135664,995.105909
Fashion,1779.2732,424.778497
Fitness,1124.937113,556.096021


## Does Income translate into Revenue and Total Transactions?

### Without Unknowns

In [23]:
get_spend_stats_by_col(segment_revenue_and_count, "Income",remove_unknowns=True, sort_order="index", include_segment_count = True)

Unnamed: 0_level_0,revenue,total_purchases,avg_spend,pct_rev,pct_total_purchases,cust_count,pct_cust_count,rev_per_customer
Income,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
High,29642422.43,705471,42.02,15.95,14.83,1343,26.567755,22071.796299
Low,84461846.44,2257105,37.42,45.44,47.45,1403,27.754698,60200.888411
Middle,63574250.3,1601121,39.71,34.2,33.66,1369,27.082097,46438.458948
VeryHigh,8209065.51,193197,42.49,4.42,4.06,940,18.59545,8733.048415


In [24]:
## Low and middle income categories collectively generate 80% of the revenue

## The fraction (or total number) of VeryHigh income category inviduals isn't that far off from other categories.
## Yet, the revenue, total purchases and total rev per customers is significantly lower

## Low income category have the highest revenue per customer, followed by Mid.

### Top Spending Categories by Income

In [25]:
col_groups = spending_category_stats_by_col(segment_revenue_and_count, "Income")
## Males spend most towards Electronics, Garden and Home

## Females spend most towards Home, over 3 times as such as Males

## Other top categories are Beauty, Garden, Fashion, Electronics.

## Females spend 7.5 times more in weddings than Males
## Females also have a stronger liking for Pets than Males
## They spend more in Learning than Males

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,revenue,revenue,revenue,revenue,total_purchases,total_purchases,total_purchases,total_purchases
Income,High,Low,Middle,VeryHigh,High,Low,Middle,VeryHigh
PurchaseCategoryA,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Activities,1092249.660285,2689234.171638,2205837.646106,287794.943694,43151.0,110507.0,89587.0,10649.0
Adult,59649.919163,284495.541767,163126.15197,15350.668332,3408.0,16908.0,9521.0,878.0
Beach Holidays,256521.154975,615925.405939,470618.024964,71091.958324,1414.0,3778.0,2780.0,396.0
Beauty,2748666.642142,7638498.775726,5692295.392583,737557.240009,62857.0,207527.0,140760.0,17049.0
Children and Baby,689494.453941,2525937.9904,1644527.612217,173774.284164,36480.0,135086.0,88786.0,9234.0
Electronics,2025198.156702,8002709.716191,4986232.62206,533613.983685,55918.0,223744.0,139931.0,14873.0
Entertainment,601688.957792,1408255.014745,1167513.435377,170216.373565,34974.0,94292.0,72326.0,9803.0
European City Breaks,1486193.048423,3438362.369447,2871440.809175,433928.254945,11413.0,25953.0,21878.0,3377.0
Fashion,723480.316364,2579316.364787,1790338.732527,188922.637122,43203.0,155769.0,106788.0,11315.0
Fitness,648712.630651,1877584.68559,1399165.793356,175333.628834,11422.0,39932.0,26967.0,3088.0


In [26]:
spending_category_stats_by_col_transpose(segment_revenue_and_count, "Income")

## For all categories except Adult, female spending and footfall is higher

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases
PurchaseCategoryA,Activities,Adult,Beach Holidays,Beauty,Children and Baby,Electronics,Entertainment,European City Breaks,Fashion,Fitness,Food and Drink,Garden,Gifts,Healthcare,Holiday Extras,Home,Home Essentials,Ireland,Jewellery,Learning,Legal Services,Leisure,Long Haul & Cruises,Motoring,Other,Personalised Gifts,Pets,Restaurants & Bars,Spas & Country House,Sports & Adventure,Tradesmen,UK City Breaks,UK Seaside,Wedding,Activities,Adult,Beach Holidays,Beauty,Children and Baby,Electronics,Entertainment,European City Breaks,Fashion,Fitness,Food and Drink,Garden,Gifts,Healthcare,Holiday Extras,Home,Home Essentials,Ireland,Jewellery,Learning,Legal Services,Leisure,Long Haul & Cruises,Motoring,Other,Personalised Gifts,Pets,Restaurants & Bars,Spas & Country House,Sports & Adventure,Tradesmen,UK City Breaks,UK Seaside,Wedding
Income,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2
High,1092249.660285,59649.919163,256521.154975,2748666.642142,689494.453941,2025198.156702,601688.957792,1486193.048423,723480.316364,648712.630651,519338.598177,5445731.53835,,319531.951862,20880.183723,5533738.800472,100736.21399,6109.333332,494349.955405,154886.131788,16965.697048,22711.468176,60053.581659,23956.867334,204230.772179,109177.018464,57668.372062,1431388.164417,2323544.243206,64785.221644,22744.062708,2092902.856679,285048.576507,87.88,43151.0,3408.0,1414.0,62857.0,36480.0,55918.0,34974.0,11413.0,43203.0,11422.0,26640.0,81740.0,,14291.0,4397.0,83340.0,5737.0,47.0,11197.0,12721.0,1123.0,1995.0,2569.0,2369.0,57660.0,10819.0,4145.0,43167.0,15621.0,571.0,1000.0,17790.0,2286.0,6.0
Low,2689234.171638,284495.541767,615925.405939,7638498.775726,2525937.9904,8002709.716191,1408255.014745,3438362.369447,2579316.364787,1877584.68559,1072314.44942,11668640.574314,102.748334,789137.149049,40713.335308,21960078.292033,377123.426816,9663.724994,2064488.57456,524626.736698,31251.133746,53897.976357,152323.084318,80330.183832,698925.283339,371178.090602,195824.485798,2714063.126677,4736437.375802,114133.490927,81713.492506,4878120.251959,776619.893086,9819.521668,110507.0,16908.0,3778.0,207527.0,135086.0,223744.0,94292.0,25953.0,155769.0,39932.0,71876.0,225432.0,4.0,41482.0,11005.0,328936.0,22402.0,103.0,53248.0,43747.0,2232.0,5146.0,6836.0,7534.0,192756.0,37087.0,14206.0,88648.0,35315.0,1141.0,3561.0,43957.0,6914.0,41.0
Middle,2205837.646106,163126.15197,470618.024964,5692295.392583,1644527.612217,4986232.62206,1167513.435377,2871440.809175,1790338.732527,1399165.793356,981149.498639,10702041.918495,149.768334,614533.92462,36958.974639,13665856.443385,230149.116323,10853.491661,1232153.264854,353308.310741,31561.78341,50747.882181,126563.302992,58850.270432,488618.585587,265712.291169,141642.091207,2525923.191322,4477268.43571,115094.660932,55607.78594,4384157.555763,632844.588295,1406.941667,89587.0,9521.0,2780.0,140760.0,88786.0,139931.0,72326.0,21878.0,106788.0,26967.0,56658.0,177627.0,6.0,31135.0,9350.0,205357.0,13565.0,98.0,29377.0,29282.0,2155.0,4550.0,5147.0,5668.0,135278.0,26473.0,10209.0,80472.0,32257.0,1112.0,2442.0,38286.0,5281.0,12.0
VeryHigh,287794.943694,15350.668332,71091.958324,737557.240009,173774.284164,533613.983685,170216.373565,433928.254945,188922.637122,175333.628834,156156.265019,1577931.246868,9.99,104729.403844,4398.462106,1377499.088541,30442.255099,1585.846667,119302.008832,41763.339586,3936.178341,5896.813999,17325.871665,6620.849333,55955.901037,28850.021994,16165.047226,432707.109752,753278.096283,23831.669989,6771.551674,579760.274747,76489.341638,74.9,10649.0,878.0,396.0,17049.0,9234.0,14873.0,9803.0,3377.0,11315.0,3088.0,7435.0,25608.0,1.0,3988.0,1012.0,21323.0,1686.0,16.0,2794.0,3368.0,267.0,520.0,750.0,654.0,15608.0,2998.0,1197.0,12621.0,4742.0,183.0,290.0,4874.0,595.0,5.0


### Top Spending Categories for different Income groups in terms of Revenue per customer

In [27]:
spending_category_stats_by_col_rev_per_cust(col_groups, "Income")

## rev per customer is highest for VeryHigh income groups

## Garden and Home is a top choice for all groups

<IPython.core.display.Javascript object>

Income,High,Low,Middle,VeryHigh
Activities,813.290886,1916.774178,1611.276586,306.164834
Adult,44.415428,202.77658,119.15716,16.330498
Beach Holidays,191.006072,439.005991,343.767732,75.629743
Beauty,2046.661684,5444.403974,4157.995174,784.635362
Children and Baby,513.3987,1800.383457,1201.261952,184.86626
Electronics,1507.965865,5703.998372,3642.244428,567.674451
Entertainment,448.018584,1003.745556,852.822086,181.081248
European City Breaks,1106.621778,2450.721575,2097.473199,461.625803
Fashion,538.704629,1838.429341,1307.771171,200.981529
Fitness,483.032487,1338.264209,1022.034911,186.525137


## Children-wise Revenue and Total Transactions

### Without Unknowns

In [28]:
get_spend_stats_by_col(segment_revenue_and_count, "Children", remove_unknowns=True, sort_order="index", include_segment_count = True)

Unnamed: 0_level_0,revenue,total_purchases,avg_spend,pct_rev,pct_total_purchases,cust_count,pct_cust_count,rev_per_customer
Children,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Baby,5417206.01,136735,39.62,2.91,2.87,923,18.259149,5869.128938
,113640747.1,2928895,38.8,61.13,61.57,1166,23.066271,97462.047256
Preschool,6903361.32,175404,39.36,3.71,3.69,908,17.962413,7602.820837
Teenager,51133677.44,1288502,39.68,27.51,27.09,1119,22.136499,45695.869026
Youth,8792592.8,227358,38.67,4.73,4.78,939,18.575668,9363.7836


In [29]:
## The highest revenue is generated by individuals with no children. It would be good to consider
## relationship status together to get more insights

In [30]:
filtered_segment_revenue_and_count = segment_revenue_and_count[segment_revenue_and_count.Relationship != 'Unknown']
filtered_segment_revenue_and_count = filtered_segment_revenue_and_count[filtered_segment_revenue_and_count.Children != 'Unknown']
filtered_segment_revenue_and_count["Children"] = filtered_segment_revenue_and_count["Children"].astype(str)
filtered_segment_revenue_and_count["Relationship"] = filtered_segment_revenue_and_count["Relationship"].astype(str)


Relationship_child_pivot = pd.pivot_table(filtered_segment_revenue_and_count, index = "Children", columns = "Relationship", aggfunc={"revenue":np.sum, "total_purchases":np.sum})
display(Relationship_child_pivot.style.background_gradient(cmap="Blues"))

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,revenue,revenue,revenue,revenue,total_purchases,total_purchases,total_purchases,total_purchases
Relationship,Cohabiting,Married,Separated,Single,Cohabiting,Married,Separated,Single
Children,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Baby,653730.259328,922257.795126,957501.635622,2410418.35509,16693,22929,24392,60702
,6800300.988496,48393603.913516,18565768.138519,24065482.907595,180237,1215132,500827,625620
Preschool,995539.353768,1510569.048699,1159040.9392,2648475.495559,24864,37856,29077,67750
Teenager,4620847.935372,22324678.756662,5643880.483822,11325230.629564,121181,546469,144251,291466
Youth,1263372.663095,2518851.969014,1326531.333161,2695091.305218,32645,64355,34099,70415


In [31]:
## Irrespective of the Relationship status, folks spend the most and transact the most who have no children

In [32]:
Relationship_child_pivot_transpose = pd.pivot_table(filtered_segment_revenue_and_count, columns = "Children", index = "Relationship", aggfunc={"revenue":np.sum, "total_purchases":np.sum})
display(Relationship_child_pivot_transpose.style.background_gradient(cmap="Blues"))

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,revenue,revenue,revenue,revenue,revenue,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases
Children,Baby,None,Preschool,Teenager,Youth,Baby,None,Preschool,Teenager,Youth
Relationship,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Cohabiting,653730.259328,6800300.988496,995539.353768,4620847.935372,1263372.663095,16693,180237,24864,121181,32645
Married,922257.795126,48393603.913516,1510569.048699,22324678.756662,2518851.969014,22929,1215132,37856,546469,64355
Separated,957501.635622,18565768.138519,1159040.9392,5643880.483822,1326531.333161,24392,500827,29077,144251,34099
Single,2410418.35509,24065482.907595,2648475.495559,11325230.629564,2695091.305218,60702,625620,67750,291466,70415


### Top Spending Categories by Children

In [33]:
col_groups = spending_category_stats_by_col(segment_revenue_and_count, "Children")

## 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,revenue,revenue,revenue,revenue,revenue,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases
Children,Baby,None,Preschool,Teenager,Youth,Baby,None,Preschool,Teenager,Youth
PurchaseCategoryA,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Activities,182110.909289,3662297.944609,245318.24023,1844740.063973,340649.263622,7170.0,146233.0,10099.0,75686.0,14706.0
Adult,19096.660661,316638.732259,22522.027827,138369.768322,25995.092163,1129.0,18643.0,1306.0,8104.0,1533.0
Beach Holidays,53690.644996,824892.56659,76466.004326,400942.128293,58165.199997,307.0,4966.0,412.0,2306.0,377.0
Beauty,582007.260798,10128009.709161,691676.613317,4582925.405731,832399.061453,13860.0,260394.0,16943.0,116248.0,20748.0
Children and Baby,150021.44211,2981657.84814,208029.076881,1397753.561623,296272.411968,7910.0,160764.0,10936.0,74424.0,15552.0
Electronics,489655.781459,9246183.717579,626723.583097,4357065.051593,828126.34491,13445.0,263233.0,16911.0,118872.0,22005.0
Entertainment,97209.783795,2045113.283005,129595.823988,911428.637607,164326.253084,6447.0,128756.0,8483.0,57012.0,10697.0
European City Breaks,309510.226617,4906527.611172,352562.014932,2279495.417667,381829.211602,2336.0,37092.0,2706.0,17609.0,2878.0
Fashion,134659.613966,3298851.561817,175039.556136,1437538.953126,235968.365755,8153.0,198313.0,10558.0,85836.0,14215.0
Fitness,116078.409994,2506432.285391,152389.633179,1131149.060869,194747.348998,2413.0,49457.0,3039.0,22629.0,3871.0


In [34]:
spending_category_stats_by_col_transpose(segment_revenue_and_count, "Children")

## 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases
PurchaseCategoryA,Activities,Adult,Beach Holidays,Beauty,Children and Baby,Electronics,Entertainment,European City Breaks,Fashion,Fitness,Food and Drink,Garden,Gifts,Healthcare,Holiday Extras,Home,Home Essentials,Ireland,Jewellery,Learning,Legal Services,Leisure,Long Haul & Cruises,Motoring,Other,Personalised Gifts,Pets,Restaurants & Bars,Spas & Country House,Sports & Adventure,Tradesmen,UK City Breaks,UK Seaside,Wedding,Activities,Adult,Beach Holidays,Beauty,Children and Baby,Electronics,Entertainment,European City Breaks,Fashion,Fitness,Food and Drink,Garden,Gifts,Healthcare,Holiday Extras,Home,Home Essentials,Ireland,Jewellery,Learning,Legal Services,Leisure,Long Haul & Cruises,Motoring,Other,Personalised Gifts,Pets,Restaurants & Bars,Spas & Country House,Sports & Adventure,Tradesmen,UK City Breaks,UK Seaside,Wedding
Children,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2
Baby,182110.909289,19096.660661,53690.644996,582007.260798,150021.44211,489655.781459,97209.783795,309510.226617,134659.613966,116078.409994,65570.943784,588111.661677,42.491667,55454.478026,1973.800101,1353690.779581,24496.70533,811.441666,124366.536268,37623.237741,1608.247672,3074.074834,12411.903332,3392.153,39784.513796,20662.435305,10736.104719,215975.488155,327023.513084,8045.648325,4495.76251,339235.539832,44562.8383,14.98,7170.0,1129.0,307.0,13860.0,7910.0,13445.0,6447.0,2336.0,8153.0,2413.0,4255.0,10813.0,1.0,2636.0,609.0,19564.0,1436.0,10.0,3088.0,3105.0,117.0,306.0,501.0,335.0,11182.0,2111.0,816.0,6618.0,2415.0,77.0,200.0,2981.0,388.0,1.0
,3662297.944609,316638.732259,824892.56659,10128009.709161,2981657.84814,9246183.717579,2045113.283005,4906527.611172,3298851.561817,2506432.285391,1746762.981867,18829497.426737,168.210001,1136089.416232,64951.791117,25308720.986275,471135.564743,19092.01666,2249002.976214,631279.886075,55527.75214,80667.676694,208611.062982,111540.441599,901834.760829,473717.614112,262066.079034,4440562.396301,7886445.608285,183670.517562,103754.603194,7391408.130029,1162819.233245,4814.708334,146233.0,18643.0,4966.0,260394.0,160764.0,263233.0,128756.0,37092.0,198313.0,49457.0,102101.0,335385.0,7.0,56663.0,15894.0,388623.0,27604.0,176.0,57130.0,52358.0,3838.0,7352.0,9421.0,10688.0,249286.0,47511.0,18841.0,141067.0,56066.0,1846.0,4581.0,64746.0,9836.0,24.0
Preschool,245318.24023,22522.027827,76466.004326,691676.613317,208029.076881,626723.583097,129595.823988,352562.014932,175039.556136,152389.633179,94949.540634,823204.283108,,68499.347855,3254.26349,1711421.225926,29338.749832,2248.963331,160813.03617,45221.205082,2556.014338,4364.945004,32334.443332,4753.780999,50349.466577,29077.63917,13348.841389,258441.253575,411855.903034,14854.389993,6114.881681,401701.098146,54293.734959,41.74,10099.0,1306.0,412.0,16943.0,10936.0,16911.0,8483.0,2706.0,10558.0,3039.0,5614.0,14546.0,,3252.0,958.0,24757.0,1721.0,16.0,3915.0,3793.0,184.0,418.0,685.0,454.0,14185.0,2863.0,1008.0,8091.0,3090.0,121.0,274.0,3580.0,483.0,3.0
Teenager,1844740.063973,138369.768322,400942.128293,4582925.405731,1397753.561623,4357065.051593,911428.637607,2279495.417667,1437538.953126,1131149.060869,706460.324103,7995982.378631,51.805,486498.491766,28570.319219,11931140.408238,181565.374825,5552.241665,1180007.31733,303324.623811,21211.631718,38838.87351,89628.326657,44027.842999,388876.325983,214705.769471,107440.356075,1884054.456513,3158831.66695,96638.414953,44031.592089,3302117.711056,439680.354732,3032.788334,75686.0,8104.0,2306.0,116248.0,74424.0,118872.0,57012.0,17609.0,85836.0,22629.0,43430.0,129923.0,3.0,24216.0,7063.0,173528.0,10696.0,57.0,27540.0,25088.0,1442.0,3529.0,3998.0,4178.0,108072.0,21299.0,7778.0,59648.0,22658.0,818.0,1892.0,29117.0,3773.0,30.0
Youth,340649.263622,25995.092163,58165.199997,832399.061453,296272.411968,828126.34491,164326.253084,381829.211602,235968.365755,194747.348998,115215.020867,1157549.527874,,81390.695496,4200.781849,2232199.224411,31914.617498,507.733332,196103.937669,57135.566104,2811.146677,6308.570671,13280.104331,6043.952334,66885.474957,36753.964171,17708.615076,305047.997624,506371.459648,14636.072659,8440.053354,500478.460085,69646.23829,3485.026667,14706.0,1533.0,377.0,20748.0,15552.0,22005.0,10697.0,2878.0,14215.0,3871.0,7209.0,19740.0,,4129.0,1240.0,32484.0,1933.0,5.0,4943.0,4774.0,196.0,606.0,697.0,570.0,18577.0,3593.0,1314.0,9484.0,3706.0,145.0,346.0,4483.0,596.0,6.0


### Top Spending Categories for different Children groups in terms of Revenue per customer

In [35]:
spending_category_stats_by_col_rev_per_cust(col_groups, "Children")



<IPython.core.display.Javascript object>

Children,Baby,None,Preschool,Teenager,Youth
Activities,197.30326,3140.907328,270.174273,1648.561273,362.778769
Adult,20.689773,271.559805,24.803995,123.654842,27.683804
Beach Holidays,58.169713,707.455031,84.213661,358.303957,61.94377
Beauty,630.560413,8686.114673,761.758385,4095.554429,886.473974
Children and Baby,162.536774,2557.167966,229.106913,1249.109528,315.519076
Electronics,530.504639,7929.831662,690.22421,3893.713183,881.92369
Entertainment,105.319376,1753.956503,142.726678,814.502804,175.001334
European City Breaks,335.33069,4207.999667,388.284157,2037.08259,406.633878
Fashion,145.893406,2829.203741,192.774842,1284.663944,251.297514
Fitness,125.762091,2149.598873,167.829992,1010.85707,207.398668


## Revenue and Total Transactions Split by Housing

### Without Unknowns

In [36]:
get_spend_stats_by_col(segment_revenue_and_count, "Housing", remove_unknowns=True, sort_order="index", include_segment_count = True)

Unnamed: 0_level_0,revenue,total_purchases,avg_spend,pct_rev,pct_total_purchases,cust_count,pct_cust_count,rev_per_customer
Housing,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Flat,28799436.54,757598,38.01,15.09,15.49,1853,47.598253,15542.059655
House,162017837.22,4133337,39.2,84.91,84.51,2040,52.401747,79420.508441


### Top Spending Categories by Housing

In [37]:
col_groups = spending_category_stats_by_col(segment_revenue_and_count, "Housing")

## Irrespective of House or Flat, highest spending is in Home

## Moreover, for folks living in House, Garden spending is significantly higher

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,revenue,revenue,total_purchases,total_purchases,House/Flat rev
Housing,Flat,House,Flat,House,Unnamed: 5_level_1
PurchaseCategoryA,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Activities,815933.818179,5562972.683433,32655,225878,6.817921
Adult,111683.560153,428080.215245,6778,25023,3.832974
Beach Holidays,301933.122643,1145266.384893,1832,6731,3.793113
Beauty,3354491.925905,13886819.043523,85508,355766,4.139768
Children and Baby,710672.841214,4485277.275509,38503,239775,6.311311
Electronics,2792153.132204,13248198.818218,77114,371413,4.744797
Entertainment,583484.8432,2829345.133828,38156,177860,4.849046
European City Breaks,1341703.680085,7053438.750211,10060,53974,5.257076
Fashion,809718.535654,4632724.466279,49138,277909,5.721401
Fitness,629262.243652,3605811.245774,13732,70288,5.73022


In [38]:
spending_category_stats_by_col_transpose(segment_revenue_and_count, "Housing")

## Overall, people living in house spend more than those living in a flat

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases
PurchaseCategoryA,Activities,Adult,Beach Holidays,Beauty,Children and Baby,Electronics,Entertainment,European City Breaks,Fashion,Fitness,Food and Drink,Garden,Gifts,Healthcare,Holiday Extras,Home,Home Essentials,Ireland,Jewellery,Learning,Legal Services,Leisure,Long Haul & Cruises,Motoring,Other,Personalised Gifts,Pets,Restaurants & Bars,Spas & Country House,Sports & Adventure,Tradesmen,UK City Breaks,UK Seaside,Wedding,Activities,Adult,Beach Holidays,Beauty,Children and Baby,Electronics,Entertainment,European City Breaks,Fashion,Fitness,Food and Drink,Garden,Gifts,Healthcare,Holiday Extras,Home,Home Essentials,Ireland,Jewellery,Learning,Legal Services,Leisure,Long Haul & Cruises,Motoring,Other,Personalised Gifts,Pets,Restaurants & Bars,Spas & Country House,Sports & Adventure,Tradesmen,UK City Breaks,UK Seaside,Wedding
Housing,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2
Flat,815933.818179,111683.560153,301933.122643,3354491.925905,710672.841214,2792153.132204,583484.8432,1341703.680085,809718.535654,629262.243652,347363.380837,2143520.067525,91.758334,314642.827916,16195.853809,7979702.771988,141612.028158,20388.918315,703982.024688,200563.509668,10726.23436,14212.565175,58132.279326,21474.011668,227440.734365,103632.842841,54783.544871,1356603.83329,1606565.006777,35005.564987,23806.286718,1657026.007877,307556.716798,3370.071667,32655,6778,1832,85508,38503,77114,38156,10060,49138,13732,22592,48181,3,14804,3370,116385,8457,168,18902,16682,793,1477,2742,2012,62010,10564,4187,40149,12008,390,1047,14528,2661,10
House,5562972.683433,428080.215245,1145266.384893,13886819.043523,4485277.275509,13248198.818218,2829345.133828,7053438.750211,4632724.466279,3605811.245774,2439852.888851,28112207.879164,170.748334,1565641.175437,87729.767013,35653616.172053,615051.039065,360427.146221,3320116.121466,907147.704209,74118.208186,122246.675876,300959.827974,152516.049263,1261731.201071,692026.179388,368874.172974,5861840.299509,10814458.035146,291134.068497,147141.622791,10498380.99056,1484481.254381,8033.972668,225878,25023,6731,355766,239775,371413,177860,53974,277909,70288,143940,477146,8,78790,22882,542058,36047,3080,80720,75147,5063,11032,12912,14610,350045,69085,26544,188597,76865,2681,6398,92442,12573,55


### Top Spending Categories for different Housing groups in terms of Revenue per customer

In [39]:
spending_category_stats_by_col_rev_per_cust(col_groups, "Housing")



<IPython.core.display.Javascript object>

Housing,Flat,House
Activities,440.331256,2726.947394
Adult,60.271754,209.843243
Beach Holidays,162.942862,561.405091
Beauty,1810.303252,6807.264237
Children and Baby,383.525548,2198.665331
Electronics,1506.828458,6494.215107
Entertainment,314.886586,1386.933889
European City Breaks,724.071063,3457.568015
Fashion,436.977083,2270.943366
Fitness,339.591065,1767.554532


## Relationship-wise Revenue and Total Transactions

### Without Unknowns

In [40]:
get_spend_stats_by_col(segment_revenue_and_count, "Relationship", remove_unknowns=True, sort_order="index", include_segment_count = True)

## Married people not only generate the most revenue per customer, but also total revenue, transactions, avg spen etc.

## Singles are the next in terms of revenue

Unnamed: 0_level_0,revenue,total_purchases,avg_spend,pct_rev,pct_total_purchases,cust_count,pct_cust_count,rev_per_customer
Relationship,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Cohabiting,14333791.2,375620,38.16,8.91,9.14,949,23.022804,15104.100316
Married,75669961.48,1886741,40.11,47.06,45.9,1107,26.855895,68355.882096
Separated,27652722.53,732646,37.74,17.2,17.82,1021,24.769529,27083.959383
Single,43144698.69,1115953,38.66,26.83,27.15,1045,25.351771,41286.793005


### Top Spending Categories by Relationship

In [41]:
col_groups = spending_category_stats_by_col(segment_revenue_and_count, "Relationship")

## Irrespective of relationship, people spend the most in Home.

## Married people are typically interested in Garden and Spas & Country House than others 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,revenue,revenue,revenue,revenue,total_purchases,total_purchases,total_purchases,total_purchases
Relationship,Cohabiting,Married,Separated,Single,Cohabiting,Married,Separated,Single
PurchaseCategoryA,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Activities,514389.791257,2691029.175535,847103.166946,1368245.978023,21577,107775,34508,55402
Adult,43546.507662,176607.723615,88474.348987,142381.543647,2595,10194,5238,8552
Beach Holidays,103272.214987,491163.25996,200682.727646,443095.191625,584,2855,1279,2566
Beauty,1324167.678766,6290250.42812,2398313.617535,4643758.471149,33740,156420,64466,117256
Children and Baby,446331.533894,1933296.269508,772348.226087,1166413.289188,23716,104724,41521,61500
Electronics,1238280.961761,5680727.2184,2462407.561595,4046097.661621,34652,160446,69491,111164
Entertainment,247615.077175,1350758.376855,474256.881358,825395.585448,16345,82469,30770,53608
European City Breaks,611597.392906,3347466.987791,1145818.704135,2019856.279986,4656,25555,8824,15238
Fashion,411770.956341,2111505.410357,850531.278008,1171911.017651,24870,126354,51384,70546
Fitness,315389.43164,1651639.771525,620238.35784,941422.883465,6612,30923,12796,19783


In [42]:
spending_category_stats_by_col_transpose(segment_revenue_and_count, "Relationship")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,revenue,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases,total_purchases
PurchaseCategoryA,Activities,Adult,Beach Holidays,Beauty,Children and Baby,Electronics,Entertainment,European City Breaks,Fashion,Fitness,Food and Drink,Garden,Gifts,Healthcare,Holiday Extras,Home,Home Essentials,Ireland,Jewellery,Learning,Legal Services,Leisure,Long Haul & Cruises,Motoring,Other,Personalised Gifts,Pets,Restaurants & Bars,Spas & Country House,Sports & Adventure,Tradesmen,UK City Breaks,UK Seaside,Wedding,Activities,Adult,Beach Holidays,Beauty,Children and Baby,Electronics,Entertainment,European City Breaks,Fashion,Fitness,Food and Drink,Garden,Gifts,Healthcare,Holiday Extras,Home,Home Essentials,Ireland,Jewellery,Learning,Legal Services,Leisure,Long Haul & Cruises,Motoring,Other,Personalised Gifts,Pets,Restaurants & Bars,Spas & Country House,Sports & Adventure,Tradesmen,UK City Breaks,UK Seaside,Wedding
Relationship,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2
Cohabiting,514389.791257,43546.507662,103272.214987,1324167.678766,446331.533894,1238280.961761,247615.077175,611597.392906,411770.956341,315389.43164,208907.244147,2143544.565217,85.816667,134941.664708,6935.042303,3471259.788521,55521.260165,1684.566665,324135.544341,88692.100464,5988.574347,9822.236171,22819.781666,12085.599832,114745.001625,62883.897833,33642.951309,495347.31747,881574.683048,22965.119988,13441.05003,835807.881237,130566.789249,31.176667,21577,2595,584,33740,23716,34652,16345,4656,24870,6612,12909,37741,2,6861,2022,51670,3343,19,8040,7374,420,973,1153,1120,31787,6296,2523,16056,6450,222,581,7582,1126,3
Married,2691029.175535,176607.723615,491163.25996,6290250.42812,1933296.269508,5680727.2184,1350758.376855,3347466.987791,2111505.410357,1651639.771525,1264436.702635,14189376.239232,53.481667,744108.839939,48198.616523,15310938.99054,276154.099754,12027.821662,1372427.377591,386131.65452,38990.905434,62015.755852,132075.735992,75085.853936,576696.499488,317383.508627,169763.286653,2956935.084871,5665802.203768,152096.666582,67134.258793,5358844.802408,763951.394884,4887.08,107775,10194,2855,156420,104724,160446,82469,25555,126354,30923,70256,229352,2,36727,11277,234680,15919,115,32422,31841,2591,5460,5999,7313,159966,31643,11943,94700,39517,1354,2935,46743,6255,16
Separated,847103.166946,88474.348987,200682.727646,2398313.617535,772348.226087,2462407.561595,474256.881358,1145818.704135,850531.278008,620238.35784,398010.949893,4129892.196585,46.981667,263195.049544,14506.420376,6968515.376155,120273.594993,3210.946665,610761.215013,168385.264939,12485.755029,19704.413006,61761.838662,26611.75,228550.82547,116314.093773,65844.062774,971413.489794,1655659.543119,38431.309975,25122.706054,1628192.258555,262886.723146,2770.895,34508,5238,1279,64466,41521,69491,30770,8824,51384,12796,24677,79281,3,13903,3678,105126,7125,30,16008,14027,895,1740,2278,2611,62868,11664,4933,31079,12076,396,1110,14546,2304,11
Single,1368245.978023,142381.543647,443095.191625,4643758.471149,1166413.289188,4046097.661621,825395.585448,2019856.279986,1171911.017651,941422.883465,499707.203044,4839520.289305,65.236667,440889.798644,18231.311734,10989811.819949,186851.720154,7489.449996,1066320.515691,288928.729321,15020.896701,23284.05068,93691.914319,32278.875498,329008.568695,172588.676329,85425.87698,1760585.23083,2484795.267324,59480.474974,40036.921257,2561728.495724,376822.429073,3557.038334,55402,8552,2566,117256,61500,111164,53608,15238,70546,19783,32616,93173,3,21349,5219,160729,11193,61,26858,23993,1095,2378,3848,2969,91368,17252,6279,53529,18339,628,1734,22361,3341,23


### Top Spending Categories for different Relationship groups in terms of Revenue per customer

In [43]:
spending_category_stats_by_col_rev_per_cust(col_groups, "Relationship")



<IPython.core.display.Javascript object>

Relationship,Cohabiting,Married,Separated,Single
Activities,542.0335,2430.920664,829.679889,1309.326295
Adult,45.886731,159.537239,86.654602,136.250281
Beach Holidays,108.822144,443.688582,196.555071,424.014537
Beauty,1395.329482,5682.249709,2348.984934,4443.788011
Children and Baby,470.317739,1746.428428,756.462513,1116.184966
Electronics,1304.827146,5131.64157,2411.760589,3871.863791
Entertainment,260.922105,1220.197269,464.502332,789.852235
European City Breaks,644.465114,3023.908751,1122.251424,1932.876823
Fashion,433.899849,1907.412295,833.037491,1121.44595
Fitness,332.338706,1491.99618,607.481252,900.883142


# Personalisation

## Workflow

In [51]:
## Overall, the above EDA, for each customer category (Age, Income, Children, Housing, Gender and Relationship)
## indicates that for each segment, there is some product category where people spend the most
## and generate the most revenue per customer.

## As there is no time series element in the given data, we cannot determine periodic patterns in data. But given
## that we have categorical information which creates a segment, we can determine the top purchases and revenue
## generating products for each individual groups.

## revenue per customer is a good metric to select as it aligns pretty well with most businesses.

## For personalisation, the following recommendation workflow may work:

## Define a function which will be parameterized with the 6 categories of a segment.

## Firstly, we shall determine the products that are most popular for that whole group collectively. 
## This is a direct recommendation from the spend history so 
## will consider total_purchases as the metric to detemine the popular category and return top {k} products. 

## Secondly, as recommending new products is aimed to increase revenue, we will consider each of the 6 categories 
## individually and determine the top {k} products with the highest revenue per customer. These are the products that
## will be recommended to the customers as new suggestions.
## Here, we shall only recommend those products which aren't the top purchases already of that group

In [45]:
segment_revenue_and_count["Age"] = segment_revenue_and_count["Age"].astype(str)
segment_revenue_and_count["Gender"] = segment_revenue_and_count["Gender"].astype(str)
segment_revenue_and_count["Income"] = segment_revenue_and_count["Income"].astype(str)
segment_revenue_and_count["Children"] = segment_revenue_and_count["Children"].astype(str)
segment_revenue_and_count["Housing"] = segment_revenue_and_count["Housing"].astype(str)
segment_revenue_and_count["Relationship"] = segment_revenue_and_count["Relationship"].astype(str)
segment_revenue_and_count["PurchaseCategoryA"] = segment_revenue_and_count["PurchaseCategoryA"].astype(str)
segment_revenue_and_count["PurchaseCategoryB"] = segment_revenue_and_count["PurchaseCategoryB"].astype(str)
segment_revenue_and_count["PurchaseCategory"] = segment_revenue_and_count['PurchaseCategoryA'].str.cat(segment_revenue_and_count['PurchaseCategoryB'], sep='|').values

segment_revenue_and_count.head()

Unnamed: 0,Age,Gender,Income,Children,Housing,Relationship,PurchaseCategoryA,PurchaseCategoryB,revenue,total_purchases,avg_spend,PurchaseCategory
0,35-44,Male,Middle,Youth,Flat,Married,Home,Beds & Mattresses,733.45,6,122.241667,Home|Beds & Mattresses
1,Unknown,Female,Low,Youth,House,Unknown,Home,Beds & Mattresses,10745.972,67,160.387642,Home|Beds & Mattresses
2,45-54,Unknown,VeryHigh,,House,Separated,Home,Beds & Mattresses,2204.83,16,137.801875,Home|Beds & Mattresses
3,25-34,Unknown,Middle,Teenager,House,Married,Home,Beds & Mattresses,25627.18,145,176.739172,Home|Beds & Mattresses
4,35-44,Unknown,Middle,,Flat,Unknown,Home,Beds & Mattresses,3405.24,21,162.154286,Home|Beds & Mattresses


## Algorithm

In [46]:
def get_most_popular_product_for_full_category(spend_df, age, gender, income, children, housing, relationship, count):
    
    """
    This function accepts the whole segment and returns the most frequently purchased items
    
    Parameters:
    
        1. spend_df: spend data
        2. (age, gender, ... , relationship): segment details
        3. count: number of top purchases to return
    
    """
    
    ## select the segment
    spend_df = spend_df[spend_df.Age == age]
    spend_df = spend_df[spend_df.Gender == gender]
    spend_df = spend_df[spend_df.Income == income]
    spend_df = spend_df[spend_df.Children == children]
    spend_df = spend_df[spend_df.Housing == housing]
    spend_df = spend_df[spend_df.Relationship == relationship]
    
    ## find the most purchased product
    spend_df = spend_df.nlargest(count, "total_purchases", keep = "all")
    spend_df = spend_df.sort_values(["total_purchases", "avg_spend"], ascending = False).head(count)
    
    top_products = list(spend_df.PurchaseCategory.values)
        
    return top_products

In [47]:
def get_most_popular_product_for_all_cols(spend_df, age, gender, income, children, housing, relationship, count, selected_products):
    
    """
    This function, in contrast to the above function, filter data on just one field at a time and finds
    the most purchased product in the whole selected segment.
    
    For example, if the arguments specified were: "18-24", "Female", "VeryHigh", "None", "House", "Separated",
    first select the whole "18-24" segment, find the most popular product
    the select the Femal segment, find the most popular product and so on..
    
    Parameters:
    
        1. spend_df: spend data
        2. (age, gender, ... , relationship): segment details
        3. count: number of top purchases to return
        4. selected_products: products recommended in the previous step(s)
    
    """
    
    params = (age, gender, income, children, housing, relationship)

    ## remove selected_products to show new products
    spend_df = spend_df[~spend_df.PurchaseCategory.isin(selected_products)]
    top_products_filtered = pd.DataFrame()
    
    
    ## iterate over each field and find the most purchased items
    for idx, col_name in enumerate(['Age', 'Gender', 'Income', 'Children', 'Housing', 'Relationship']):
    
        df_filtered = spend_df[spend_df[col_name] == params[idx]]

        df_filtered = df_filtered.nlargest(count, "total_purchases", keep = "all")
        df_filtered = df_filtered.sort_values(["total_purchases", "avg_spend"], ascending = False).head(count)
        
        ## gather all products from each field
        top_products_filtered = pd.concat((top_products_filtered, df_filtered))
    
    ## find the most purchased products from the gathered products
    top_products_filtered = top_products_filtered.sort_values(["total_purchases", "avg_spend"], ascending = False)
    top_products_filtered = top_products_filtered.drop_duplicates(subset = ["PurchaseCategory"])
    top_products_filtered = top_products_filtered.head(count)
    
    top_products = list(top_products_filtered.PurchaseCategory.values)
    
    return top_products

In [48]:
def get_top_rev_per_customer_product_for_col(spend_df, age, gender, income, children, housing, relationship, count, selected_products):
    """
    This function finds the products that generate the most revenue per customer.
    The idea is similar to the above function get_most_popular_product_for_all_cols.
    
    For example, if the arguments specified were: "18-24", "Female", "VeryHigh", "None", "House", "Separated",
    first select the whole "18-24" segment, find the products which generate most revenue per customer
    the select the Femal segment, find the products which generate most revenue per customer and so on..
    
    Parameters:
    
        1. spend_df: spend data
        2. (age, gender, ... , relationship): segment details
        3. count: number of top purchases to return
        4. selected_products: products recommended in the previous step(s)
    
    """
    
    spend_df = spend_df[~spend_df.PurchaseCategory.isin(selected_products)]
    top_products_filtered = pd.DataFrame()
    
    params = (age, gender, income, children, housing, relationship)
    
    new_products = []
    
    for idx, col_name in enumerate(['Age', 'Gender', 'Income', 'Children', 'Housing', 'Relationship']):
        
        df_filtered = spend_df[spend_df[col_name] == params[idx]]
        df_filtered = df_filtered.groupby([col_name, "PurchaseCategory"]).revenue.sum().reset_index()
        df_filtered = df_filtered[df_filtered.revenue > 0]
        
        df_filtered["revenue_per_customer"] = df_filtered.revenue/segment_count[segment_count[col_name] == params[idx]].shape[0]
        df_filtered = df_filtered.nlargest(count, "revenue_per_customer", keep = "all")
        df_filtered = df_filtered.sort_values(["revenue_per_customer", "revenue"], ascending = False).head(count)
        
        top_products_filtered = pd.concat((top_products_filtered, df_filtered))
        
    top_products_filtered = top_products_filtered.sort_values(["revenue_per_customer", "revenue"], ascending = False)
    top_products_filtered = top_products_filtered.drop_duplicates(subset = ["PurchaseCategory"])
    
    top_products_filtered = top_products_filtered.head(count)
    
    top_products = list(top_products_filtered.PurchaseCategory.values)
    
    return top_products

In [49]:
def recommend_product_for_category(spend_df, age, gender, income, children, housing, relationship, count):
    
    params = (age, gender, income, children, housing, relationship)
    
    top_products = []
    
    ## get most purchased product for full segment
    top_products += get_most_popular_product_for_full_category(spend_df, age, gender, income, children, housing, relationship, count)
    
    ## get most purchased product by considering individual fields in segment
    top_products += get_most_popular_product_for_all_cols(spend_df, age, gender, income, children, housing, relationship, count, top_products)
    
    ## get products with most revenue per customer by considering individual fields in segment
    top_products += get_top_rev_per_customer_product_for_col(spend_df, age, gender, income, children, housing, relationship, count, top_products)
    
    return list(top_products)

In [50]:
recommend_product_for_category(segment_revenue_and_count, "18-24", "Female", "VeryHigh", "None", "House", "Separated", 4)

['Fitness|Sport Equipment',
 'Other|Wowcher Gift Card',
 'Beauty|Spa',
 'Fashion|Cosy-wear & Nightwear',
 'Children and Baby|Toys',
 'Garden|Plants & Flowers',
 'Home|Bedding',
 'Home|Beds & Mattresses',
 'Garden|Garden Furniture',
 'Spas & Country House|Country House Hotels',
 'Garden|Garden Leisure',
 'European City Breaks|Other Cities']

## Q) Please also describe any additional data points you would like to utilise with a personalisation algorithm

1) Purchases are frequently driven by cyclical patterns. However, in this data, we don't have any temporal information.
This makes it hard toidentify patterns in customer behavior and preferences over time and enable the algorithm to make more accurate predictions.

2) Customer feedback is also missing, which is crucial. Feedback from customers such as ratings can be used to generate recommendations for other customers who have similar personal information like Age, Gender etc.

3) Demographical data, if available, can also be utilised in this problem. The idea is that customers in a particular region may have some specific product preferences, which can be recommended to other customers.
