# Calculate Wallet Share 
- This notebook computes wallet share per customer and per category, and saves a per-transaction table with wallet share by category to CSV.


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

In [2]:
# Load the transactions table (including the recency/frequency/monetary columns)
# and list its columns as a quick schema check.
df = pd.read_csv(filepath_or_buffer='./DataSources/df_rfm.csv')
df.columns

Index(['transaction_id', 'uid', 'order_value', 'created_at', 'merchant_name',
       'category', 'recency', 'frequency', 'monetary'],
      dtype='object')

In [3]:
# Step 1: sum order_value per uid, giving one row per customer
# with columns ['uid', 'order_value'].
total_spend_by_customer = (
    df.groupby('uid')['order_value']
    .sum()
    .reset_index()
)

In [4]:
# Step 2: grand total of order_value over every row of df
# (all customers, all categories).
total_spend_all_customers = df.loc[:, 'order_value'].sum()

In [5]:
# Step 3: each customer's share, in percent, of the total spend across all customers.
# Note: this is a general (not merchant-specific) share; the denominator is the
# grand total over the whole dataset, so the column sums to 100 across customers.
total_spend_by_customer['wallet_share'] = (
    total_spend_by_customer['order_value']
    .div(total_spend_all_customers)
    .mul(100)
)

In [6]:
# Preview the first five customers with their spend and share.
total_spend_by_customer.head(n=5)

Unnamed: 0,uid,order_value,wallet_share
0,1897bfcf9c2bcab8462930b3d0d952f35cb6,317.0,0.019147
1,56cc9e626b422ecbe87247f6f0dffc289bce,94.08,0.005682
2,VOXLAT0006010732020002203,63.92,0.003861
3,VOXLAT0006010732020002641,290.0,0.017516
4,VOXLAT0006010732020003193,92.55,0.00559


In [7]:
total_spend_by_customer
# TODO: remove the uniqueness in this frame and add recency for all customers.

Unnamed: 0,uid,order_value,wallet_share
0,1897bfcf9c2bcab8462930b3d0d952f35cb6,317.00,0.019147
1,56cc9e626b422ecbe87247f6f0dffc289bce,94.08,0.005682
2,VOXLAT0006010732020002203,63.92,0.003861
3,VOXLAT0006010732020002641,290.00,0.017516
4,VOXLAT0006010732020003193,92.55,0.005590
...,...,...,...
4463,VOXLAT0006010732026626773,138.51,0.008366
4464,VOXLAT0006010732026628894,24.99,0.001509
4465,VOXLAT0006010732026629546,127.42,0.007696
4466,VOXLAT0006010732026630502,926.48,0.055960


## Wallet share calculation by category

In [9]:
# Total order_value per category across all customers.
total_spend_by_category = (
    df.groupby('category')['order_value']
    .sum()
    .reset_index()
)

# Per (uid, category) spend, joined to the category total, plus the customer's
# share of that category total in percent.
# After the merge, 'order_value_customer' is the customer's spend in the category
# and 'order_value_total' is the category-wide spend.
spend_by_customer_in_category = (
    df.groupby(['uid', 'category'])['order_value']
    .sum()
    .reset_index()
    .merge(total_spend_by_category, on='category', suffixes=('_customer', '_total'))
    .assign(
        wallet_share_by_category=lambda t: t['order_value_customer'] / t['order_value_total'] * 100
    )
)

# Preview the result.
spend_by_customer_in_category.head()

Unnamed: 0,uid,category,order_value_customer,order_value_total,wallet_share_by_category
0,1897bfcf9c2bcab8462930b3d0d952f35cb6,Miscellaneous,317.0,317.0,100.0
1,56cc9e626b422ecbe87247f6f0dffc289bce,Marketplace,94.08,94.08,100.0
2,VOXLAT0006010732020002203,Retailers,63.92,1164391.0,0.00549
3,VOXLAT0006010732020002641,Fashion & Retail,290.0,261796.18,0.110773
4,VOXLAT0006010732020003193,Retailers,92.55,1164391.0,0.007948


## Validating dataframe

In [10]:
# Cross-check: raw transactions in the 'Miscellaneous' category.
df.loc[df['category'] == 'Miscellaneous']


Unnamed: 0,transaction_id,uid,order_value,created_at,merchant_name,category,recency,frequency,monetary
828,303357338091875,1897bfcf9c2bcab8462930b3d0d952f35cb6,317.0,2023-12-23 09:23:32,Deliveroo Hong Kong,Miscellaneous,66,1,317.0


In [11]:

# Cross-check: raw transactions in the 'Marketplace' category.
df.loc[df['category'] == 'Marketplace']

Unnamed: 0,transaction_id,uid,order_value,created_at,merchant_name,category,recency,frequency,monetary
9598,383082084983572,56cc9e626b422ecbe87247f6f0dffc289bce,59.09,2023-03-23 02:21:41,Amazon Singapore INTERNAL,Marketplace,341,2,94.08
9779,583077310840333,56cc9e626b422ecbe87247f6f0dffc289bce,34.99,2023-03-18 08:38:08,Amazon Singapore INTERNAL,Marketplace,346,2,94.08


In [12]:
# Cross-check: all category rows for one customer who spent in several categories.
spend_by_customer_in_category.loc[
    spend_by_customer_in_category['uid'] == "VOXLAT0006010732026109416"
]

Unnamed: 0,uid,category,order_value_customer,order_value_total,wallet_share_by_category
3779,VOXLAT0006010732026109416,Fashion & Retail,71.99,261796.18,0.027498
3780,VOXLAT0006010732026109416,Health and Wellness,80.0,4946.46,1.617318
3781,VOXLAT0006010732026109416,Retailers,185.87,1164391.0,0.015963


In [13]:
# Cross-check: summing every customer's spend in "Health and Wellness" should
# reproduce that category's order_value_total.
spend_by_customer_in_category.loc[
    spend_by_customer_in_category['category'] == "Health and Wellness",
    'order_value_customer',
].sum()

4946.46

In [14]:
# Manual check of wallet_share_by_category for the customer inspected above:
# 80.00 is that customer's "Health and Wellness" spend and 4946.46 is the
# category total, so the result should match the 1.617318 shown in the table.
(80.00 /4946.46) * 100

1.6173182437541191

Creating a DataFrame with the wallet share by category for each transaction.

# wallet share by category

In [None]:
# Total order_value per category across all customers.
total_spend_by_category = (
    df.groupby('category')['order_value']
    .sum()
    .reset_index()
)

# Attach the category total to every transaction ('order_value_total') and
# express each transaction's order_value as a percentage of that total.
# NOTE(review): merge defaults to an inner join and groupby drops missing keys by
# default, so transactions with a missing category would presumably be dropped
# here; confirm that is intended.
df_with_total_spend = (
    df.merge(total_spend_by_category, on='category', suffixes=('', '_total'))
    .assign(
        wallet_share_by_category=lambda t: t['order_value'] / t['order_value_total'] * 100
    )
)

# df_with_total_spend now holds one row per merged transaction, with the extra
# columns 'order_value_total' and 'wallet_share_by_category'.


In [39]:
# Persist the per-transaction wallet-share table; the DataFrame index is not written.
df_with_total_spend.to_csv(
    path_or_buf='./DataSources/with_wallet_share.csv',
    index=False,
)