<a href="https://colab.research.google.com/github/martin-fabbri/colab-notebooks/blob/master/product-analytics/customer_segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# A/B Testing

## Cohort Analysis

### Time cohorts

In [0]:
#@title ## Setup
#@markdown * Import dependencies
#@markdown * App purchases
#@markdown ---
#@markdown **Data Set Information:**
#@markdown Synthetic app purchases dataset

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

from datetime import datetime
from datetime import timedelta

# Download both CSV inputs into the working directory; -q suppresses wget's progress output.
!wget https://raw.githubusercontent.com/martin-fabbri/colab-notebooks/master/data/ab_testing_customer_data.csv -q
!wget https://raw.githubusercontent.com/martin-fabbri/colab-notebooks/master/data/ab_testing_app_purchase.csv -q

In [0]:
# Customer attributes (uid, country, gender, spent, purchases, date, group, device).
customer_data = pd.read_csv(filepath_or_buffer='ab_testing_customer_data.csv')
# Individual purchase records (date, uid, sku, price).
app_purchases = pd.read_csv(filepath_or_buffer='ab_testing_app_purchase.csv')

In [5]:
# Preview the first three customer rows.
customer_data.head(n=3)

Unnamed: 0,uid,country,gender,spent,purchases,date,group,device
0,11115722,MEX,F,1595,5,2016-03-08,GRP B,I
1,11122053,USA,M,498,2,2017-07-14,GRP B,I
2,11128688,USA,F,2394,6,2017-09-17,GRP A,I


In [6]:
# Preview the first three purchase rows.
app_purchases.head(n=3)

Unnamed: 0,date,uid,sku,price
0,7/10/2017,11115722,sku_three_499,499
1,7/15/2017,11122053,sku_three_499,499
2,11/12/2017,11128688,sku_four_599,599


In [7]:
# Inner-join purchases with customer attributes on the shared `uid` key.
# Both inputs carry a `date` column, so pandas suffixes them: `date_x` comes
# from app_purchases (left) and `date_y` from customer_data (right).
uid_combined_data = pd.merge(
    app_purchases,
    customer_data,
    how='inner',
    on='uid',
)
# Summary statistics for the numeric columns of the joined frame.
uid_combined_data.describe()

Unnamed: 0,uid,price,spent,purchases
count,9006.0,9006.0,9006.0,9006.0
mean,19854040.0,406.772596,1897.186986,4.5674
std,5025477.0,187.91823,1367.149729,3.121144
min,11115720.0,99.0,99.0,1.0
25%,15561130.0,299.0,897.0,2.0
50%,19814170.0,299.0,1596.0,4.0
75%,24230750.0,499.0,2594.0,6.0
max,28525330.0,899.0,11176.0,24.0


In [8]:
# Preview the first five joined rows (five is head()'s default).
uid_combined_data.head(n=5)

Unnamed: 0,date_x,uid,sku,price,country,gender,spent,purchases,date_y,group,device
0,7/10/2017,11115722,sku_three_499,499,MEX,F,1595,5,2016-03-08,GRP B,I
1,7/15/2017,11122053,sku_three_499,499,USA,M,498,2,2017-07-14,GRP B,I
2,11/12/2017,11128688,sku_four_599,599,USA,F,2394,6,2017-09-17,GRP A,I
3,9/26/2017,11130578,sku_two_299,299,USA,F,1197,3,2017-11-30,GRP A,I
4,12/1/2017,11130759,sku_four_599,599,ESP,M,1297,3,2018-01-10,GRP B,A


In [9]:
# Mean purchase price, requested by name through the generic `agg` interface.
app_purchases['price'].agg('mean')

406.77259604707973

In [10]:
# Same statistic via the dedicated `mean` method.
app_purchases['price'].mean()

406.77259604707973

In [11]:
# Passing a list of statistic names to `agg` returns one labelled value per name.
app_purchases['price'].agg(['mean', 'median'])

mean      406.772596
median    299.000000
Name: price, dtype: float64

In [12]:
# Mean and median of `price` and `purchases` over the joined data;
# the result has one column per input column and one row per statistic.
purchase_summary = uid_combined_data.agg(
    {column: ['mean', 'median'] for column in ('price', 'purchases')}
)
purchase_summary

Unnamed: 0,price,purchases
mean,406.772596,4.5674
median,299.0,4.0


In [13]:
# Split the joined data by device and gender, then summarise `price`
# within each (device, gender) group.
grouped_purchase_data = uid_combined_data.groupby(by=['device', 'gender'])
# NOTE: this rebinds `purchase_summary`, replacing the overall summary computed earlier.
purchase_summary = grouped_purchase_data.agg(
    {'price': ['mean', 'median', 'std']}
)
purchase_summary

Unnamed: 0_level_0,Unnamed: 1_level_0,price,price,price
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,std
device,gender,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,F,413.396355,499,191.364475
A,M,402.425846,299,184.64122
I,F,406.091562,299,188.163379
I,M,405.455863,299,187.569669


In [24]:
# Reference date for the analysis.
cutoff_date = datetime(year=2018, month=3, day=17)
# Latest admissible date: four weeks (28 days) before the cutoff.
max_purchase_date = cutoff_date - timedelta(weeks=4)
max_purchase_date

datetime.datetime(2018, 2, 17, 0, 0)

Filter to include only users who registered before the maximum purchase date (`max_purchase_date`).

In [23]:
# Keep only the rows whose customer date falls strictly before `max_purchase_date`.
# The original line was an incomplete expression (a syntax error) and indexed
# `purchase_data`, a name never defined in this notebook; the joined frame
# `uid_combined_data` is used instead.
# NOTE(review): `date_y` is the customer-table `date` column after the merge and
# is presumed to be the registration date -- confirm against the dataset.
# `date_y` is read from CSV as text, so it is parsed before comparing with a datetime.
purchase_data_filt = uid_combined_data[
    pd.to_datetime(uid_combined_data['date_y']) < max_purchase_date
]

datetime.datetime(2018, 3, 17, 0, 0)