## Dependencies
Libraries needed in this notebook for data visualization and EDA

In [1]:
import sys
import pandas as pd
import numpy as np

import missingno as msno

import plotly.express as px

sys.path.append('..')
from starbucks_campaigns_analytics import read_complete_starbucks_data
from starbucks_campaigns_analytics.plotting import funnel_plot

## Gather data

In [2]:
# Read Starbucks data using the helper function 
# defined in starbucks_campaigns_analytics module
df = read_complete_starbucks_data()
df.head()

Unnamed: 0,person,event,time,offer_id,amount,reward_expected,gender,age,became_member_on,income,profile_group,reward,difficulty,duration,offer_type,web,email,mobile,social
0,78afa995795e4d85b5d9ceeca43f5fef,offer received,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,,,F,75,20170509,100000.0,0,5.0,5.0,168.0,bogo,1.0,1.0,1.0,0.0
1,78afa995795e4d85b5d9ceeca43f5fef,offer viewed,6,9b98b8c7a33c4b65b9aebfe6a799e6d9,,,F,75,20170509,100000.0,0,5.0,5.0,168.0,bogo,1.0,1.0,1.0,0.0
2,78afa995795e4d85b5d9ceeca43f5fef,transaction,132,,19.89,,F,75,20170509,100000.0,0,,,,,,,,
3,78afa995795e4d85b5d9ceeca43f5fef,offer completed,132,9b98b8c7a33c4b65b9aebfe6a799e6d9,,5.0,F,75,20170509,100000.0,0,5.0,5.0,168.0,bogo,1.0,1.0,1.0,0.0
4,78afa995795e4d85b5d9ceeca43f5fef,transaction,144,,17.78,,F,75,20170509,100000.0,0,,,,,,,,


In [3]:
# New columns for offer viewed (by type)
df['bogo_offer_viewed'] = ((df['event']=='offer viewed') 
                           & (df['offer_type']=='bogo'))

df['disc_offer_viewed'] = ((df['event']=='offer viewed') 
                           & (df['offer_type']=='discount'))

df['info_offer_viewed'] = ((df['event']=='offer viewed') 
                           & (df['offer_type']=='informational'))

df['#bogo_offer_viewed'] = df.groupby('person')['bogo_offer_viewed'].transform('sum')
df['#disc_offer_viewed'] = df.groupby('person')['disc_offer_viewed'].transform('sum')
df['#info_offer_viewed'] = df.groupby('person')['info_offer_viewed'].transform('sum')

In [4]:
# New columns for offer completed (by type)
df['bogo_offer_completed'] = ((df['event']=='offer completed') 
                           & (df['offer_type']=='bogo'))
df['disc_offer_completed'] = ((df['event']=='offer completed') 
                           & (df['offer_type']=='discount'))
# df['info_offer_completed'] = ((df['event']=='offer completed') 
#                            & (df['offer_type']=='informational'))

df['#bogo_offer_completed'] = df.groupby('person')['bogo_offer_completed'].transform('sum')
df['#disc_offer_completed'] = df.groupby('person')['disc_offer_completed'].transform('sum')
# df['#info_offer_completed'] = df.groupby('person')['info_offer_completed'].transform('sum')

In [7]:
# New columns for ratios (completed/viewed)
df['bogo_offer_ratio'] = df['#bogo_offer_completed']/df['#bogo_offer_viewed']
df['disc_offer_ratio'] = df['#disc_offer_completed']/df['#disc_offer_viewed']

In [9]:
df.sample(12)

Unnamed: 0,person,event,time,offer_id,amount,reward_expected,gender,age,became_member_on,income,...,info_offer_viewed,#bogo_offer_viewed,#disc_offer_viewed,#info_offer_viewed,bogo_offer_completed,disc_offer_completed,#bogo_offer_completed,#disc_offer_completed,bogo_offer_ratio,disc_offer_ratio
19577,2449f2f011d24c849f72b15303f89f4b,offer received,408,ae264e3637204a6fb9bb56bc8210ddfd,,,F,37,20160131,51000.0,...,False,3.0,0.0,0.0,False,False,2.0,0.0,0.666667,
19753,79a1b2e7bab8420c98a59530a783155d,transaction,522,,7.76,,F,46,20130927,42000.0,...,False,3.0,0.0,0.0,False,False,1.0,1.0,0.333333,inf
100695,ec539cd856754923995755f0dd1af470,offer completed,582,2906b810c7d4411798c6938adc9daaa5,,2.0,F,78,20161107,44000.0,...,False,2.0,0.0,0.0,False,True,2.0,1.0,1.0,inf
19241,fe0d05246e9040578a27cb39e23a1bdd,offer completed,240,2906b810c7d4411798c6938adc9daaa5,,2.0,M,91,20150208,78000.0,...,False,0.0,3.0,3.0,False,True,0.0,3.0,,1.0
193117,c0bdfd5f10d84c1e981c48c48373e983,offer completed,612,2298d6c36e964ae4a3e7e9706d1fb8c2,,3.0,M,84,20150808,43000.0,...,False,1.0,2.0,1.0,False,True,0.0,2.0,0.0,1.0
204315,ee8155c31f314673b8043c3881976c4b,offer completed,414,f19421c1d4aa40978ebb69ca19b0e20d,,5.0,F,57,20170824,95000.0,...,False,1.0,1.0,0.0,True,False,2.0,3.0,2.0,3.0
227757,81f948e3a4134729ba5c0e15d221ce5d,transaction,180,,23.01,,F,58,20170731,61000.0,...,False,0.0,2.0,1.0,False,False,0.0,2.0,,1.0
162154,d3956fdde71b4aa08a7d081f282a914f,offer received,0,fafdcd668e3743c1bb461111dcafc2a4,,,M,41,20180517,85000.0,...,False,0.0,1.0,2.0,False,False,1.0,1.0,inf,1.0
113809,f64f9645524f493d83956f3211bb4c2d,offer received,168,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,M,33,20171103,58000.0,...,False,3.0,1.0,0.0,False,False,0.0,0.0,0.0,0.0
187716,7d14c8ecf4ed4f0e8b817a8614bfaaa4,offer received,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,,,F,77,20170106,37000.0,...,False,0.0,2.0,1.0,False,False,0.0,2.0,,1.0


In [10]:
df.loc[(df['person']=='ee8155c31f314673b8043c3881976c4b') & (df['event']=='offer completed')]

Unnamed: 0,person,event,time,offer_id,amount,reward_expected,gender,age,became_member_on,income,...,info_offer_viewed,#bogo_offer_viewed,#disc_offer_viewed,#info_offer_viewed,bogo_offer_completed,disc_offer_completed,#bogo_offer_completed,#disc_offer_completed,bogo_offer_ratio,disc_offer_ratio
204306,ee8155c31f314673b8043c3881976c4b,offer completed,60,0b1e1539f2cc45b7b9fa7c272da2e1d7,,5.0,F,57,20170824,95000.0,...,False,1.0,1.0,0.0,False,True,2.0,3.0,2.0,3.0
204309,ee8155c31f314673b8043c3881976c4b,offer completed,210,0b1e1539f2cc45b7b9fa7c272da2e1d7,,5.0,F,57,20170824,95000.0,...,False,1.0,1.0,0.0,False,True,2.0,3.0,2.0,3.0
204315,ee8155c31f314673b8043c3881976c4b,offer completed,414,f19421c1d4aa40978ebb69ca19b0e20d,,5.0,F,57,20170824,95000.0,...,False,1.0,1.0,0.0,True,False,2.0,3.0,2.0,3.0
204316,ee8155c31f314673b8043c3881976c4b,offer completed,414,fafdcd668e3743c1bb461111dcafc2a4,,2.0,F,57,20170824,95000.0,...,False,1.0,1.0,0.0,False,True,2.0,3.0,2.0,3.0
204325,ee8155c31f314673b8043c3881976c4b,offer completed,594,ae264e3637204a6fb9bb56bc8210ddfd,,10.0,F,57,20170824,95000.0,...,False,1.0,1.0,0.0,True,False,2.0,3.0,2.0,3.0


In [None]:
df.sample(15)

In [None]:
# dataframe only with person who view at least one bogo offer
person_bogo = df.loc[((df['event']=='offer viewed') 
                      & (df['offer_type']=='bogo')),'person']
df_bogo = df[df['person'].isin(person_bogo)].copy()

In [None]:
df_bogo.shape

In [None]:
df_bogo['bogo_offer_viewed'] = ((df_bogo['event']=='offer viewed') 
                                & (df_bogo['offer_type']=='bogo'))
df_bogo['bogo_offer_viewed'] = ((df_bogo['event']=='offer viewed') 
                                & (df_bogo['offer_type']=='bogo'))

In [None]:
df_bogo.sample(15)