# Problem Statement

The task is to combine transaction, demographic, and offer data to determine which demographic groups respond best to which offer type. Here, we are just concerned with the bogo offer, and we will assess the effectiveness of this offer and identify the customers who respond best to it, enabling us to target them effectively and reduce customer churn rate.

# Import libraries

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Import data

In [2]:
starbucks_bogo_data = pd.read_csv('/content/df_trans_cust_bogo_succ_rate.csv')
starbucks_bogo_data.head()

Unnamed: 0,customer_id,gender,age,offer_received,offer_viewed,offer_completed,offer_view_rate,offer_comp_rate,total_trans,amount_trans,rewards_earned,time_lapsed_succ,income,membership_days,income_range,membership_range,age_range
0,0009655768c64bdeb2e877511632db8f,M,33.0,1,1,1,100.0,100.0,8.0,127.6,9.0,0.0,72000.0,2205.0,"(70000, 90000]","(2200, 2700]","(30, 35]"
1,00116118485d4dfda04fdbaba9a87b5c,,,2,2,0,100.0,0.0,,,,,,,,,
2,0011e0d4e6b944f998e987f904e8c1e5,O,40.0,1,1,1,100.0,100.0,5.0,79.46,13.0,270.0,57000.0,1942.0,"(50000, 70000]","(1700, 2200]","(35, 40]"
3,0020c2b971eb4e9188eac86d93036a77,F,59.0,2,1,1,50.0,50.0,8.0,196.86,14.0,84.0,90000.0,2618.0,"(70000, 90000]","(2200, 2700]","(55, 60]"
4,0020ccbbb6d84e358d3414a3ff76cffd,F,24.0,2,2,2,100.0,100.0,12.0,154.05,13.0,102.0,60000.0,2366.0,"(50000, 70000]","(2200, 2700]","(20, 25]"


# Preprocessing

## Check/Drop null values

In [3]:
# Check null values
starbucks_bogo_data.isnull().sum()

customer_id            0
gender              1910
age                 1910
offer_received         0
offer_viewed           0
offer_completed        0
offer_view_rate        0
offer_comp_rate        0
total_trans         1910
amount_trans        1910
rewards_earned      1910
time_lapsed_succ    4802
income              1910
membership_days     1910
income_range        1990
membership_range    1910
age_range           1912
dtype: int64

In [4]:
# Drop null values
starbucks_bogo_data.dropna(inplace=True)
starbucks_bogo_data.head()

Unnamed: 0,customer_id,gender,age,offer_received,offer_viewed,offer_completed,offer_view_rate,offer_comp_rate,total_trans,amount_trans,rewards_earned,time_lapsed_succ,income,membership_days,income_range,membership_range,age_range
0,0009655768c64bdeb2e877511632db8f,M,33.0,1,1,1,100.0,100.0,8.0,127.6,9.0,0.0,72000.0,2205.0,"(70000, 90000]","(2200, 2700]","(30, 35]"
2,0011e0d4e6b944f998e987f904e8c1e5,O,40.0,1,1,1,100.0,100.0,5.0,79.46,13.0,270.0,57000.0,1942.0,"(50000, 70000]","(1700, 2200]","(35, 40]"
3,0020c2b971eb4e9188eac86d93036a77,F,59.0,2,1,1,50.0,50.0,8.0,196.86,14.0,84.0,90000.0,2618.0,"(70000, 90000]","(2200, 2700]","(55, 60]"
4,0020ccbbb6d84e358d3414a3ff76cffd,F,24.0,2,2,2,100.0,100.0,12.0,154.05,13.0,102.0,60000.0,2366.0,"(50000, 70000]","(2200, 2700]","(20, 25]"
5,004b041fbfe44859945daa2c7f79ee64,F,55.0,1,1,1,100.0,100.0,6.0,138.36,7.0,162.0,74000.0,1823.0,"(70000, 90000]","(1700, 2200]","(50, 55]"


## Typecast columns

In [5]:
starbucks_bogo_data[['age', 'total_trans', 'rewards_earned', 'time_lapsed_succ', 'income']] = starbucks_bogo_data[['age', 'total_trans', 'rewards_earned', 'time_lapsed_succ', 'income']].astype(int)
starbucks_bogo_data.head()

Unnamed: 0,customer_id,gender,age,offer_received,offer_viewed,offer_completed,offer_view_rate,offer_comp_rate,total_trans,amount_trans,rewards_earned,time_lapsed_succ,income,membership_days,income_range,membership_range,age_range
0,0009655768c64bdeb2e877511632db8f,M,33,1,1,1,100.0,100.0,8,127.6,9,0,72000,2205.0,"(70000, 90000]","(2200, 2700]","(30, 35]"
2,0011e0d4e6b944f998e987f904e8c1e5,O,40,1,1,1,100.0,100.0,5,79.46,13,270,57000,1942.0,"(50000, 70000]","(1700, 2200]","(35, 40]"
3,0020c2b971eb4e9188eac86d93036a77,F,59,2,1,1,50.0,50.0,8,196.86,14,84,90000,2618.0,"(70000, 90000]","(2200, 2700]","(55, 60]"
4,0020ccbbb6d84e358d3414a3ff76cffd,F,24,2,2,2,100.0,100.0,12,154.05,13,102,60000,2366.0,"(50000, 70000]","(2200, 2700]","(20, 25]"
5,004b041fbfe44859945daa2c7f79ee64,F,55,1,1,1,100.0,100.0,6,138.36,7,162,74000,1823.0,"(70000, 90000]","(1700, 2200]","(50, 55]"


## Select features

In [6]:
X = starbucks_bogo_data[['gender', 'age', 'offer_view_rate', 'offer_comp_rate', 'total_trans', 'amount_trans', 'rewards_earned', 'time_lapsed_succ', 'income', 'membership_days']].values
X

array([['M', 33, 100.0, ..., 0, 72000, 2205.0],
       ['O', 40, 100.0, ..., 270, 57000, 1942.0],
       ['F', 59, 50.0, ..., 84, 90000, 2618.0],
       ...,
       ['M', 71, 66.67, ..., 0, 73000, 2012.0],
       ['M', 34, 100.0, ..., 0, 34000, 2293.0],
       ['F', 45, 100.0, ..., 78, 62000, 2352.0]], dtype=object)

## Data encoding

In [7]:
# One-Hot encoding
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0])], remainder='passthrough')
X = np.array(ct.fit_transform(X))
X

array([[0.0, 1.0, 0.0, ..., 0, 72000, 2205.0],
       [0.0, 0.0, 1.0, ..., 270, 57000, 1942.0],
       [1.0, 0.0, 0.0, ..., 84, 90000, 2618.0],
       ...,
       [0.0, 1.0, 0.0, ..., 0, 73000, 2012.0],
       [0.0, 1.0, 0.0, ..., 0, 34000, 2293.0],
       [1.0, 0.0, 0.0, ..., 78, 62000, 2352.0]], dtype=object)

# Feature scaling

In [9]:
# Feature scaling
sc = StandardScaler()
X = sc.fit_transform(X)
X

array([[-0.90487655,  0.93207068, -0.12213275, ..., -0.88686617,
         0.17812063, -0.26170037],
       [-0.90487655, -1.07288   ,  8.18781219, ...,  2.45360421,
        -0.51460108, -0.89353536],
       [ 1.10512312, -1.07288   , -0.12213275, ...,  0.15239128,
         1.00938668,  0.73049678],
       ...,
       [-0.90487655,  0.93207068, -0.12213275, ..., -0.88686617,
         0.22430208, -0.72536636],
       [-0.90487655,  0.93207068, -0.12213275, ..., -0.88686617,
        -1.57677437, -0.0502879 ],
       [ 1.10512312, -1.07288   , -0.12213275, ...,  0.07815861,
        -0.28369384,  0.09145455]])

# Train model

In [10]:
# K-means Clustering
kmeans = KMeans(n_clusters=2, init='k-means++', random_state=42)
y_kmeans = kmeans.fit_predict(X)
y_kmeans



array([0, 1, 1, ..., 0, 0, 1], dtype=int32)

# Predict/Analyse offer success

In [11]:
starbucks_bogo_data['offer_success'] = y_kmeans
starbucks_bogo_data

Unnamed: 0,customer_id,gender,age,offer_received,offer_viewed,offer_completed,offer_view_rate,offer_comp_rate,total_trans,amount_trans,rewards_earned,time_lapsed_succ,income,membership_days,income_range,membership_range,age_range,offer_success
0,0009655768c64bdeb2e877511632db8f,M,33,1,1,1,100.00,100.00,8,127.60,9,0,72000,2205.0,"(70000, 90000]","(2200, 2700]","(30, 35]",0
2,0011e0d4e6b944f998e987f904e8c1e5,O,40,1,1,1,100.00,100.00,5,79.46,13,270,57000,1942.0,"(50000, 70000]","(1700, 2200]","(35, 40]",1
3,0020c2b971eb4e9188eac86d93036a77,F,59,2,1,1,50.00,50.00,8,196.86,14,84,90000,2618.0,"(70000, 90000]","(2200, 2700]","(55, 60]",1
4,0020ccbbb6d84e358d3414a3ff76cffd,F,24,2,2,2,100.00,100.00,12,154.05,13,102,60000,2366.0,"(50000, 70000]","(2200, 2700]","(20, 25]",1
5,004b041fbfe44859945daa2c7f79ee64,F,55,1,1,1,100.00,100.00,6,138.36,7,162,74000,1823.0,"(70000, 90000]","(1700, 2200]","(50, 55]",1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14986,fff29fb549084123bd046dbc5ceb4faa,F,59,9,7,9,77.78,100.00,11,291.47,47,0,93000,2073.0,"(90000, 110000]","(1700, 2200]","(55, 60]",1
14987,fff3ba4757bd42088c044ca26d73817a,F,69,1,1,1,100.00,100.00,11,580.98,9,0,83000,2784.0,"(70000, 90000]","(2700, 3200]","(65, 70]",1
14988,fff7576017104bcc8677a8d63322b5e1,M,71,3,2,1,66.67,33.33,6,29.94,9,0,73000,2012.0,"(70000, 90000]","(1700, 2200]","(70, 75]",0
14990,fffad4f4828548d1b5583907f2e9906b,M,34,5,5,5,100.00,100.00,12,88.83,15,0,34000,2293.0,"(30000, 50000]","(2200, 2700]","(30, 35]",0


In [12]:
starbucks_bogo_data[['offer_success', 'gender', 'age', 'offer_view_rate', 'offer_comp_rate', 'total_trans', 'amount_trans', 'rewards_earned', 'time_lapsed_succ', 'income', 'membership_days']]

Unnamed: 0,offer_success,gender,age,offer_view_rate,offer_comp_rate,total_trans,amount_trans,rewards_earned,time_lapsed_succ,income,membership_days
0,0,M,33,100.00,100.00,8,127.60,9,0,72000,2205.0
2,1,O,40,100.00,100.00,5,79.46,13,270,57000,1942.0
3,1,F,59,50.00,50.00,8,196.86,14,84,90000,2618.0
4,1,F,24,100.00,100.00,12,154.05,13,102,60000,2366.0
5,1,F,55,100.00,100.00,6,138.36,7,162,74000,1823.0
...,...,...,...,...,...,...,...,...,...,...,...
14986,1,F,59,77.78,100.00,11,291.47,47,0,93000,2073.0
14987,1,F,69,100.00,100.00,11,580.98,9,0,83000,2784.0
14988,0,M,71,66.67,33.33,6,29.94,9,0,73000,2012.0
14990,0,M,34,100.00,100.00,12,88.83,15,0,34000,2293.0


# Data Analysis and Insights

In [15]:
starbucks_bogo_data[starbucks_bogo_data['offer_success'] == 1].describe()

Unnamed: 0,age,offer_received,offer_viewed,offer_completed,offer_view_rate,offer_comp_rate,total_trans,amount_trans,rewards_earned,time_lapsed_succ,income,membership_days,offer_success
count,4710.0,4710.0,4710.0,4710.0,4710.0,4710.0,4710.0,4710.0,4710.0,4710.0,4710.0,4710.0,4710.0
mean,57.878981,2.447346,2.057749,1.975372,84.138679,77.74093,8.67431,159.43997,15.700212,67.889384,72666.878981,2284.87155,1.0
std,16.628088,1.738136,1.645105,1.819361,29.096815,34.184765,4.70868,138.973318,8.340323,77.469031,22099.412127,367.66413,0.0
min,18.0,1.0,0.0,0.0,0.0,0.0,1.0,8.9,2.0,0.0,31000.0,1744.0,1.0
25%,48.0,1.0,1.0,1.0,71.43,50.0,5.0,78.75,10.0,6.0,56000.0,1992.0,1.0
50%,58.0,2.0,2.0,1.0,100.0,100.0,8.0,133.3,15.0,42.0,72000.0,2213.0,1.0
75%,69.0,3.0,3.0,2.0,100.0,100.0,12.0,198.0675,21.0,102.0,89000.0,2546.0,1.0
max,100.0,17.0,17.0,17.0,100.0,100.0,28.0,1608.69,50.0,594.0,120000.0,3566.0,1.0


# Offer success rate

In [13]:
starbucks_bogo_data['offer_success'].value_counts()

0    5428
1    4710
Name: offer_success, dtype: int64

# Conclusions

**Insights**

Customers actually viewed and completed the bogo offer successfully

- **Age range** 48-70
- **Offer received** 1-3
- **Total transactions made** 5-12
- **Total amount spent** 80-200 dollars
- **Rewards earned** 10-21
- **Time lapsed success** 6-100 hours, mostly round 60 hours
- **Income** 55,000 - 90,000 dollars
- **Membership** 2000 - 2500 days or 5.5 - 7 years

So, now we have an idea which customers are completing the BOGO offer successfully and whom to engage properly. We need to send more bogo offers or think better business stategies for the customers not covering in the above mentioned ranges of attributes.



**Bogo offer success rate**

Currently, we have around 50% BOGO offer success rate as per data collected

