# User Groups that we are interested in

1. Users that viewed the offer
    1. Did they complete the offer
        - What types of offers did they complete?
    2. Did they at least have transactions
        - What types of offers did they not complete?
        - Can try to see how many transactions there were and what percentage of the offer they completed.

We need to see what types of users fall into each of these categories to see if the offer type changes their completion status

# Library Imports

In [6]:
import pandas as pd
import numpy as np

from functools import reduce

from sqlalchemy import create_engine

from datetime import date

import matplotlib.pyplot as plt
import seaborn as sns

# Data Loading

In [7]:
conn = create_engine('sqlite:///data/starbucks_data.db')

offers = pd.read_sql('SELECT * FROM offers', conn)
offers_wo_chnl = offers.drop(columns=['offer_notification_channel']).drop_duplicates()

users = pd.read_sql('SELECT * FROM users', conn)
users['user_age_group'] = users['user_age_group'].fillna('Unknown')
users['user_age_group'] = users['user_age_group'].astype(str)
categories = ['Unknown'] + sorted(users['user_age_group'].unique().tolist())[:-1]
users['user_age_group'] = pd.Categorical(users['user_age_group'], categories=categories, ordered=True)

interactions = pd.read_sql('SELECT * FROM interactions', conn)

offer_recv_view_comp = pd.read_sql('offer_recv_view_comp', conn)
offer_recv_view_nocomp = pd.read_sql('offer_recv_view_nocomp', conn)

In [8]:
users

Unnamed: 0,user_id,user_gender,user_age,user_member_since,user_income,user_tenure,user_age_group
0,68be06ca386d4c31939f3a4f0e3dd783,,,2017-02-12 00:00:00.000000,,1,Unknown
1,0610b486422d4921ae7d2bf64640c50b,F,55.0,2017-07-15 00:00:00.000000,112000.0,1,56-65
2,38fe809add3b4fcf9315a9694bb96ff5,,,2018-07-12 00:00:00.000000,,0,Unknown
3,78afa995795e4d85b5d9ceeca43f5fef,F,75.0,2017-05-09 00:00:00.000000,100000.0,1,66+
4,a03223e636434f42ac4c3df47e8bac43,,,2017-08-04 00:00:00.000000,,1,Unknown
...,...,...,...,...,...,...,...
16995,6d5f3a774f3d4714ab0c092238f3a1d7,F,45.0,2018-06-04 00:00:00.000000,54000.0,0,46-55
16996,2cb4f97358b841b9a9773a7aa05a9d77,M,61.0,2018-07-13 00:00:00.000000,72000.0,0,56-65
16997,01d26f638c274aa0b965d24cefe3183f,M,49.0,2017-01-26 00:00:00.000000,73000.0,1,46-55
16998,9dc1421481194dcd9400aec7c9ae6366,F,83.0,2016-03-07 00:00:00.000000,50000.0,2,66+


In [19]:
received_offers = interactions[ interactions['intxn_event_type'] == 'offer received' ].copy()
received_offers = received_offers.groupby(['user_id', 'offer_id']).agg({'intxn_event_type': 'count'}).rename(columns={'intxn_event_type': 'received_count'}).reset_index()

received_offers

Unnamed: 0,user_id,offer_id,received_count
0,0009655768c64bdeb2e877511632db8f,2906b810c7d4411798c6938adc9daaa5,1
1,0009655768c64bdeb2e877511632db8f,3f207df678b143eea3cee63160fa8bed,1
2,0009655768c64bdeb2e877511632db8f,5a8bc65990b245e5a138643cd4eb9837,1
3,0009655768c64bdeb2e877511632db8f,f19421c1d4aa40978ebb69ca19b0e20d,1
4,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,1
...,...,...,...
63283,fffad4f4828548d1b5583907f2e9906b,f19421c1d4aa40978ebb69ca19b0e20d,2
63284,ffff82501cea40309d5fdd7edcca4a07,0b1e1539f2cc45b7b9fa7c272da2e1d7,1
63285,ffff82501cea40309d5fdd7edcca4a07,2906b810c7d4411798c6938adc9daaa5,3
63286,ffff82501cea40309d5fdd7edcca4a07,9b98b8c7a33c4b65b9aebfe6a799e6d9,1


# Looking into users/offers pairs that were completed

In [10]:
offer_recv_view_comp

Unnamed: 0,user_id,offer_id,received_event_type,received_time,expiration_time,viewed_event_type,viewed_time,completed_event_type,completed_time
0,0011e0d4e6b944f998e987f904e8c1e5,2298d6c36e964ae4a3e7e9706d1fb8c2,offer received,168,336,offer viewed,186.0,offer completed,252.0
1,0011e0d4e6b944f998e987f904e8c1e5,0b1e1539f2cc45b7b9fa7c272da2e1d7,offer received,408,648,offer viewed,432.0,offer completed,576.0
2,0011e0d4e6b944f998e987f904e8c1e5,9b98b8c7a33c4b65b9aebfe6a799e6d9,offer received,504,672,offer viewed,516.0,offer completed,576.0
3,0020c2b971eb4e9188eac86d93036a77,fafdcd668e3743c1bb461111dcafc2a4,offer received,0,240,offer viewed,12.0,offer completed,54.0
4,0020c2b971eb4e9188eac86d93036a77,4d5c57ea9a6940dd891ad53e9dbe8da0,offer received,408,528,offer viewed,426.0,offer completed,510.0
...,...,...,...,...,...,...,...,...,...
23859,fffad4f4828548d1b5583907f2e9906b,f19421c1d4aa40978ebb69ca19b0e20d,offer received,408,528,offer viewed,510.0,offer completed,516.0
23860,ffff82501cea40309d5fdd7edcca4a07,fafdcd668e3743c1bb461111dcafc2a4,offer received,0,240,offer viewed,6.0,offer completed,60.0
23861,ffff82501cea40309d5fdd7edcca4a07,0b1e1539f2cc45b7b9fa7c272da2e1d7,offer received,168,408,offer viewed,174.0,offer completed,198.0
23862,ffff82501cea40309d5fdd7edcca4a07,2906b810c7d4411798c6938adc9daaa5,offer received,336,504,offer viewed,354.0,offer completed,384.0


In [39]:
# getting information on these completed offers
completed_offers = offer_recv_view_comp[ ['user_id', 'offer_id', 'received_event_type'] ].drop_duplicates().copy()
completed_offers_by_user = completed_offers.groupby(['user_id', 'offer_id']).agg({'received_event_type': 'count'}).rename(columns={'received_event_type': 'completed_count'}).reset_index()

completed_offers_by_user

Unnamed: 0,user_id,offer_id,completed_count
0,0011e0d4e6b944f998e987f904e8c1e5,0b1e1539f2cc45b7b9fa7c272da2e1d7,1
1,0011e0d4e6b944f998e987f904e8c1e5,2298d6c36e964ae4a3e7e9706d1fb8c2,1
2,0011e0d4e6b944f998e987f904e8c1e5,9b98b8c7a33c4b65b9aebfe6a799e6d9,1
3,0020c2b971eb4e9188eac86d93036a77,4d5c57ea9a6940dd891ad53e9dbe8da0,1
4,0020c2b971eb4e9188eac86d93036a77,fafdcd668e3743c1bb461111dcafc2a4,1
...,...,...,...
21088,fff7576017104bcc8677a8d63322b5e1,fafdcd668e3743c1bb461111dcafc2a4,1
21089,fffad4f4828548d1b5583907f2e9906b,f19421c1d4aa40978ebb69ca19b0e20d,1
21090,ffff82501cea40309d5fdd7edcca4a07,0b1e1539f2cc45b7b9fa7c272da2e1d7,1
21091,ffff82501cea40309d5fdd7edcca4a07,2906b810c7d4411798c6938adc9daaa5,1


In [38]:
received_v_completed = pd.merge(received_offers, completed_offers_by_user, how='outer', on=['user_id', 'offer_id'])
received_v_completed['completed_percent'] = received_v_completed['completed_count'] / received_v_completed['received_count']
received_v_completed = received_v_completed.fillna(0)

Unnamed: 0,user_id,offer_id,received_count,completed_count,completed_percent
0,0009655768c64bdeb2e877511632db8f,2906b810c7d4411798c6938adc9daaa5,1,0.0,0.000000
1,0009655768c64bdeb2e877511632db8f,3f207df678b143eea3cee63160fa8bed,1,0.0,0.000000
2,0009655768c64bdeb2e877511632db8f,5a8bc65990b245e5a138643cd4eb9837,1,0.0,0.000000
3,0009655768c64bdeb2e877511632db8f,f19421c1d4aa40978ebb69ca19b0e20d,1,0.0,0.000000
4,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,1,0.0,0.000000
...,...,...,...,...,...
63283,fffad4f4828548d1b5583907f2e9906b,f19421c1d4aa40978ebb69ca19b0e20d,2,1.0,0.500000
63284,ffff82501cea40309d5fdd7edcca4a07,0b1e1539f2cc45b7b9fa7c272da2e1d7,1,1.0,1.000000
63285,ffff82501cea40309d5fdd7edcca4a07,2906b810c7d4411798c6938adc9daaa5,3,1.0,0.333333
63286,ffff82501cea40309d5fdd7edcca4a07,9b98b8c7a33c4b65b9aebfe6a799e6d9,1,0.0,0.000000


In [None]:
completed_offer_info = pd.merge(completed_offers, offers_wo_chnl, how='left', left_on='offer_id', right_on='offer_id')
completed_offers_v_users = pd.merge(completed_offer_info, users, how='left', on='user_id')
completed_offers_v_users

Unnamed: 0,user_id,offer_id,offer_type,offer_spend_minimum,offer_reward,offer_duration,user_gender,user_age,user_member_since,user_income,user_tenure,user_age_group
0,0011e0d4e6b944f998e987f904e8c1e5,2298d6c36e964ae4a3e7e9706d1fb8c2,discount,7,3,168,O,40.0,2018-01-09 00:00:00.000000,57000.0,0,36-45
1,0011e0d4e6b944f998e987f904e8c1e5,0b1e1539f2cc45b7b9fa7c272da2e1d7,discount,20,5,240,O,40.0,2018-01-09 00:00:00.000000,57000.0,0,36-45
2,0011e0d4e6b944f998e987f904e8c1e5,9b98b8c7a33c4b65b9aebfe6a799e6d9,bogo,5,5,168,O,40.0,2018-01-09 00:00:00.000000,57000.0,0,36-45
3,0020c2b971eb4e9188eac86d93036a77,fafdcd668e3743c1bb461111dcafc2a4,discount,10,2,240,F,59.0,2016-03-04 00:00:00.000000,90000.0,2,56-65
4,0020c2b971eb4e9188eac86d93036a77,4d5c57ea9a6940dd891ad53e9dbe8da0,bogo,10,10,120,F,59.0,2016-03-04 00:00:00.000000,90000.0,2,56-65
...,...,...,...,...,...,...,...,...,...,...,...,...
21088,fff7576017104bcc8677a8d63322b5e1,fafdcd668e3743c1bb461111dcafc2a4,discount,10,2,240,M,71.0,2017-10-31 00:00:00.000000,73000.0,1,66+
21089,fffad4f4828548d1b5583907f2e9906b,f19421c1d4aa40978ebb69ca19b0e20d,bogo,5,5,120,M,34.0,2017-01-23 00:00:00.000000,34000.0,1,26-35
21090,ffff82501cea40309d5fdd7edcca4a07,fafdcd668e3743c1bb461111dcafc2a4,discount,10,2,240,F,45.0,2016-11-25 00:00:00.000000,62000.0,2,46-55
21091,ffff82501cea40309d5fdd7edcca4a07,0b1e1539f2cc45b7b9fa7c272da2e1d7,discount,20,5,240,F,45.0,2016-11-25 00:00:00.000000,62000.0,2,46-55


In [None]:
completed_offers_by_type_and_user_age = completed_offers_v_users.groupby(['offer_type', 'user_age_group']).agg({'offer_id': 'count'}).rename(columns={'offer_id': 'completed_count'}).reset_index()

  completed_offers_v_users.groupby(['offer_type', 'user_age_group']).agg({'offer_id': 'count'}).rename(columns={'offer_id': 'completed_count'}).reset_index()


Unnamed: 0,offer_type,user_age_group,completed_count
0,bogo,Unknown,290
1,bogo,19-25,388
2,bogo,26-35,676
3,bogo,36-45,1225
4,bogo,46-55,2076
5,bogo,56-65,2341
6,bogo,66+,2941
7,discount,Unknown,615
8,discount,19-25,497
9,discount,26-35,810
