In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the user-engagement CSV; the path is relative to the working directory.
df_eng=pd.read_csv('takehome_user_engagement.csv')

In [3]:
# Keep only the 'YYYY-MM-DD' prefix of each timestamp string as the login date.
# The vectorised .str accessor replaces the per-row lambda and propagates
# missing timestamps as NaN instead of raising inside the lambda.
df_eng['login_date']=df_eng['time_stamp'].str[:10]

In [4]:
# Preview the first rows to confirm login_date was derived from time_stamp.
df_eng.head()

Unnamed: 0,time_stamp,user_id,visited,login_date
0,2014-04-22 03:53:30,1,1,2014-04-22
1,2013-11-15 03:45:04,2,1,2013-11-15
2,2013-11-29 03:45:04,2,1,2013-11-29
3,2013-12-09 03:45:04,2,1,2013-12-09
4,2013-12-25 03:45:04,2,1,2013-12-25


In [5]:
def check_eng_user(logins, window_days=7, min_logins=3):
    '''
    Check whether a user is an adopted user.

    A user is adopted when they logged in on at least ``min_logins``
    *distinct* calendar days that all fall inside some window of
    ``window_days`` consecutive days, i.e. the gap between the first and
    the last of those days is strictly less than ``window_days`` days.

    Parameters
    ----------
    logins : iterable of date-like values (e.g. a Series of 'YYYY-MM-DD'
        strings). Order does not matter; repeated logins on the same day
        count once; missing values are ignored.
    window_days : int, default 7
        Length of the window in days.
    min_logins : int, default 3
        Number of distinct login days required inside the window.

    Returns
    -------
    int
        1 if the user is adopted, else 0.
    '''
    # Cheap early exit: fewer raw logins than required can never qualify.
    if len(logins) < min_logins:
        return 0

    # Reduce to distinct calendar days in ascending order so that several
    # logins on one day cannot satisfy the rule and unsorted input is handled.
    days = (pd.to_datetime(list(logins))
              .normalize()
              .dropna()
              .unique()
              .sort_values())
    if len(days) < min_logins:
        return 0

    # Compare each day with the day (min_logins - 1) positions later; since
    # the days are sorted, that pair bounds the tightest window starting there.
    n_windows = len(days) - min_logins + 1
    spans = days[min_logins - 1:] - days[:n_windows]
    return int((spans < pd.Timedelta(days=window_days)).any())

In [6]:
# Group the (user_id, login_date) pairs by user so each user's logins can be scored together.
login_cols = ['user_id', 'login_date']
usr_eng_grp = df_eng.loc[:, login_cols].groupby('user_id')

In [7]:
# Map each user id to its 0/1 adoption flag. Iterating a groupby yields
# (key, sub-frame) pairs; the key is the user id shared by every row of the group.
usr_eng_dict = {
    user_id: check_eng_user(user_logins['login_date'])
    for user_id, user_logins in usr_eng_grp
}


In [8]:
# Turn the {user id: flag} mapping into a two-column frame (former index + values).
adopted_flags = pd.Series(usr_eng_dict)
usr_eng_df = adopted_flags.to_frame().reset_index()

In [9]:
# Name the two columns produced by reset_index(): the user id and its adoption flag.
usr_eng_df.columns=['user','adopted']

In [10]:
# Preview the per-user adoption flags.
usr_eng_df.head(10)

Unnamed: 0,user,adopted
0,1,0
1,2,1
2,3,0
3,4,0
4,5,0
5,6,0
6,7,0
7,10,1
8,11,0
9,13,0


In [11]:
# Sanity check: the flag should only take the values 0 and 1.
usr_eng_df['adopted'].unique()

array([0, 1], dtype=int64)

In [12]:
# Load the user table; the path is relative to the working directory.
# 'Latin' is an alias of the Latin-1 codec — presumably needed because the
# file is not valid UTF-8 (confirm against the data).
users_df = pd.read_csv('takehome_users.csv',encoding='Latin')

In [13]:
# Preview the raw user table.
users_df.head(3)

Unnamed: 0,object_id,creation_time,name,email,creation_source,last_session_creation_time,opted_in_to_mailing_list,enabled_for_marketing_drip,org_id,invited_by_user_id
0,1,2014-04-22 03:53:30,Clausen August,AugustCClausen@yahoo.com,GUEST_INVITE,1398139000.0,1,0,11,10803.0
1,2,2013-11-15 03:45:04,Poole Matthew,MatthewPoole@gustr.com,ORG_INVITE,1396238000.0,0,0,1,316.0
2,3,2013-03-19 23:14:52,Bottrill Mitchell,MitchellBottrill@gustr.com,ORG_INVITE,1363735000.0,0,0,94,1525.0


In [14]:
# Index users by object_id so rows can later be aligned with the per-user
# adoption flags (assumes object_id matches user_id in the engagement data).
# Not idempotent: re-running this cell without reloading raises KeyError
# because object_id is no longer a column.
users_df=users_df.set_index('object_id')

In [15]:
# Index the adoption flags by user id so they align with users_df on the index.
# Not idempotent: re-running this cell raises KeyError because 'user' is no longer a column.
usr_eng_df= usr_eng_df.set_index('user')

In [16]:
# Confirm the adoption flags are now indexed by user id.
usr_eng_df.head()

Unnamed: 0_level_0,adopted
user,Unnamed: 1_level_1
1,0
2,1
3,0
4,0
5,0


In [17]:
# Confirm the user table is now indexed by object_id.
users_df.head()

Unnamed: 0_level_0,creation_time,name,email,creation_source,last_session_creation_time,opted_in_to_mailing_list,enabled_for_marketing_drip,org_id,invited_by_user_id
object_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,2014-04-22 03:53:30,Clausen August,AugustCClausen@yahoo.com,GUEST_INVITE,1398139000.0,1,0,11,10803.0
2,2013-11-15 03:45:04,Poole Matthew,MatthewPoole@gustr.com,ORG_INVITE,1396238000.0,0,0,1,316.0
3,2013-03-19 23:14:52,Bottrill Mitchell,MitchellBottrill@gustr.com,ORG_INVITE,1363735000.0,0,0,94,1525.0
4,2013-05-21 08:09:28,Clausen Nicklas,NicklasSClausen@yahoo.com,GUEST_INVITE,1369210000.0,0,0,1,5151.0
5,2013-01-17 10:14:20,Raw Grace,GraceRaw@yahoo.com,GUEST_INVITE,1358850000.0,0,0,193,5240.0


In [18]:
# Column-wise concat aligns the two frames on their indexes (outer join by
# default), so users with no engagement rows get NaN in 'adopted'.
users_df=pd.concat([users_df,usr_eng_df],axis=1)

In [19]:
# Users absent from the engagement data (never logged in) have NaN here;
# treat them as not adopted.
users_df['adopted']=users_df['adopted'].fillna(0)

In [20]:
# Overall adoption rate as a percentage (mean of the 0/1 flag times 100).
users_df['adopted'].mean()*100

13.350000000000001

**Overall, 13.35% of the users are adopted users.**

In [21]:
# Fraction of adopted users (0-1 scale) by mailing-list opt-in.
users_df.groupby('opted_in_to_mailing_list')['adopted'].mean()

opted_in_to_mailing_list
0    0.131912
1    0.138277
Name: adopted, dtype: float64

The share of adopted users among those who opted in to the mailing list (13.83%) is slightly higher than among those who did not (13.19%) and than the overall rate in the population (13.35%). **Among users who opted in to the mailing list, 13.83% are adopted users.**

In [22]:
# Fraction of adopted users (0-1 scale) by marketing-drip enrolment.
users_df.groupby('enabled_for_marketing_drip')['adopted'].mean()

enabled_for_marketing_drip
0    0.132837
1    0.137277
Name: adopted, dtype: float64

The share of adopted users among those enabled for the marketing drip (13.73%) is slightly higher than among those who are not (13.28%) and than the overall rate in the given sample (13.35%). **Among users enabled for the marketing drip, 13.73% are adopted users.**

In [23]:
# Fraction of adopted users (0-1 scale) by account creation source.
users_df.groupby('creation_source')['adopted'].mean()

creation_source
GUEST_INVITE          0.166436
ORG_INVITE            0.129995
PERSONAL_PROJECTS     0.077688
SIGNUP                0.140393
SIGNUP_GOOGLE_AUTH    0.167509
Name: adopted, dtype: float64

The above shows the share of adopted users by creation source. Among users whose accounts were created for personal projects, only 7.77% are adopted, which is much lower than the overall rate in the given sample (13.35%). **Users who signed up with Google authentication have the highest adoption rate at 16.75%**, narrowly ahead of guest invites (16.64%).

In [24]:
# Adoption rate among users who satisfy all three conditions at once.
is_google_signup = users_df['creation_source'] == 'SIGNUP_GOOGLE_AUTH'
is_drip_enabled = users_df['enabled_for_marketing_drip'] == 1
is_mail_opted_in = users_df['opted_in_to_mailing_list'] == 1
users_df.loc[is_google_signup & is_drip_enabled & is_mail_opted_in, 'adopted'].mean()

0.17721518987341772

***
**A user who signed up through Google authentication, opted in to the mailing list, and is enabled for the marketing drip has a 17.72% chance of being an adopted user — the highest of the segments examined here.**
***