# Relax Inc. Take-Home Challenge
## *The goal is to identify which factors predict future user adoption*

In [1]:
# imports
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from collections import defaultdict

In [2]:
# read the engagement log and the user table from their CSV files
user_engagement = pd.read_csv("takehome_user_engagement.csv")
# the users file is read with an explicit ISO-8859-1 encoding
# (presumably it is not valid UTF-8 -- confirm against the raw file)
users = pd.read_csv("takehome_users.csv", encoding="iso-8859-1")

In [3]:
# parse the three date columns into pandas datetime values
# NOTE(review): pd.to_datetime is called without `unit`/`format`. If
# last_session_creation_time holds numeric epoch seconds, it would be
# interpreted as nanoseconds -- confirm against the raw CSV.
users['creation_time'] = pd.to_datetime(users['creation_time'])
users['last_session_creation_time'] = pd.to_datetime(
    users['last_session_creation_time']
)
user_engagement['time_stamp'] = pd.to_datetime(user_engagement['time_stamp'])

### Identify users who have adopted the service

In [4]:
# group login timestamps by user: usage[user_id] -> list of that user's
# time_stamp values, in the order the rows appear in user_engagement
usage = defaultdict(list)
for ID, time in zip(user_engagement['user_id'], user_engagement['time_stamp']):
    usage[ID].append(time)

In [5]:
def within_7days(lst, min_logins=2, days=7):
    '''Return 1 if at least `min_logins` logins fall inside a `days`-day span, else 0.

    With the defaults this answers "did any 2 logins happen less than
    7 days apart?".

    Parameters
    ----------
    lst : iterable of datetime-like
        Login timestamps, in any order. Elements must be sortable and their
        difference must be comparable to a ``datetime.timedelta``.
    min_logins : int, optional
        How many logins must fall inside the window (default 2).
    days : int or float, optional
        Window length in days. The span between the first and last login of
        a qualifying group must be strictly shorter than this (default 7).

    Returns
    -------
    int
        1 if a qualifying group of logins exists, otherwise 0. Inputs with
        fewer than `min_logins` entries (including an empty list) give 0.

    Raises
    ------
    ValueError
        If `min_logins` is less than 1.
    '''
    if min_logins < 1:
        raise ValueError('min_logins must be at least 1')

    # Sort first: comparing neighbours only finds the closest logins when the
    # timestamps are in chronological order, and the caller's list may not be.
    ordered = sorted(lst)
    window = timedelta(days=days)

    # Pair each login with the one (min_logins - 1) positions later; if that
    # pair is closer than the window, every login between them is too.
    # zip() yields nothing when there are fewer than min_logins entries.
    for first, last in zip(ordered, ordered[min_logins - 1:]):
        if last - first < window:
            return 1
    return 0
    

In [6]:
# adoption flag per user_id; stays a defaultdict(int) so that looking up an
# id with no recorded logins yields 0
user_adoption = defaultdict(int)
for ID, logs in usage.items():
    # a single login can never form a pair, so it is flagged 0 directly
    user_adoption[ID] = within_7days(logs) if len(logs) >= 2 else 0
        
    

In [7]:
# attach the adoption flag to each user row, looked up by object_id
users['adoption'] = [user_adoption[uid] for uid in users['object_id']]

### Use logistic regression to model the data

In [10]:
# feature matrix: the four predictor columns taken from the users table
X = users[[
    'creation_source',
    'opted_in_to_mailing_list',
    'enabled_for_marketing_drip',
    'org_id',
]]
# target vector: the adoption flag column
y = users['adoption']

In [12]:
# expand creation_source and org_id into indicator (dummy) columns;
# the remaining feature columns are passed through unchanged
X = pd.get_dummies(
    X,
    columns=['creation_source', 'org_id'],
)