In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [51]:
import pandas as pd
from src.database.sql import psql_connection
conn = psql_connection()

In [14]:
import cPickle as pickle
from src.modeling.churn import ChurnClassifier

model_f = open('../data/churn_model.pkl')
clf = pickle.load(model_f)

In [5]:
query = """
SELECT 
    u.distinct_id, 
    e.type, 
    count(e.event_id), 
    extract(DAY FROM CURRENT_TIMESTAMP - converted_at) AS account_age,
    u.vertical,
    u.camp_deliveries
    
FROM users AS u 

INNER JOIN customers AS c
ON c.email = u.email

INNER JOIN subscriptions AS s
ON s.customer_id = c.identifier

LEFT JOIN events AS e
ON e.distinct_id = u.distinct_id

INNER JOIN (
    SELECT email, MAX(pe.time) AS converted_at
    FROM customers AS c
    LEFT JOIN payment_events AS pe
    ON pe.customer_id = c.identifier
    WHERE pe.type = 'customer.subscription.created'
    GROUP BY c.email
) AS converted
ON u.email = converted.email

WHERE e.type IS NOT NULL
GROUP BY u.distinct_id, e.type, converted_at, u.vertical, u.camp_deliveries;
"""
query_df = pd.read_sql_query(query, conn)

In [6]:
from src.modeling.churn import *

In [7]:
# query_df.pivot?
ages = query_df[['distinct_id', 'account_age', 'camp_deliveries']]

vertical_dummies = pd.get_dummies(query_df['vertical'], prefix='vertical')
vertical_dummies
ages = pd.concat([ages, vertical_dummies], axis=1).set_index('distinct_id')

events = query_df.pivot(index='distinct_id', columns='type', values='count')
df = events.join(ages).fillna(0)
df['vertical_educator'].describe()

count    32407.000000
mean         0.317493
std          0.465508
min          0.000000
25%          0.000000
50%          0.000000
75%          1.000000
max          1.000000
Name: vertical_educator, dtype: float64

In [13]:
len(FIELDS)

58

In [21]:
np.where(clf._clf.classes_ == True)[0][0]

1

In [34]:
df['churn_proba'] = map(lambda prediction: prediction[1], clf.predict(df[FIELDS].values))

In [35]:
df['churn_proba'].describe()

count    32407.000000
mean         0.142366
std          0.150860
min          0.004819
25%          0.045368
50%          0.092324
75%          0.179927
max          0.959053
Name: churn_proba, dtype: float64

In [49]:
df[df['churn_proba'] > 0.85][['signin', 'Export', 'account_age', 'churn_proba']].describe()

Unnamed: 0,signin,Export,account_age,churn_proba
count,176.0,176.0,176.0,176.0
mean,0.926136,0.977273,14.727273,0.918303
std,1.214295,1.213988,12.523557,0.026777
min,0.0,0.0,7.0,0.850278
25%,0.0,0.0,9.0,0.888364
50%,1.0,0.0,11.0,0.91883
75%,1.0,2.0,14.0,0.94318
max,4.0,3.0,83.0,0.959053


In [52]:
query = """
SELECT u.distinct_id, e.type, count(e.event_id)
FROM users AS u

LEFT JOIN events AS e
ON e.distinct_id = u.distinct_id

WHERE 
    u.distinct_id NOT IN (
        SELECT u.distinct_id 
        FROM subscriptions AS s
        INNER JOIN customers AS c
        ON c.identifier = s.customer_id
        INNER JOIN users AS u
        ON u.email = c.email
    ) AND
    u.subscription_type = 'basic' AND
    u.email IS NOT NULL AND
    e.type IS NOT NULL
    
GROUP BY u.distinct_id, e.type, u.email, u.vertical
"""

conversion_query_df = pd.read_sql_query(query, conn)

In [56]:
conversion_df = (
    conversion_query_df
    .pivot(index='distinct_id', columns='type', values='count')
    .fillna(0)
)

In [58]:
from src.modeling.conversion import ConversionClassifier

model_f = open('../data/conversion_model.pkl')
clf = pickle.load(model_f)

In [66]:
from src.modeling.conversion import FEATURE_COLUMNS
X = conversion_df[FEATURE_COLUMNS].values
probas = clf._clf.predict_proba(X)
conversion_df['conversion_proba'] = map(lambda proba: proba[1], probas)
conversion_df.head()

type,App Became Active,Click Button,Click Link,Client error,Countdown Pro Button,Deck Created,Display Limit Modal,Display Limit Notification,Display Video Editor Modal,Display Welcome Countdown,...,Successfully completed pro upgrade,Validation failed,View player page,Zuru Upgrade Edu Button,Zuru Upgrade Pro Button,cancel,signin,signup,upgrade,conversion_proba
distinct_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
005cINJekv,0.0,0.0,0.0,0.0,0.0,1.0,0.0,4.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.06662
005z3MNNpS,0.0,0.0,7.0,0.0,0.0,3.0,0.0,15.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.097114
007GNOwkAT,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.050013
00AeBeXbE9,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.081348
00BdMY6CNc,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.060126
