In [1]:
# Third-party libraries
import featuretools as ft
import pandas as pd

# joblib is a standalone package: `sklearn.externals.joblib` was deprecated in
# scikit-learn 0.21 and removed in 0.23. Prefer the standalone module and fall
# back to the vendored copy only on old environments that lack it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Project-local helpers
import utils
import utils_bux



### DEFINE PIPELINE PARAMETERS

In [2]:
# --- Prediction problem ------------------------------------------------------
# One of: 'regression', 'binary classification', 'multiclass classification'.
prediction_problem_type = 'multiclass classification'
# Multiclass values (handed to the target-value builder further down).
medium_value, high_value = 5, 50

# --- Extracted users ---------------------------------------------------------
# Timeframe of extracted users, as (from, till) date strings.
# TODO (original note: "make relative") - presumably the dates should be
# derived relative to the run date instead of being hard-coded; confirm.
users_from, users_till = '2016-01-01', '2017-01-01'
cohort_size = 2000

# --- Extracted behavioral data -----------------------------------------------
# Timeframe of extracted behavioral data.
interval = '1 week'

# --- Output ------------------------------------------------------------------
# Switch for loading the results into Vertica.
load_to_vertica = False

print("Pipeline parameters defined")

Pipeline parameters defined


### CONNECT TO THE DATABASE

In [3]:
# Connect to the Vertica database and create a cursor.
# `cur` is the handle passed to every entity/label builder in the cells below.
cur = utils.connect_to_db()
print("Connected to the database")

Connected to the database


### BUILD ENTITIES

#### Cohorts entity

In [4]:
# Build the cohorts entity for the configured users_from / users_till timeframe.
cohorts = utils_bux.build_cohorts_entity(
    cur=cur,
    users_from=users_from,
    users_till=users_till,
)
print("Cohorts entity built")

Cohorts entity built


#### Users entity

In [5]:
# Build the users entity from the same user timeframe, the behavioral-data
# interval, and the cohorts built in the previous step.
user_details = utils_bux.build_users_entity(
    cur=cur,
    users_from=users_from,
    users_till=users_till,
    interval=interval,
    cohorts=cohorts,
    cohort_size=cohort_size,
)
print("Users entity built")

Users entity built


In [6]:
# Preview the first rows of the users entity.
# NOTE(review): the rendered output contains per-user records (user_id, country,
# gender, ...); clear or redact the output before sharing the notebook.
user_details.head()

Unnamed: 0,user_id,country_cd,gender,nationality,platform_type_name,trading_experience,title,network,bux_account_created_dts,ams_first_funded_dts,...,Withdrawal Completed_did_event,Banner Clicked_hours_till_event,Conversion Completed_hours_till_event,Group Followed_hours_till_event,In-App Purchase Completed_hours_till_event,News Item Opened_hours_till_event,Position Closed_hours_till_event,Position Opened_hours_till_event,Product Alert Set_hours_till_event,Withdrawal Completed_hours_till_event
0,00298260-dc97-4382-8d59-863f3cc30398,NL,,,iOS,NONE,Rookie Trader,Organic,2016-02-05 13:31:01.650,NaT,...,0.0,0.0,500.0,500.0,500.0,500.0,0.0,0.0,500.0,500.0
1,20a35c57-d631-42a2-bd5b-00aa740f97a0,NL,MALE,NL,Android,NONE,Rookie Trader,Organic,2016-02-04 15:24:31.236,2017-10-25 12:01:25,...,0.0,0.0,500.0,500.0,500.0,500.0,0.0,0.0,500.0,500.0
2,29f8548c-54b5-4e86-89b3-66872b6bf7f5,NL,,,Android,NONE,Rookie Trader,Organic,2016-02-02 15:26:07.097,NaT,...,0.0,3.0,500.0,167.0,500.0,500.0,0.0,0.0,500.0,500.0
3,2eadc61a-2935-4484-8de0-19f89f708719,AT,,,Android,NONE,Rookie Trader,Facebook Installs,2016-02-01 14:15:22.799,NaT,...,0.0,500.0,500.0,500.0,500.0,500.0,0.0,0.0,500.0,500.0
4,3794cc61-9526-4a5a-9eb9-c3436aa4d75e,AT,MALE,AT,Android,SOME,Experienced Trader,Organic,2016-02-07 21:14:20.903,2016-02-08 16:37:55,...,1.0,500.0,19.0,500.0,500.0,0.0,10.5,10.0,500.0,42.0


#### Transactions entity

In [None]:
# Build the transactions entity for the configured behavioral-data interval.
daily_transactions = utils_bux.build_transactions_entity(
    cur=cur,
    interval=interval,
)
print("Transactions entity built")

In [None]:
# Preview the first rows of the transactions entity.
daily_transactions.head()

#### Labels

In [None]:
# Build the target values, using the configured medium/high multiclass values.
labels = utils_bux.build_target_values(
    cur=cur,
    medium_value=medium_value,
    high_value=high_value,
)
print("Target values built")

In [None]:
# Preview the first rows of the target values.
labels.head()

### CREATE THE ENTITY SET

In [11]:
# Assemble the cohorts, users and transactions entities into one entity set.
es = utils_bux.create_bux_entity_set(cohorts, user_details, daily_transactions)
# Bare expression: displays the entity set summary (entities and relationships).
es

Entityset: bux_clv
  Entities:
    cohorts (shape = [5, 11])
    transactions (shape = [28144, 18])
    users (shape = [3423, 37])
  Relationships:
    users.cohort_id -> cohorts.cohort_id
    transactions.user_id -> users.user_id

### FEATURE ENGINEERING (DFS)

In [12]:
# Load the saved feature definitions from "top_features" and compute the
# feature matrix for them on the entity set.
top_features = ft.load_features("top_features", es)
fm = utils.calculate_feature_matrix_top_features(es, top_features)
# Disabled code (left commented out): export of the labelled feature matrix to CSV.
# X = fm.reset_index().merge(labels)
# X.to_csv("production_features.csv")
print("Features built")

Features built


### LOAD THE MODEL

In [13]:
# Load the persisted model from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
model = joblib.load('models/model.pkl')
print("Model loaded")

Model loaded


### SCORING

In [15]:
# Join the feature matrix (index turned back into a column) with the labels.
# No explicit key is given, so pandas joins on the columns the two frames share.
# NOTE(review): consider passing an explicit `on=` key once the join column is confirmed.
X = pd.merge(fm.reset_index(), labels)

In [16]:
# Split the merged frame into features and target for the configured problem type.
# NOTE(review): this rebinds `X`, so the cell is presumably not idempotent -
# re-run the merge cell above before re-running this one.
X, y = utils.make_labels(X, prediction_problem_type)
X_train, X_test, y_train, y_test = utils.train_test_splitting(X, y)
# Score with the persisted model loaded from 'models/model.pkl' in the
# "loading the model" step. Do not re-train here: rebinding `model` to a freshly
# trained estimator would silently discard the loaded model, so it would never
# be used for scoring.
y_pred = utils.xgboost_predict(model, X_test, prediction_problem_type)
print("Prediction done")

Prediction done


### LOAD RESULTS INTO VERTICA

In [13]:
# Honour the `load_to_vertica` switch from the parameters cell and never report
# an upload that did not happen.
if load_to_vertica:
    # This cell contains no upload step; fail loudly instead of printing a
    # success message for work that was not performed.
    raise NotImplementedError("Loading the scoring results into Vertica is not implemented")
else:
    print("Skipping load to vertica (load_to_vertica is False)")

Scoring loaded to vertica
