In [43]:
# load in packages
from itertools import combinations

from test_results import test_results, score
import numpy as np
import pandas as pd
import scipy as sp
import sklearn as sk
import xgboost as xgb
import pickle
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import seaborn as sb
%matplotlib inline

# load in the data
train_data = pd.read_csv('./training.csv')
train_data.head()

Unnamed: 0,ID,Promotion,purchase,V1,V2,V3,V4,V5,V6,V7
0,1,No,0,2,30.443518,-1.165083,1,1,3,2
1,3,No,0,3,32.15935,-0.645617,2,3,2,2
2,4,No,0,2,30.431659,0.133583,1,1,4,2
3,5,No,0,0,26.588914,-0.212728,2,1,4,2
4,8,Yes,0,3,28.044332,-0.385883,1,1,2,2


In [44]:
test_data = pd.read_csv('./Test.csv')
test_data.head()

Unnamed: 0,ID,Promotion,purchase,V1,V2,V3,V4,V5,V6,V7
0,2,No,0,1,41.37639,1.172517,1,1,2,2
1,6,Yes,0,1,25.163598,0.65305,2,2,2,2
2,7,Yes,0,1,26.553778,-1.597972,2,3,4,2
3,10,No,0,2,28.529691,-1.078506,2,3,2,2
4,12,No,0,2,32.378538,0.479895,2,2,1,2


In [45]:
features = ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7']

In [4]:
# up sample purhcase '1' to 8,000 examples

train_data_pur = train_data[train_data['purchase'] == 1]

train_data_pur_new = sk.utils.resample(train_data_pur, replace=True, n_samples=7000)
train_data_pur_new = pd.DataFrame(train_data_pur_new)
train_data_pur_new.head()

Unnamed: 0,ID,Promotion,purchase,V1,V2,V3,V4,V5,V6,V7
26865,39918,Yes,1,1,24.545281,1.518828,2,1,1,2
35024,52104,Yes,1,2,25.391155,1.605406,2,1,2,1
62785,93541,Yes,1,1,28.092568,0.393317,2,3,1,2
79383,118479,Yes,1,1,32.75864,-1.68455,2,3,1,2
20953,31167,Yes,1,2,30.818383,0.133583,2,1,2,1


In [5]:
train_data_upsam = pd.concat([train_data, train_data_pur_new], ignore_index=True)
train_data_upsam.head()

Unnamed: 0,ID,Promotion,purchase,V1,V2,V3,V4,V5,V6,V7
0,1,No,0,2,30.443518,-1.165083,1,1,3,2
1,3,No,0,3,32.15935,-0.645617,2,3,2,2
2,4,No,0,2,30.431659,0.133583,1,1,4,2
3,5,No,0,0,26.588914,-0.212728,2,1,4,2
4,8,Yes,0,3,28.044332,-0.385883,1,1,2,2


In [14]:
# split into train and valid
# create ensemble for train portion only
train, valid = sk.model_selection.train_test_split(train_data_upsam, test_size=0.2,random_state=42)
train_no_pur = train[train['purchase']==0]
train_pur = train[train['purchase']==1]

In [15]:
# Generate validation set
valid_control = valid[valid['Promotion']=='No']
Y_valid_control = valid_control['purchase']
X_valid_control = valid_control[features]

valid_exper = valid[valid['Promotion']=='Yes']
Y_valid_exper = valid_exper['purchase']
X_valid_exper = valid_exper[features]

In [17]:
random_seeds = [1, 100, 200, 300, 400, 500, 600, 700, 800, 900]

for i in range(10):
    # Get a subsample of data with no purchases
    train_no_pur_sample = train_no_pur.sample(frac=0.1, replace=True, random_state=random_seeds[i])
    # Join subsample of customers with no purchases with full data for customers with purchases
    train = pd.concat([train_no_pur_sample, train_pur], ignore_index=True)
    train= sk.utils.shuffle(train)
    train_control = train[train['Promotion']=='No']
    train_exper = train[train['Promotion']=='Yes']
    
    Y_train_control = train_control['purchase']
    X_train_control = train_control[features]

    Y_train_exper = train_exper['purchase']
    X_train_exper = train_exper[features]
    
    # model for control group
    model_control_name = "model_control_" + str(i+1) +'.pickle.dat'
    
    eval_set = [(X_train_control, Y_train_control), (X_valid_control, Y_valid_control)]
    model_control = xgb.XGBClassifier(learning_rate = 0.1,\
                                  max_depth = 7,\
                                  min_child_weight = 5,\
                                  objective = 'binary:logistic',\
                                  seed = 42,\
                                  gamma = 1,\
                                  silent = True)
    model_control.fit(X_train_control, Y_train_control, eval_set=eval_set,\
                      eval_metric="auc", verbose=True, early_stopping_rounds=30)
    # save model for control group
    pickle.dump(model_control, open(model_control_name, "wb"))
    
    # for experimental group
    model_exper_name = "model_exper_" + str(i+1) +'.pickle.dat'
    eval_set = [(X_train_exper, Y_train_exper), (X_valid_exper, Y_valid_exper)]
    model_exper = xgb.XGBClassifier(learning_rate = 0.1,\
                                  max_depth = 7,\
                                  min_child_weight = 5,\
                                  objective = 'binary:logistic',\
                                  seed = 42,\
                                  gamma = 1,\
                                  silent = True)
    model_exper.fit(X_train_exper, Y_train_exper, eval_set=eval_set,\
                    eval_metric="auc", verbose=True, early_stopping_rounds=30)
    
    # save model for control group
    pickle.dump(model_exper, open(model_exper_name, "wb"))

[0]	validation_0-auc:0.644776	validation_1-auc:0.627161
Multiple eval metrics have been passed: 'validation_1-auc' will be used for early stopping.

Will train until validation_1-auc hasn't improved in 30 rounds.
[1]	validation_0-auc:0.672676	validation_1-auc:0.645559
[2]	validation_0-auc:0.683759	validation_1-auc:0.652584
[3]	validation_0-auc:0.704999	validation_1-auc:0.663601
[4]	validation_0-auc:0.728263	validation_1-auc:0.683148
[5]	validation_0-auc:0.74944	validation_1-auc:0.703511
[6]	validation_0-auc:0.765438	validation_1-auc:0.719348
[7]	validation_0-auc:0.779421	validation_1-auc:0.734062
[8]	validation_0-auc:0.820614	validation_1-auc:0.772948
[9]	validation_0-auc:0.824164	validation_1-auc:0.77668
[10]	validation_0-auc:0.860689	validation_1-auc:0.808497
[11]	validation_0-auc:0.865342	validation_1-auc:0.811509
[12]	validation_0-auc:0.869514	validation_1-auc:0.814713
[13]	validation_0-auc:0.86987	validation_1-auc:0.8151
[14]	validation_0-auc:0.876216	validation_1-auc:0.821155
[15

[40]	validation_0-auc:0.897431	validation_1-auc:0.8392
[41]	validation_0-auc:0.897787	validation_1-auc:0.83923
[42]	validation_0-auc:0.900127	validation_1-auc:0.841726
[43]	validation_0-auc:0.901081	validation_1-auc:0.842154
[44]	validation_0-auc:0.902558	validation_1-auc:0.843954
[45]	validation_0-auc:0.907719	validation_1-auc:0.847924
[46]	validation_0-auc:0.908398	validation_1-auc:0.848351
[47]	validation_0-auc:0.909207	validation_1-auc:0.848937
[48]	validation_0-auc:0.910815	validation_1-auc:0.849736
[49]	validation_0-auc:0.911993	validation_1-auc:0.851082
[50]	validation_0-auc:0.912577	validation_1-auc:0.851461
[51]	validation_0-auc:0.913513	validation_1-auc:0.852318
[52]	validation_0-auc:0.915196	validation_1-auc:0.854118
[53]	validation_0-auc:0.916764	validation_1-auc:0.855776
[54]	validation_0-auc:0.918847	validation_1-auc:0.858136
[55]	validation_0-auc:0.920733	validation_1-auc:0.860069
[56]	validation_0-auc:0.921655	validation_1-auc:0.860864
[57]	validation_0-auc:0.922991	val

[82]	validation_0-auc:0.976958	validation_1-auc:0.939754
[83]	validation_0-auc:0.976946	validation_1-auc:0.939693
[84]	validation_0-auc:0.976946	validation_1-auc:0.939682
[85]	validation_0-auc:0.977299	validation_1-auc:0.939904
[86]	validation_0-auc:0.97735	validation_1-auc:0.939998
[87]	validation_0-auc:0.977363	validation_1-auc:0.940169
[88]	validation_0-auc:0.977593	validation_1-auc:0.940455
[89]	validation_0-auc:0.977593	validation_1-auc:0.940281
[90]	validation_0-auc:0.978063	validation_1-auc:0.940322
[91]	validation_0-auc:0.978518	validation_1-auc:0.940599
[92]	validation_0-auc:0.978408	validation_1-auc:0.940422
[93]	validation_0-auc:0.978858	validation_1-auc:0.941132
[94]	validation_0-auc:0.979016	validation_1-auc:0.941307
[95]	validation_0-auc:0.979996	validation_1-auc:0.94222
[96]	validation_0-auc:0.98024	validation_1-auc:0.942643
[97]	validation_0-auc:0.980262	validation_1-auc:0.942688
[98]	validation_0-auc:0.980456	validation_1-auc:0.94301
[99]	validation_0-auc:0.980584	vali

[22]	validation_0-auc:0.900395	validation_1-auc:0.846429
[23]	validation_0-auc:0.900514	validation_1-auc:0.846323
[24]	validation_0-auc:0.903613	validation_1-auc:0.848613
[25]	validation_0-auc:0.903807	validation_1-auc:0.848494
[26]	validation_0-auc:0.907935	validation_1-auc:0.853151
[27]	validation_0-auc:0.914526	validation_1-auc:0.859843
[28]	validation_0-auc:0.918015	validation_1-auc:0.863206
[29]	validation_0-auc:0.918535	validation_1-auc:0.863809
[30]	validation_0-auc:0.924335	validation_1-auc:0.869839
[31]	validation_0-auc:0.924636	validation_1-auc:0.870342
[32]	validation_0-auc:0.925023	validation_1-auc:0.870406
[33]	validation_0-auc:0.926248	validation_1-auc:0.872664
[34]	validation_0-auc:0.931148	validation_1-auc:0.878265
[35]	validation_0-auc:0.933928	validation_1-auc:0.881166
[36]	validation_0-auc:0.934829	validation_1-auc:0.882325
[37]	validation_0-auc:0.940533	validation_1-auc:0.888024
[38]	validation_0-auc:0.944147	validation_1-auc:0.891875
[39]	validation_0-auc:0.944426	

[64]	validation_0-auc:0.937136	validation_1-auc:0.880493
[65]	validation_0-auc:0.937972	validation_1-auc:0.881271
[66]	validation_0-auc:0.940166	validation_1-auc:0.883555
[67]	validation_0-auc:0.94094	validation_1-auc:0.883857
[68]	validation_0-auc:0.941653	validation_1-auc:0.884908
[69]	validation_0-auc:0.941826	validation_1-auc:0.885165
[70]	validation_0-auc:0.943099	validation_1-auc:0.88607
[71]	validation_0-auc:0.94361	validation_1-auc:0.8867
[72]	validation_0-auc:0.944593	validation_1-auc:0.888031
[73]	validation_0-auc:0.946586	validation_1-auc:0.889944
[74]	validation_0-auc:0.947157	validation_1-auc:0.890446
[75]	validation_0-auc:0.947815	validation_1-auc:0.890893
[76]	validation_0-auc:0.948847	validation_1-auc:0.892303
[77]	validation_0-auc:0.949096	validation_1-auc:0.892644
[78]	validation_0-auc:0.949539	validation_1-auc:0.893247
[79]	validation_0-auc:0.949616	validation_1-auc:0.893361
[80]	validation_0-auc:0.950072	validation_1-auc:0.893751
[81]	validation_0-auc:0.950098	valid

[3]	validation_0-auc:0.783581	validation_1-auc:0.743927
[4]	validation_0-auc:0.792965	validation_1-auc:0.752348
[5]	validation_0-auc:0.797054	validation_1-auc:0.756617
[6]	validation_0-auc:0.805574	validation_1-auc:0.762927
[7]	validation_0-auc:0.810182	validation_1-auc:0.766616
[8]	validation_0-auc:0.813705	validation_1-auc:0.770047
[9]	validation_0-auc:0.82122	validation_1-auc:0.77641
[10]	validation_0-auc:0.821947	validation_1-auc:0.77656
[11]	validation_0-auc:0.827	validation_1-auc:0.77981
[12]	validation_0-auc:0.82942	validation_1-auc:0.781176
[13]	validation_0-auc:0.830998	validation_1-auc:0.781794
[14]	validation_0-auc:0.832253	validation_1-auc:0.783063
[15]	validation_0-auc:0.833821	validation_1-auc:0.784637
[16]	validation_0-auc:0.833899	validation_1-auc:0.784381
[17]	validation_0-auc:0.834723	validation_1-auc:0.784757
[18]	validation_0-auc:0.837406	validation_1-auc:0.78616
[19]	validation_0-auc:0.83843	validation_1-auc:0.786374
[20]	validation_0-auc:0.840425	validation_1-auc:

[45]	validation_0-auc:0.957906	validation_1-auc:0.91155
[46]	validation_0-auc:0.959257	validation_1-auc:0.913518
[47]	validation_0-auc:0.961537	validation_1-auc:0.915666
[48]	validation_0-auc:0.96209	validation_1-auc:0.916289
[49]	validation_0-auc:0.962671	validation_1-auc:0.91602
[50]	validation_0-auc:0.963454	validation_1-auc:0.916849
[51]	validation_0-auc:0.963622	validation_1-auc:0.917187
[52]	validation_0-auc:0.964779	validation_1-auc:0.918653
[53]	validation_0-auc:0.965285	validation_1-auc:0.919265
[54]	validation_0-auc:0.965359	validation_1-auc:0.919428
[55]	validation_0-auc:0.965383	validation_1-auc:0.91954
[56]	validation_0-auc:0.965575	validation_1-auc:0.919833
[57]	validation_0-auc:0.967435	validation_1-auc:0.922188
[58]	validation_0-auc:0.9674	validation_1-auc:0.9222
[59]	validation_0-auc:0.967441	validation_1-auc:0.92227
[60]	validation_0-auc:0.967469	validation_1-auc:0.922359
[61]	validation_0-auc:0.968367	validation_1-auc:0.923383
[62]	validation_0-auc:0.968506	validatio

[87]	validation_0-auc:0.951018	validation_1-auc:0.88896
[88]	validation_0-auc:0.951064	validation_1-auc:0.889021
[89]	validation_0-auc:0.951097	validation_1-auc:0.888903
[90]	validation_0-auc:0.952408	validation_1-auc:0.890422
[91]	validation_0-auc:0.952627	validation_1-auc:0.890602
[92]	validation_0-auc:0.953564	validation_1-auc:0.891701
[93]	validation_0-auc:0.954382	validation_1-auc:0.89315
[94]	validation_0-auc:0.955637	validation_1-auc:0.894312
[95]	validation_0-auc:0.956828	validation_1-auc:0.89531
[96]	validation_0-auc:0.957005	validation_1-auc:0.895536
[97]	validation_0-auc:0.957573	validation_1-auc:0.89612
[98]	validation_0-auc:0.957749	validation_1-auc:0.896501
[99]	validation_0-auc:0.958507	validation_1-auc:0.896988
[0]	validation_0-auc:0.683975	validation_1-auc:0.67661
Multiple eval metrics have been passed: 'validation_1-auc' will be used for early stopping.

Will train until validation_1-auc hasn't improved in 30 rounds.
[1]	validation_0-auc:0.719621	validation_1-auc:0.69

[27]	validation_0-auc:0.886908	validation_1-auc:0.829263
[28]	validation_0-auc:0.888933	validation_1-auc:0.8315
[29]	validation_0-auc:0.890348	validation_1-auc:0.83296
[30]	validation_0-auc:0.89319	validation_1-auc:0.834877
[31]	validation_0-auc:0.898415	validation_1-auc:0.839004
[32]	validation_0-auc:0.901654	validation_1-auc:0.841597
[33]	validation_0-auc:0.903537	validation_1-auc:0.842857
[34]	validation_0-auc:0.906987	validation_1-auc:0.846056
[35]	validation_0-auc:0.908952	validation_1-auc:0.847879
[36]	validation_0-auc:0.911784	validation_1-auc:0.850625
[37]	validation_0-auc:0.91361	validation_1-auc:0.852151
[38]	validation_0-auc:0.916695	validation_1-auc:0.854594
[39]	validation_0-auc:0.920426	validation_1-auc:0.858395
[40]	validation_0-auc:0.923046	validation_1-auc:0.861239
[41]	validation_0-auc:0.923991	validation_1-auc:0.861915
[42]	validation_0-auc:0.924423	validation_1-auc:0.862296
[43]	validation_0-auc:0.92592	validation_1-auc:0.863915
[44]	validation_0-auc:0.926724	valida

[69]	validation_0-auc:0.977861	validation_1-auc:0.931714
[70]	validation_0-auc:0.978002	validation_1-auc:0.932112
[71]	validation_0-auc:0.978717	validation_1-auc:0.93298
[72]	validation_0-auc:0.979138	validation_1-auc:0.933298
[73]	validation_0-auc:0.979617	validation_1-auc:0.934434
[74]	validation_0-auc:0.979536	validation_1-auc:0.934448
[75]	validation_0-auc:0.97963	validation_1-auc:0.934625
[76]	validation_0-auc:0.980413	validation_1-auc:0.935795
[77]	validation_0-auc:0.980624	validation_1-auc:0.936216
[78]	validation_0-auc:0.980616	validation_1-auc:0.936202
[79]	validation_0-auc:0.980896	validation_1-auc:0.937032
[80]	validation_0-auc:0.981008	validation_1-auc:0.937424
[81]	validation_0-auc:0.98122	validation_1-auc:0.937568
[82]	validation_0-auc:0.981732	validation_1-auc:0.938377
[83]	validation_0-auc:0.981949	validation_1-auc:0.938073
[84]	validation_0-auc:0.98272	validation_1-auc:0.939316
[85]	validation_0-auc:0.982699	validation_1-auc:0.93921
[86]	validation_0-auc:0.982886	valid

[9]	validation_0-auc:0.832567	validation_1-auc:0.780102
[10]	validation_0-auc:0.838534	validation_1-auc:0.785614
[11]	validation_0-auc:0.839067	validation_1-auc:0.787045
[12]	validation_0-auc:0.851413	validation_1-auc:0.798603
[13]	validation_0-auc:0.858503	validation_1-auc:0.805762
[14]	validation_0-auc:0.873235	validation_1-auc:0.819082
[15]	validation_0-auc:0.883071	validation_1-auc:0.828555
[16]	validation_0-auc:0.886614	validation_1-auc:0.833158
[17]	validation_0-auc:0.888754	validation_1-auc:0.83678
[18]	validation_0-auc:0.887967	validation_1-auc:0.834621
[19]	validation_0-auc:0.889194	validation_1-auc:0.836571
[20]	validation_0-auc:0.906075	validation_1-auc:0.851723
[21]	validation_0-auc:0.911056	validation_1-auc:0.856748
[22]	validation_0-auc:0.912974	validation_1-auc:0.858427
[23]	validation_0-auc:0.915848	validation_1-auc:0.861174
[24]	validation_0-auc:0.923087	validation_1-auc:0.870908
[25]	validation_0-auc:0.92493	validation_1-auc:0.871928
[26]	validation_0-auc:0.928112	val

[51]	validation_0-auc:0.917518	validation_1-auc:0.854622
[52]	validation_0-auc:0.918756	validation_1-auc:0.856134
[53]	validation_0-auc:0.91943	validation_1-auc:0.856757
[54]	validation_0-auc:0.922161	validation_1-auc:0.859603
[55]	validation_0-auc:0.922336	validation_1-auc:0.859763
[56]	validation_0-auc:0.922632	validation_1-auc:0.86008
[57]	validation_0-auc:0.922751	validation_1-auc:0.860187
[58]	validation_0-auc:0.924569	validation_1-auc:0.861887
[59]	validation_0-auc:0.92575	validation_1-auc:0.863073
[60]	validation_0-auc:0.927677	validation_1-auc:0.865673
[61]	validation_0-auc:0.927979	validation_1-auc:0.866256
[62]	validation_0-auc:0.929528	validation_1-auc:0.868117
[63]	validation_0-auc:0.932379	validation_1-auc:0.871263
[64]	validation_0-auc:0.932369	validation_1-auc:0.871298
[65]	validation_0-auc:0.933685	validation_1-auc:0.873299
[66]	validation_0-auc:0.934963	validation_1-auc:0.874294
[67]	validation_0-auc:0.936066	validation_1-auc:0.875804
[68]	validation_0-auc:0.936994	val

[93]	validation_0-auc:0.981427	validation_1-auc:0.944094
[94]	validation_0-auc:0.98185	validation_1-auc:0.94422
[95]	validation_0-auc:0.981983	validation_1-auc:0.944528
[96]	validation_0-auc:0.982005	validation_1-auc:0.944601
[97]	validation_0-auc:0.982883	validation_1-auc:0.946283
[98]	validation_0-auc:0.983025	validation_1-auc:0.946476
[99]	validation_0-auc:0.983248	validation_1-auc:0.947204
[0]	validation_0-auc:0.75572	validation_1-auc:0.709414
Multiple eval metrics have been passed: 'validation_1-auc' will be used for early stopping.

Will train until validation_1-auc hasn't improved in 30 rounds.
[1]	validation_0-auc:0.773033	validation_1-auc:0.726329
[2]	validation_0-auc:0.78556	validation_1-auc:0.735646
[3]	validation_0-auc:0.791647	validation_1-auc:0.74311
[4]	validation_0-auc:0.795384	validation_1-auc:0.746869
[5]	validation_0-auc:0.799815	validation_1-auc:0.749394
[6]	validation_0-auc:0.808688	validation_1-auc:0.758575
[7]	validation_0-auc:0.81099	validation_1-auc:0.761413
[8

[33]	validation_0-auc:0.938397	validation_1-auc:0.89303
[34]	validation_0-auc:0.938158	validation_1-auc:0.892885
[35]	validation_0-auc:0.941049	validation_1-auc:0.896457
[36]	validation_0-auc:0.944389	validation_1-auc:0.900362
[37]	validation_0-auc:0.947453	validation_1-auc:0.90423
[38]	validation_0-auc:0.947781	validation_1-auc:0.904753
[39]	validation_0-auc:0.947809	validation_1-auc:0.904713
[40]	validation_0-auc:0.953248	validation_1-auc:0.910402
[41]	validation_0-auc:0.954637	validation_1-auc:0.911877
[42]	validation_0-auc:0.955414	validation_1-auc:0.913012
[43]	validation_0-auc:0.958044	validation_1-auc:0.916009
[44]	validation_0-auc:0.960125	validation_1-auc:0.918005
[45]	validation_0-auc:0.961854	validation_1-auc:0.920054
[46]	validation_0-auc:0.961985	validation_1-auc:0.920485
[47]	validation_0-auc:0.961904	validation_1-auc:0.920459
[48]	validation_0-auc:0.962562	validation_1-auc:0.921173
[49]	validation_0-auc:0.964507	validation_1-auc:0.923457
[50]	validation_0-auc:0.964466	va

[75]	validation_0-auc:0.943056	validation_1-auc:0.883765
[76]	validation_0-auc:0.944128	validation_1-auc:0.884747
[77]	validation_0-auc:0.944578	validation_1-auc:0.885222
[78]	validation_0-auc:0.944795	validation_1-auc:0.88543
[79]	validation_0-auc:0.94552	validation_1-auc:0.885958
[80]	validation_0-auc:0.945712	validation_1-auc:0.88609
[81]	validation_0-auc:0.947173	validation_1-auc:0.887208
[82]	validation_0-auc:0.94826	validation_1-auc:0.888509
[83]	validation_0-auc:0.948345	validation_1-auc:0.888595
[84]	validation_0-auc:0.94858	validation_1-auc:0.88895
[85]	validation_0-auc:0.94912	validation_1-auc:0.889744
[86]	validation_0-auc:0.949103	validation_1-auc:0.889571
[87]	validation_0-auc:0.949666	validation_1-auc:0.89023
[88]	validation_0-auc:0.95115	validation_1-auc:0.891227
[89]	validation_0-auc:0.951727	validation_1-auc:0.891522
[90]	validation_0-auc:0.951851	validation_1-auc:0.891697
[91]	validation_0-auc:0.95201	validation_1-auc:0.891778
[92]	validation_0-auc:0.952194	validation

In [40]:
def promotion_strategy(df):
    '''
    INPUT 
    df - a dataframe with *only* the columns V1 - V7 (same as train_data)

    OUTPUT
    promotion_df - np.array with the values
                   'Yes' or 'No' related to whether or not an 
                   individual should recieve a promotion 
                   should be the length of df.shape[0]            
    Ex:
    INPUT: df
    
    V1	V2	  V3	V4	V5	V6	V7
    2	30	-1.1	1	1	3	2
    3	32	-0.6	2	3	2	2
    2	30	0.13	1	1	4	2
    
    OUTPUT: promotion
    
    array(['Yes', 'Yes', 'No'])
    indicating the first two users would recieve the promotion and 
    the last should not.
    '''

    test = df
    avg_pred_probs_contol = np.zeros(shape=(test.shape[0],2))
    avg_pred_probs_exper = np.zeros(shape=(test.shape[0],2))
    for i in range(10):
        model_control_name = "model_control_" + str(i+1) +'.pickle.dat'
        model_control = pickle.load(open(model_control_name, "rb"))
        pred_probs_contol = model_control.predict_proba(test, ntree_limit=model_control.best_ntree_limit)
        avg_pred_probs_contol += pred_probs_contol

        model_exper_name = "model_exper_" + str(i+1) +'.pickle.dat'
        model_exper = pickle.load(open(model_exper_name, "rb"))
        pred_probs_exper = model_exper.predict_proba(test, ntree_limit=model_control.best_ntree_limit)
        avg_pred_probs_exper += pred_probs_exper

    # get average soft predictions for both control and experimental model
    avg_pred_probs_contol /= 10.0
    avg_pred_probs_exper /= 10.0

    # get difference in probabilities between experimental and cpntrol model for purchase = 1 label
    diff_avg_pred_probs = avg_pred_probs_exper[:,1] - avg_pred_probs_contol[:,1]
    
    promotion = []
    
    # Only send promotions to top 0.1 percentile of probabilities
    cutoff_percentile = np.percentile(diff_avg_pred_probs, 99.9)
    
    for prob in diff_avg_pred_probs:
        if prob > 0:
            if prob > cutoff_percentile:
                promotion.append('Yes')
        else:
            promotion.append('No')

    promotion = np.array(promotion)
    
    return promotion

In [41]:
# This will test your results, and provide you back some information 
# on how well your promotion_strategy will work in practice

test_results(promotion_strategy)

Nice job!  See how well your strategy worked on our test data below!

Your irr with this strategy is 0.0476.

Your nir with this strategy is 6.85.
We came up with a model with an irr of 0.0188 and an nir of 189.45 on the test set.

 How did you do?


(0.047619047619047616, 6.85)