In [None]:
import uplift
import warnings
import pandas as pd

# Suppress DeprecationWarning messages from here on.
warnings.simplefilter(action='ignore', category=DeprecationWarning)

# Read the raw extract: ';' separates columns and ',' is the decimal mark.
df_load = pd.read_csv(
    './integral_xsell.csv',
    sep=';',
    decimal=',',
)

# Working dataframe: an independent copy of the loaded data.
df = df_load.copy()

def assign_treatment(s):
    """Map an application-month label to its experiment group.

    Args:
        s: The application-month value of one row, compared against date
            strings of the form ``'01.08.2018'``.

    Returns:
        ``'control'`` for ``'01.08.2018'``, ``'target'`` for
        ``'01.09.2018'`` and ``'Non calculated'`` for every other value,
        including missing or non-string values.
    """
    # Exact comparison on purpose: `s in ('01.08.2018')` is a substring test
    # against a plain string (parentheses alone do not build a tuple), so
    # values such as '' or '2018' were labelled 'control' and a non-string
    # value such as NaN raised TypeError.
    if s == '01.08.2018':
        return 'control'
    if s == '01.09.2018':
        return 'target'
    return 'Non calculated'

# Drop rows whose target FLG_MOB12_90 is null. The explicit .copy() makes
# `df` an independent frame, so adding the 'treatment' column below is not a
# chained assignment on a filtered slice (pandas' SettingWithCopyWarning).
df = df[df['FLG_MOB12_90'].notnull()].copy()

# Label every remaining row with its group, derived from APPLICATION_MONTH.
df['treatment'] = df['APPLICATION_MONTH'].apply(assign_treatment)

# Keep only rows with a calculated treatment and a non-null SCORE_BLACKBOX.
df = df[(df['treatment'] != 'Non calculated') & df['SCORE_BLACKBOX'].notnull()]


# Parameter set for the random forest model type.
rf_parameters = dict(
    n_estimators=50,
    max_features=10,
    random_state=2019,
    max_depth=3,
    min_samples_leaf=100,
    min_samples_treatment=10,
    n_reg=10,
    evaluationFunction='Chi',
    control_name='control',
    normalization=True,
)


# LightGBM parameter set, used for the s-learner and t-learner model types.
lgb_parameters = dict(
    max_depth=3,
    reg_lambda=0.1,
    reg_alpha=0.1,
    min_child_samples=1000,
    n_estimators=500,
    num_leaves=32,
)


# Parameter set for the decision tree model type.
tree_parameters = dict(
    max_depth=4,
    min_samples_leaf=50,
    min_samples_treatment=50,
    n_reg=100,
    evaluationFunction='Chi',
    control_name='control',
)


# Create the model object.
#   model_type    - model type (s-learner, t-learner, random forest, tree)
#   model_params  - lgb_parameters for s-learner and t-learner,
#                   rf_parameters for random forest, tree_parameters for tree
#   feature_names - X input for modeling
#   target_name   - target name
m = uplift.causalmodel(
    model_type='t-learner',
    model_params=lgb_parameters,
    feature_names=[
        'SCORE_BLACKBOX',
        'CLIENT_AGE_SEG',
        'NEW_STRAT_SEGMENT',
        'REQUESTED_LIMIT',
    ],
    target_name='FLG_MOB12_90',
)

# Preprocess the data: hand the filtered dataframe to the model object.
m.preprocess(df)

# Eliminate bias between the samples. `caliper` is a hyperparameter: the
# cut-off below which samples count as close enough to be matched.
m.bias_elimination(caliper=0.001)

# Fit the uplift model.
m.fit_model()

# Plot total uplift, fact versus forecast.
m.plot_total_uplift(quantiles=20)

# Plot the uplift distribution for the control and target populations.
m.plot_uplift_distribution()

# Plot feature importances.
m.plot_feature_importances()

# Plot the uplift forecast across a variable (here the numeric SCORE_BLACKBOX).
m.plot_variable_uplift(var = 'SCORE_BLACKBOX',var_type='numeric', raw_data=True, bins_num=20, ntile=10)


# Write the scored sample to CSV: the columns of m.x plus 'y' (m.y),
# 'treatment' (m.treatment) and 'y_pred' (m.predict(m.x)).
df_out = pd.DataFrame(m.x).copy().assign(
    y=m.y,
    treatment=m.treatment,
    y_pred=m.predict(m.x),
)
df_out.to_csv('./out.csv')

# Plot the tree (only meaningful if model_type == 'tree').
# NOTE(review): these lines run unconditionally, although `m` is created with
# model_type='t-learner' earlier in this cell - confirm plot_tree() is valid
# for that model type, or skip these lines when the model is not a tree.
# NOTE(review): `Image` is not imported anywhere in the visible code;
# presumably IPython.display.Image - TODO confirm and add the import.
graph = m.plot_tree()
Image(graph.create_png())

In [None]:
# Plot the uplift forecast for the numeric variable SCORE_BLACKBOX, this time
# with bins_num=50 and ntile=20.
m.plot_variable_uplift(var = 'SCORE_BLACKBOX',var_type='numeric', raw_data=True, bins_num=50, ntile=20)