In [1]:
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv('data.csv')

In [3]:
data.head()

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,Surburban,0,Phone,Buy One Get One,0
1,6,329.08,1,1,Rural,1,Web,No Offer,0
2,7,180.65,0,1,Surburban,1,Web,Buy One Get One,0
3,9,675.83,1,0,Rural,1,Web,Discount,0
4,2,45.34,1,0,Urban,0,Web,Buy One Get One,0


In [4]:
data.rename(columns={'conversion':'target', 'offer':'treatment'}, inplace=True)

In [5]:
data[['treatment']].value_counts()

treatment      
Buy One Get One    21387
Discount           21307
No Offer           21306
dtype: int64

In [6]:
data['treatment'] = data.treatment.apply(lambda i : 0 if i == 'No Offer' else 1)

In [7]:
data.head()

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,treatment,target
0,10,142.44,1,0,Surburban,0,Phone,1,0
1,6,329.08,1,1,Rural,1,Web,0,0
2,7,180.65,0,1,Surburban,1,Web,1,0
3,9,675.83,1,0,Rural,1,Web,1,0
4,2,45.34,1,0,Urban,0,Web,1,0


In [8]:
data[['channel']].value_counts()

channel     
Web             28217
Phone           28021
Multichannel     7762
dtype: int64

In [9]:
data = pd.get_dummies(data)
data = data.drop(columns=['zip_code_Urban','channel_Web'])

In [10]:
data.head()

Unnamed: 0,recency,history,used_discount,used_bogo,is_referral,treatment,target,zip_code_Rural,zip_code_Surburban,channel_Multichannel,channel_Phone
0,10,142.44,1,0,0,1,0,0,1,0,1
1,6,329.08,1,1,1,0,0,1,0,0,0
2,7,180.65,0,1,1,1,0,0,1,0,0
3,9,675.83,1,0,1,1,0,1,0,0,0
4,2,45.34,1,0,0,1,0,0,0,0,0


In [11]:
from sklearn.model_selection import train_test_split

In [12]:
data_train, data_test = train_test_split(data, test_size = 0.3)

In [13]:
data_train.shape, data_test.shape

((44800, 11), (19200, 11))

In [14]:
X_train = data_train.drop(columns=['treatment', 'target'])
y_train = data_train[['target']]
treatment_train = data_train[['treatment']]

In [15]:
X_test = data_test.drop(columns=['treatment', 'target'])
y_test = data_test[['target']]
treatment_test = data_test[['treatment']]

In [16]:
from sklift.metrics import uplift_at_k
from sklift.viz import plot_uplift_preds
from sklift.models import SoloModel

from catboost import CatBoostClassifier

In [17]:
models_results = {
    'approach': [],
    'uplift@10%': [],
    'uplift@20%': []
}

1.1 Одна модель с признаком коммуникации

In [18]:
N =20000
sm = SoloModel(  CatBoostClassifier(iterations=20, thread_count=2, random_state=42, silent=True))
sm = sm.fit(  X_train, y_train, treatment_train)

uplift_sm = sm.predict(X_test)

sm_score = uplift_at_k( y_true=y_test, uplift=uplift_sm, treatment=treatment_test, strategy='by_group', k=0.1)
sm_score_2 = uplift_at_k( y_true=y_test, uplift=uplift_sm, treatment=treatment_test, strategy='by_group', k=0.2)

models_results['approach'].append('SoloModel')
models_results['uplift@10%'].append(sm_score)
models_results['uplift@20%'].append(sm_score_2)

# Получим условные вероятности выполнения целевого действия при взаимодействии для каждого объекта
sm_trmnt_preds = sm.trmnt_preds_
# И условные вероятности выполнения целевого действия без взаимодействия для каждого объекта
sm_ctrl_preds = sm.ctrl_preds_

1.2 Трансформация классов

In [19]:
from sklift.models import ClassTransformation


ct = ClassTransformation(CatBoostClassifier(iterations=20, thread_count=2, random_state=42, silent=True))
ct = ct.fit(X_train, y_train, treatment_train)

uplift_ct = ct.predict(X_test)

ct_score = uplift_at_k(y_true=y_test, uplift=uplift_ct, treatment=treatment_test, strategy='by_group', k=0.1)
ct_score_2 = uplift_at_k(y_true=y_test, uplift=uplift_ct, treatment=treatment_test, strategy='by_group', k=0.2)

models_results['approach'].append('ClassTransformation')
models_results['uplift@10%'].append(ct_score)
models_results['uplift@20%'].append(ct_score_2)

  ct = ct.fit(X_train, y_train, treatment_train)


In [20]:
X_train

Unnamed: 0,recency,history,used_discount,used_bogo,is_referral,zip_code_Rural,zip_code_Surburban,channel_Multichannel,channel_Phone
22993,1,233.93,1,0,1,0,1,1,0
18572,10,159.89,0,1,1,0,1,0,1
37156,3,29.99,1,0,0,0,1,0,1
19152,7,89.05,1,0,0,0,1,0,1
9266,2,29.99,1,0,1,0,1,0,1
...,...,...,...,...,...,...,...,...,...
6622,3,231.23,1,0,1,0,0,0,0
37232,6,379.62,1,1,0,0,0,1,0
4234,2,274.61,1,1,0,0,1,1,0
13300,1,301.67,0,1,0,0,1,0,1


2.1 Две независимые модели

In [None]:
from sklift.models import TwoModels


tm = TwoModels(  
    estimator_trmnt=CatBoostClassifier(iterations=10, random_state=42, silent=True), 
    estimator_ctrl=CatBoostClassifier(iterations=10, random_state=42, silent=True), 
    method='vanilla'
)
tm = tm.fit(X_train, y_train, treatment_train)

uplift_tm = tm.predict(X_test)

tm_score = uplift_at_k(y_true=y_test, uplift=uplift_tm, treatment=treatment_test, strategy='by_group', k=0.1)
tm_score_2 = uplift_at_k(y_true=y_test, uplift=uplift_tm, treatment=treatment_test, strategy='by_group', k=0.2)

models_results['approach'].append('TwoModels')
models_results['uplift@10%'].append(tm_score)
models_results['uplift@20%'].append(tm_score_2)

Построим табичку с результатами

In [27]:
pd.DataFrame(data=models_results).sort_values('uplift@10%', ascending=False)

Unnamed: 0,approach,uplift@10%,uplift@20%
0,SoloModel,0.115229,0.096805
2,TwoModels,0.098264,0.087213
1,ClassTransformation,0.089464,0.076549


In [37]:
!pip3 install causalml





In [None]:
from IPython.display import Image, display
from causalml.inference.tree import UpliftTreeClassifier
from causalml.inference.tree import uplift_tree_string, uplift_tree_plot

uplift_model = UpliftTreeClassifier(max_depth=4, min_samples_leaf=200, min_samples_treatment=50,
                                    n_reg=100, evaluationFunction='KL', control_name='control')

uplift_model.fit(X_train.values,
                 treatment=treatment_train.map({1: 'treatment1', 0: 'control'}).values,
                 y=y_train)

graph = uplift_tree_plot(uplift_model.fitted_uplift_tree, features)

In [None]:
import numpy as np
tree_lift =  np.array(uplift_model.predict(X_test.values))[0]

uplift_tree_30 = uplift_at_k(y_val, tree_lift, treat_val, strategy='overall')
print(f'uplift@30%: {uplift_tree_30:.4f}')
models_results['approach'].append('TreeModels')
models_results['uplift@30%'].append(uplift_tree_30)