In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklift.models import SoloModel, ClassTransformation, TwoModels
from sklift.metrics import uplift_at_k
from catboost import CatBoostClassifier

In [2]:
# Load the raw dataset (the path is relative to the kernel's working directory)
# and preview the first three rows.
df = pd.read_csv(filepath_or_buffer='data.csv')
df.head(n=3)

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,Surburban,0,Phone,Buy One Get One,0
1,6,329.08,1,1,Rural,1,Web,No Offer,0
2,7,180.65,0,1,Surburban,1,Web,Buy One Get One,0


In [3]:
# Turn the textual `offer` column into a binary flag:
# 'No Offer' -> 0, 'Buy One Get One' / 'Discount' -> 1.
is_control = df['offer'] == 'No Offer'
is_treated = df['offer'].isin(['Buy One Get One', 'Discount'])

df.loc[is_control, 'offer'] = 0
df.loc[is_treated, 'offer'] = 1

# The column still holds Python objects after the assignments above; cast it to int.
df['offer'] = df['offer'].astype(int)
df.head(3)

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,Surburban,0,Phone,1,0
1,6,329.08,1,1,Rural,1,Web,0,0
2,7,180.65,0,1,Surburban,1,Web,1,0


In [4]:
# Distinct values of the two text columns, shown as a (zip_code, channel) tuple.
tuple(df[column].unique() for column in ('zip_code', 'channel'))

(array(['Surburban', 'Rural', 'Urban'], dtype=object),
 array(['Phone', 'Web', 'Multichannel'], dtype=object))

In [5]:
# One-hot encode the two text columns. With `columns=` given, get_dummies drops
# the source columns and appends the indicator columns after the remaining ones,
# so no separate concat/drop step is needed (the old positional `axis` argument
# of DataFrame.drop is rejected by pandas >= 2.0).
# dtype is pinned to uint8 so the indicators stay 0/1 on every pandas version
# (the default became bool in pandas 2.0).
df = pd.get_dummies(df, columns=['zip_code', 'channel'], dtype='uint8')
df.head(3)

Unnamed: 0,recency,history,used_discount,used_bogo,is_referral,offer,conversion,zip_code_Rural,zip_code_Surburban,zip_code_Urban,channel_Multichannel,channel_Phone,channel_Web
0,10,142.44,1,0,0,1,0,0,1,0,0,1,0
1,6,329.08,1,1,1,0,0,1,0,0,0,0,1
2,7,180.65,0,1,1,1,0,0,1,0,0,0,1


In [6]:
# Client features: every column except the treatment flag and the outcome.
# `columns=` is used because the positional `axis` argument of DataFrame.drop
# is rejected by pandas >= 2.0.
df_clients = df.drop(columns=['offer', 'conversion'])

# Split the row labels in half with a fixed seed.
indices_train, indices_test = train_test_split(
    df_clients.index, test_size=0.5, random_state=123
)

# Labelled half: treatment flag and outcome, indexed by the train row labels.
df_train = df.loc[indices_train, ['offer', 'conversion']]
# Held-out half: keep the row labels as the *index* (no columns), so that
# `df_test.index` returns the held-out labels. Passing them as data instead
# would store them in a column under a fresh 0..n-1 index.
df_test = pd.DataFrame(index=indices_test)

In [7]:
def func_uplift(test_size):
    """Fit three uplift models and score each with uplift@k on a validation split.

    The labelled rows (index of the notebook-level ``df_train``) are split into a
    learning part and a validation part. SoloModel, ClassTransformation and
    TwoModels (all wrapping CatBoostClassifier) are fitted on the learning part
    and evaluated on the validation part with ``uplift_at_k``.

    Reads the notebook globals ``df_clients`` (feature columns) and ``df_train``
    (``offer`` and ``conversion`` columns).

    Args:
        test_size: Forwarded both to ``train_test_split(test_size=...)`` and to
            ``uplift_at_k(k=...)``.

    Returns:
        dict with two equally long lists: ``'approach'`` (model names, in the
        order SoloModel, ClassTransformation, TwoModels) and
        ``f'uplift@{test_size*100}%'`` (the corresponding scores).
    """
    # Feature extraction: work on a copy of the client feature table.
    df_features = df_clients.copy()

    # Split the rows with a known outcome into learning / validation parts
    # (by row label only at this point).
    indices_learn, indices_valid = train_test_split(
        df_train.index, test_size=test_size, random_state=123
    )

    # Learning data: features, target and treatment flag.
    X_train = df_features.loc[indices_learn, :]
    y_train = df_train.loc[indices_learn, 'conversion']
    treat_train = df_train.loc[indices_learn, 'offer']

    # Validation data.
    X_val = df_features.loc[indices_valid, :]
    y_val = df_train.loc[indices_valid, 'conversion']
    treat_val = df_train.loc[indices_valid, 'offer']

    # Column handed to CatBoost as categorical.
    cat_features = ['used_discount']

    # NOTE(review): test_size doubles as the validation share and as the top-k
    # fraction of the metric, and names the score column - confirm this coupling
    # is intended.
    score_key = f'uplift@{test_size*100}%'

    def _make_classifier(iterations):
        # Same CatBoost settings for every approach; only the tree count varies.
        return CatBoostClassifier(
            iterations=iterations, thread_count=2, random_state=42, silent=True
        )

    # (name, unfitted model, extra keyword arguments for .fit), in output order.
    approaches = [
        (
            'SoloModel',
            SoloModel(_make_classifier(20)),
            {'estimator_fit_params': {'cat_features': cat_features}},
        ),
        (
            'ClassTransformation',
            ClassTransformation(_make_classifier(20)),
            {'estimator_fit_params': {'cat_features': cat_features}},
        ),
        (
            'TwoModels',
            TwoModels(
                estimator_trmnt=_make_classifier(10),
                estimator_ctrl=_make_classifier(10),
                method='vanilla',
            ),
            {
                'estimator_trmnt_fit_params': {'cat_features': cat_features},
                'estimator_ctrl_fit_params': {'cat_features': cat_features},
            },
        ),
    ]

    # Result container: one row per approach.
    models_results = {
        'approach': [],
        score_key: [],
    }

    for approach_name, model, fit_kwargs in approaches:
        model = model.fit(X_train, y_train, treat_train, **fit_kwargs)
        uplift_pred = model.predict(X_val)

        # Uplift on the top-k part of the validation sample.
        score = uplift_at_k(
            y_true=y_val,
            uplift=uplift_pred,
            treatment=treat_val,
            strategy='by_group',
            k=test_size,
        )

        models_results['approach'].append(approach_name)
        models_results[score_key].append(score)

    return models_results

In [8]:
# Run the comparison for 0.1 and 0.2, then put both score columns side by side,
# aligned on the 'approach' index.
df1, df2 = (
    pd.DataFrame(func_uplift(share)).set_index('approach')
    for share in (0.1, 0.2)
)
df3 = df1.merge(df2, on='approach')
df3

Unnamed: 0_level_0,uplift@10.0%,uplift@20.0%
approach,Unnamed: 1_level_1,Unnamed: 2_level_1
SoloModel,0.116775,0.075614
ClassTransformation,0.114527,0.042448
TwoModels,0.137675,0.081002
