## Достанем сгенерированную базу и обучим самые простые бустинги

In [None]:
import numpy as np
import datetime
import random
import pandas as pd
from scipy.stats import lognorm
import matplotlib.pyplot as plt
from itertools import product
import pickle

from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier

from IPython.display import clear_output, display, HTML
import ipywidgets as widgets

In [None]:
# Both exports use ';' as the field separator, ',' as the decimal mark and cp1251 encoding.
client_base_df = pd.read_csv(
    "client_base.csv", sep=";", decimal=",", encoding="cp1251"
)
contact_history = pd.read_csv(
    "contact_history.csv", sep=";", decimal=",", encoding="cp1251"
)

# Attach the client attributes to every contact record (inner join on client_id)
# and convert the two date columns to datetime64 so they can be subtracted later.
main_train_sample = contact_history.merge(
    client_base_df,
    how="inner",
    left_on="client_id",
    right_on="client_id",
).astype(
    {
        "camp_start_dt": "datetime64[ns]",
        "first_buy_dt": "datetime64[ns]",
    }
)

In [None]:
# Model training

# Columns whose distinct value pairs define one model each.
sample_ids = [
    'promoted_product',
    'channel',
]

# Name of the response column.
target = [
    'response_flg',
]

# Raw input columns (presumably the ones offered to the models -- this list is
# not referenced by the visible code, TODO confirm it is still needed).
train_fields = [
    'camp_start_dt',
    'is_female',
    'region',
    'buy_frequency',
    'is_new_client',
    'first_buy_dt',
    'came_from',
    'first_product',
    'spent_total',
    'buyed_items_total_cnt',
    'spent_on_teddy_bear',
    'spent_on_сhristmas_decorations',
    'spent_on_alco',
    'spent_on_bently',
    'spent_on_diapers',
    'buyed_of_teddy_bear',
    'buyed_of_сhristmas_decorations',
    'buyed_of_alco',
    'buyed_of_bently',
    'buyed_of_diapers',
]

class Model():
    """Wrapper that bundles an estimator with the preprocessing of its input.

    The wrapped ``model`` must expose ``fit``, ``predict`` and
    ``predict_proba``. Input frames are turned into a feature matrix by
    ``transform``: a ``client_lifetime`` column (days between
    ``camp_start_dt`` and ``first_buy_dt``) is added, the categorical columns
    are one-hot encoded with one baseline category dropped, and the
    identifier / raw-date / raw-categorical / target columns are removed.

    Attributes set by ``fit``:
        multiple_choice_options: for each categorical column, the list of
            distinct values seen in the training frame.
        feature_columns_: ordered feature-column names the estimator was
            trained on; later inputs are aligned to this layout.
    """

    # Categorical column -> baseline category whose dummy column is dropped.
    _BASELINE_CATEGORIES = {
        "region": "Мордор",
        "came_from": "from street",
        "first_product": "Алкоголь",
    }

    # Columns that are never passed to the estimator.
    _NON_FEATURE_COLUMNS = ['camp_id', 'promoted_product', 'channel', 'client_id', 'region',
                            'first_buy_dt', 'camp_start_dt', 'came_from', 'first_product',
                            'response_flg']

    def __init__(self, model):
        self.model = model

    def fit(self, X, y):
        """Fit the wrapped estimator on the features built from ``X``.

        Also remembers the categorical options and the feature layout of the
        training frame so that prediction inputs can be aligned to it.
        Returns ``self``.
        """
        features = self._build_features(X)
        self.multiple_choice_options = self._collect_options(X)
        self.feature_columns_ = features.columns.tolist()
        self.model.fit(features.values, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's ``predict`` on the transformed ``X``."""
        return self.model.predict(self.transform(X))

    def predict_proba(self, X):
        """Return the wrapped estimator's ``predict_proba`` on the transformed ``X``."""
        return self.model.predict_proba(self.transform(X))

    def transform(self, df, y=None):
        """Build the feature frame for ``df``.

        After ``fit`` the result always has exactly the training columns in
        the training order: dummy columns of categories missing from ``df``
        are added as zeros, and columns unknown to the model are dropped.
        A missing non-dummy feature raises ``KeyError``. Before ``fit``
        (or for instances without a stored layout) no alignment is done.
        ``y`` is accepted for API symmetry and ignored.
        """
        # Keep the options recorded at fit time; only fill them in when the
        # instance has none yet, so a small prediction frame cannot shrink them.
        if not hasattr(self, "multiple_choice_options"):
            self.multiple_choice_options = self._collect_options(df)

        X = self._build_features(df)

        # Instances unpickled from an older version carry no layout.
        feature_columns = getattr(self, "feature_columns_", None)
        if feature_columns is None:
            return X

        dummy_prefixes = tuple(column + "_" for column in self._BASELINE_CATEGORIES)
        for name in feature_columns:
            # Only one-hot columns may be synthesised; a missing real feature
            # must fail loudly in the selection below.
            if name not in X.columns and str(name).startswith(dummy_prefixes):
                X[name] = 0
        return X[feature_columns]

    def _collect_options(self, df):
        """Return the distinct values of every categorical column of ``df``."""
        return {key: df[key].unique().tolist() for key in self._BASELINE_CATEGORIES}

    def _build_features(self, df):
        """Derive the feature columns from ``df`` without aligning them."""
        X = df.copy()
        X['client_lifetime'] = (X['camp_start_dt'] - X['first_buy_dt']).dt.days
        for column, baseline in self._BASELINE_CATEGORIES.items():
            dummies = pd.get_dummies(X[column], prefix=column)
            # The baseline category may be absent from a small frame.
            dummies = dummies.drop(columns=column + "_" + baseline, errors="ignore")
            X = pd.concat((X, dummies), axis=1)
        # Scoring frames may lack some identifier or target columns.
        return X.drop(columns=self._NON_FEATURE_COLUMNS, errors="ignore")

# Every distinct (promoted product, channel) pair present in the training sample
# gets its own model.
product_x_channel_unique = main_train_sample[sample_ids].drop_duplicates().reset_index(drop=True).values

# Maps (product, channel) -> fitted Model.
model_collection = {}

for prod, channel in product_x_channel_unique:
    # Rows of the contact history that belong to this product / channel pair.
    segment_mask = ((main_train_sample.promoted_product == prod) &
                    (main_train_sample.channel == channel))
    X = main_train_sample.loc[segment_mask].copy()
    y = X.response_flg.astype(int)

    # `learning_rate` was previously misspelled, so the value was passed on as
    # an unknown extra parameter instead of configuring the classifier.
    model = Model(LGBMClassifier(boosting_type='gbdt', max_depth=3, learning_rate=0.1, n_estimators=200))
    model.fit(X, y)
    model_collection[(prod, channel)] = model
    print("Model for product = {:<15}, channel = {:<17} is fitted".format(prod, channel),
          datetime.datetime.now().strftime("%H-%M-%S.%f"))

In [None]:
# Area that holds the input form of the currently selected model, and the area
# handed to get_widgets for the result.
output = widgets.Output()
result_output = widgets.Output()

# Sorted so that the dropdown order (and therefore the default selection) is
# the same on every kernel run; plain set iteration order is not.
product_options = sorted({product for product, _ in model_collection})
channel_options = sorted({channel for _, channel in model_collection})

# Channel preselected when the form is first shown, if a model exists for it.
default_channel = 'Санта-Клаус'
if default_channel not in channel_options:
    default_channel = channel_options[0]

choose_product_widget = widgets.Dropdown(options=product_options,
                 value=product_options[0],
                 description="Какой продукт продвигать:",
                 layout=widgets.Layout(width="50%"),
                 style={"description_width":"50%"})

choose_channel_widget = widgets.Dropdown(options=channel_options,
                 value=default_channel,
                 description="Через какой канал:",
                 layout=widgets.Layout(width="50%"),
                 style={"description_width":"50%"})


def render_model_form(by_product, by_channel):
    """Show the input form of the model trained for the given product and channel.

    Raises KeyError when no model was trained for that pair.
    """
    model = model_collection[(by_product, by_channel)]
    my_widgets = get_widgets(model, result_output)

    with output:
        clear_output(wait=True)
        display(widgets.HBox([widgets.VBox(list(my_widgets.values())), result_output]))

    # Change 'spent_total' and restore it so that a value-change event fires on
    # the new form -- presumably this makes get_widgets' observers fill
    # result_output; verify against get_widgets.
    tmp = my_widgets['spent_total'].value
    my_widgets['spent_total'].value = 23000
    my_widgets['spent_total'].value = tmp


def on_product_change(change):
    """Re-render the form when another product is selected."""
    render_model_form(change['new'], choose_channel_widget.value)


def on_channel_change(change):
    """Re-render the form when another channel is selected."""
    render_model_form(choose_product_widget.value, change['new'])


# names='value' restricts the callbacks to changes of the selected value.
choose_product_widget.observe(on_product_change, names='value')
choose_channel_widget.observe(on_channel_change, names='value')

display(widgets.VBox([choose_product_widget, choose_channel_widget, output]))
# Render the initial selection explicitly instead of relying on a value change.
render_model_form(choose_product_widget.value, choose_channel_widget.value)

In [None]:
# Persist every fitted (product, channel) model in a single pickle file.
with open("model_collection.pkl", "wb") as model_file:
    pickle.dump(model_collection, model_file)

In [None]:
# Show the column names of X, the raw training frame left over from the last
# iteration of the training loop.
X.columns

## Scoring

In [1]:
import numpy as np
import datetime
import random
import pandas as pd
from scipy.stats import lognorm
import matplotlib.pyplot as plt
from itertools import product
import pickle

from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier

from IPython.display import clear_output, display, HTML
import ipywidgets as widgets

class Model():
    """Wrapper that bundles an estimator with the preprocessing of its input.

    The wrapped ``model`` must expose ``fit``, ``predict`` and
    ``predict_proba``. Input frames are turned into a feature matrix by
    ``transform``: a ``client_lifetime`` column (days between
    ``camp_start_dt`` and ``first_buy_dt``) is added, the categorical columns
    are one-hot encoded with one baseline category dropped, and the
    identifier / raw-date / raw-categorical / target columns are removed.

    Attributes set by ``fit``:
        multiple_choice_options: for each categorical column, the list of
            distinct values seen in the training frame.
        feature_columns_: ordered feature-column names the estimator was
            trained on; later inputs are aligned to this layout.
    """

    # Categorical column -> baseline category whose dummy column is dropped.
    _BASELINE_CATEGORIES = {
        "region": "Мордор",
        "came_from": "from street",
        "first_product": "Алкоголь",
    }

    # Columns that are never passed to the estimator.
    _NON_FEATURE_COLUMNS = ['camp_id', 'promoted_product', 'channel', 'client_id', 'region',
                            'first_buy_dt', 'camp_start_dt', 'came_from', 'first_product',
                            'response_flg']

    def __init__(self, model):
        self.model = model

    def fit(self, X, y):
        """Fit the wrapped estimator on the features built from ``X``.

        Also remembers the categorical options and the feature layout of the
        training frame so that prediction inputs can be aligned to it.
        Returns ``self``.
        """
        features = self._build_features(X)
        self.multiple_choice_options = self._collect_options(X)
        self.feature_columns_ = features.columns.tolist()
        self.model.fit(features.values, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's ``predict`` on the transformed ``X``."""
        return self.model.predict(self.transform(X))

    def predict_proba(self, X):
        """Return the wrapped estimator's ``predict_proba`` on the transformed ``X``."""
        return self.model.predict_proba(self.transform(X))

    def transform(self, df, y=None):
        """Build the feature frame for ``df``.

        After ``fit`` the result always has exactly the training columns in
        the training order: dummy columns of categories missing from ``df``
        are added as zeros, and columns unknown to the model are dropped.
        A missing non-dummy feature raises ``KeyError``. Before ``fit``
        (or for instances unpickled without a stored layout) no alignment
        is done. ``y`` is accepted for API symmetry and ignored.
        """
        # Keep the options recorded at fit time; only fill them in when the
        # instance has none yet, so a small prediction frame cannot shrink them.
        if not hasattr(self, "multiple_choice_options"):
            self.multiple_choice_options = self._collect_options(df)

        X = self._build_features(df)

        # Instances unpickled from an older version carry no layout.
        feature_columns = getattr(self, "feature_columns_", None)
        if feature_columns is None:
            return X

        dummy_prefixes = tuple(column + "_" for column in self._BASELINE_CATEGORIES)
        for name in feature_columns:
            # Only one-hot columns may be synthesised; a missing real feature
            # must fail loudly in the selection below.
            if name not in X.columns and str(name).startswith(dummy_prefixes):
                X[name] = 0
        return X[feature_columns]

    def _collect_options(self, df):
        """Return the distinct values of every categorical column of ``df``."""
        return {key: df[key].unique().tolist() for key in self._BASELINE_CATEGORIES}

    def _build_features(self, df):
        """Derive the feature columns from ``df`` without aligning them."""
        X = df.copy()
        X['client_lifetime'] = (X['camp_start_dt'] - X['first_buy_dt']).dt.days
        for column, baseline in self._BASELINE_CATEGORIES.items():
            dummies = pd.get_dummies(X[column], prefix=column)
            # The baseline category may be absent from a small frame.
            dummies = dummies.drop(columns=column + "_" + baseline, errors="ignore")
            X = pd.concat((X, dummies), axis=1)
        # Scoring frames may lack some identifier or target columns.
        return X.drop(columns=self._NON_FEATURE_COLUMNS, errors="ignore")

In [2]:
def add_scores_to_client_base(client_base, model_collection, camp_start_dt="2021-01-15"):
    """Score every client with every model and return the scored frame.

    Args:
        client_base: DataFrame with client attributes. It is not modified.
            Columns of the scoring input that it lacks are filled with NaN;
            columns outside the scoring input are not carried over.
        model_collection: mapping ``(product, channel) -> model`` where each
            model exposes ``predict_proba(frame)`` returning a 2-D array whose
            column 1 is used as the score.
        camp_start_dt: campaign start date assigned to every row before
            scoring (anything ``pd.to_datetime`` accepts).

    Returns:
        A new DataFrame with the scoring-input columns followed by one
        ``"<product>_<channel>_score"`` column per model.
    """
    cols_for_scoring_input = ['camp_id', 'camp_start_dt', 'promoted_product', 'channel', 'client_id',
       'response_flg', 'is_female', 'region', 'buy_frequency', 'is_new_client',
       'first_buy_dt', 'came_from', 'first_product', 'spent_total',
       'buyed_items_total_cnt', 'spent_on_teddy_bear',
       'spent_on_сhristmas_decorations', 'spent_on_alco', 'spent_on_bently',
       'spent_on_diapers', 'buyed_of_teddy_bear',
       'buyed_of_сhristmas_decorations', 'buyed_of_alco', 'buyed_of_bently',
       'buyed_of_diapers']

    # Columns the models expect but the client base does not have.
    overrides = {col: np.nan for col in cols_for_scoring_input
                 if col not in client_base.columns}
    overrides['camp_start_dt'] = pd.to_datetime(camp_start_dt)

    # assign() and astype() both return new frames, so the caller's frame is
    # left untouched.
    scored = client_base.assign(**overrides)[cols_for_scoring_input]
    scored = scored.astype({"camp_start_dt": 'datetime64[ns]', "first_buy_dt": 'datetime64[ns]'})

    # Same input for every model; taken before any score column is added.
    scoring_input = scored[cols_for_scoring_input]

    for (prod, channel), model in model_collection.items():
        score_col_name = "{}_{}_score".format(prod, channel)
        scored[score_col_name] = model.predict_proba(scoring_input)[:, 1]
    return scored



# NOTE(review): pickle.load executes code embedded in the file -- only load a
# model_collection.pkl produced by a trusted run of the training notebook.
with open("model_collection.pkl", "rb") as model_file:
    model_collection = pickle.load(model_file)

# Client base to be scored (same CSV dialect as the training exports).
client_base_to_score = pd.read_csv(
    "scored_client_base.csv",
    sep=";",
    decimal=",",
    encoding="cp1251",
)

client_base_to_show = add_scores_to_client_base(client_base_to_score, model_collection)