## Для создания приложения и предсказания целевой переменой модель, созданная в модуле В не подходит, поэтому стоит обучить новую модель для этих целей

# Установка библиотек

In [6]:
!pip install catboost -q

# Импорт библиотек

In [48]:
import pandas as pd

from catboost import CatBoostRegressor

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

import pickle

# Импорт данных

In [24]:
df = pd.read_csv('profiles.csv')
df.head()

Unnamed: 0,User Id,Session Start,Session End,Device,Region,Channel,date,mean,Revenue
0,382394863972,2022-05-01 00:00:41,2022-05-01 01:13:45,Mac,Russia,MediaTornado,2022-05-01,30.482,0.0
1,428935481941,2022-05-01 00:01:35,2022-05-01 02:30:30,PC,Turkey,AdNonSense,2022-05-01,30.482,0.0
2,86266511938,2022-05-01 00:02:38,2022-05-01 01:22:33,Android,China,organic,2022-05-01,30.482,0.0
3,763000657462,2022-05-01 00:04:21,2022-05-01 00:24:15,PC,Turkey,organic,2022-05-01,30.482,0.0
4,791058813280,2022-05-01 00:05:17,2022-05-01 01:14:51,Android,Thailand,LeapBob,2022-05-01,30.482,0.0


In [51]:
df.Channel.unique()

array(['MediaTornado', 'AdNonSense', 'organic', 'LeapBob', 'FaceBoom',
       'TipTop', 'OppleCreativeMedia', 'RocketSuperAds', 'WahooNetBanner',
       'lambdaMediaAds', 'YRabbit'], dtype=object)

# Предобработка данных

In [55]:
def preprocess_data(df):
    df['date'] = pd.to_datetime(df['date'])
    cat_features = ['Channel', 'Device', 'Region']
    # Обработка категориальных признаков
    encoder = OneHotEncoder(drop='first')
    encoder.fit(df[cat_features])
    X_preprocess = encoder.transform(df[cat_features])
    X = pd.DataFrame(X_preprocess.toarray(), columns=encoder.get_feature_names_out())
    # добавляем дату
    X['day'] = df['date'].dt.day
    X['month'] = df['date'].dt.month
    X['day'] = df['date'].dt.year
    with open('encoder.pkl', 'wb') as file:
    # загружаем кодировщик в файл
        pickle.dump(encoder, file)
    return X

In [56]:
X = preprocess_data(df)
y = df['Revenue']
X.head()

Unnamed: 0,Channel_FaceBoom,Channel_LeapBob,Channel_MediaTornado,Channel_OppleCreativeMedia,Channel_RocketSuperAds,Channel_TipTop,Channel_WahooNetBanner,Channel_YRabbit,Channel_lambdaMediaAds,Channel_organic,Device_Mac,Device_PC,Device_iPhone,Region_Russia,Region_Thailand,Region_Turkey,day,month
0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,2022,5
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,2022,5
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2022,5
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,2022,5
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2022,5


# Обучение модели

In [33]:
model = CatBoostRegressor(thread_count=-1,
                         verbose=0,
                         random_state=1)

model.fit(X, y)

<catboost.core.CatBoostRegressor at 0x270583a2390>

# Предсказание по контрольной выборке

In [38]:
test = pd.read_csv('control_for_competitors.csv').rename(columns=
                                                         {'channel': 'Channel', 'device': 'Device', 'region': 'Region', 'dt': 'date'})
X_test = preprocess_data(test)
pred = pd.Series(model.predict(X))
pred.to_csv('predictions.csv')
pred.head()

0    1.785259
1    0.999401
2    1.381444
3    1.011229
4    0.402079
dtype: float64

# Сохранение модели в файл

In [39]:
model.save_model('model.cbm')

#

# Отчет

В рамках работы над модулем Г были сделаны следующие действия:
- Обучена новая модель и сделано предсказание по контрольной выборке
- Создан API для предсказания по параметрам
- Создан интерфейс для работы с API