# 예측 모형 실습: 쿠폰 지급을 위한 예측 모형

배송료 무료 쿠폰을 줬을 때, 구매 가능성이 가장 높을 유저를 예측

In [1]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from xgboost import XGBRegressor

In [2]:
# Load the raw dataset from a hard-coded absolute path.
# NOTE(review): the /content/ prefix suggests a Colab runtime — confirm the path before running elsewhere.
df = pd.read_csv(filepath_or_buffer='/content/gooppang.csv')

In [3]:
# Split the rows 80% train / 20% test; the fixed seed keeps the split reproducible.
train_df, test_df = train_test_split(
    df,
    train_size=0.8,
    random_state=0,
)

In [5]:
# Baseline on the held-out rows: mean of basket * checkout as actually observed.
# (checkout is presumably a 0/1 indicator, so rows without a checkout contribute 0 — confirm.)
expected_basket = test_df['basket'].mul(test_df['checkout']).mean()
expected_basket

2.144275

# Checkout estimation model

In [6]:
# Checkout model: logistic regression for P(checkout), with `coupon` among the input features.
checkout_ct = ColumnTransformer(
    transformers=[
        # Passed through untouched (presumably already 0/1 flags — confirm).
        ('bin', 'passthrough', ['coupon', 'subscriber']),
        # drop='first' removes one dummy level per categorical column.
        ('cat', OneHotEncoder(drop='first'), ['region', 'gender']),
        # Numeric columns are standardized before reaching the classifier.
        ('num', StandardScaler(), ['age', 'monthly_spend', 'tenure']),
    ],
)
checkout_pipe = Pipeline(
    steps=[
        ('trans', checkout_ct),
        ('clf', LogisticRegression()),
    ],
)
# Candidate values for the classifier's C: 10 log-spaced points from 1e-2 to 1e2.
checkout_grid_params = {'clf__C': np.logspace(-2, 2, num=10)}
checkout_gs = GridSearchCV(
    estimator=checkout_pipe,
    param_grid=checkout_grid_params,
    scoring='roc_auc',
    n_jobs=-1,
)
# The transformer only selects the columns listed above, so passing the whole
# training frame as X does not feed the `checkout` target into the model.
checkout_gs = checkout_gs.fit(train_df, y=train_df.checkout)
# Keep the pipeline with the best cross-validated ROC AUC for later scoring.
checkout_estimator = checkout_gs.best_estimator_

In [7]:
# Counterfactual scoring: set coupon = 1 for every test row, then estimate
# P(checkout | coupon = 1) with the fitted checkout model.
test_with_coupon_df = test_df.assign(coupon=1)
checkout_proba_with_coupon = checkout_estimator.predict_proba(test_with_coupon_df)
# Column 1 is the probability of the second class (presumably checkout = 1 — confirm).
p_checkout_with_coupon = checkout_proba_with_coupon[:, 1]

# 1. Treat historical basket as given

In [8]:
# Approach 1: keep each test row's observed basket and weight it by
# P(checkout | coupon = 1), then average over the test rows.
expected_basket_with_coupon = test_df['basket'].mul(p_checkout_with_coupon).mean()
expected_basket_with_coupon

3.976793294951042

# 2. Estimate basket (via XGBoost)

In [9]:
# Approach 2, step 1: fit an XGBoost regressor that predicts basket from user attributes.
# `coupon` is not among this model's inputs.
basket_ct = ColumnTransformer(
    transformers=[
        ('bin', 'passthrough', ['subscriber']),
        # Every category level gets its own column here (no `drop`).
        ('cat', OneHotEncoder(), ['region', 'gender']),
        ('num', StandardScaler(), ['age', 'monthly_spend', 'tenure']),
    ],
)
basket_pipe = Pipeline(
    steps=[
        ('trans', basket_ct),
        # Library-default hyperparameters; nothing is tuned in this cell.
        ('reg', XGBRegressor()),
    ],
)
# Cross-validation is skipped to save time; use CV in real work.
basket_pipe = basket_pipe.fit(train_df, y=train_df.basket)



In [10]:
# Approach 2, step 2: weight the model-predicted basket by P(checkout | coupon = 1)
# and average over the test rows.
# NOTE: this rebinds `expected_basket_with_coupon`, replacing the approach-1 value.
predicted_basket = basket_pipe.predict(test_df)
expected_basket_with_coupon = np.mean(predicted_basket * p_checkout_with_coupon)
expected_basket_with_coupon

3.965950306840656