# Complete experiment to estimate the probability that an offer "succeeds" with a given customer. "Informational" offers are considered successful if the customer views them. All other kinds of offers are successful only if the customer views them and then completes them.

Table of contents <a id='top'></a>

1. [Get the data and show it](#data)
2. [Create the Model](#model)
3. [Evaluate the Model](#eval)
4. [Analysis and Conclusions](#conclusions)

In [5]:
import pandas as pd
import numpy as np
import math
import json
import os
import matplotlib.pyplot as plt
%matplotlib inline

%load_ext autoreload
%autoreload 2

from jupyterthemes import jtplot

# Apply the jupyterthemes plotting theme first, then set the default figure
# size afterwards (20 x 10) for the plots in this notebook.
jtplot.style(theme='solarizedd')
plt.rcParams.update({'figure.figsize': (20.0, 10.0)})

import data_utils_mt.utils as utils
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
import datetime as dt
from xgboost import XGBClassifier

# Project directory layout, expressed relative to this notebook's folder.
ROOT = '../..'
# Alias: DATA_DIR used to be built from `ROOT_DIR`, which was never defined in
# this cell (NameError on a fresh kernel). Keep the name available in case
# other cells refer to it.
ROOT_DIR = ROOT

# Data folders under <ROOT>/data.
DATA_DIR = os.path.join(ROOT, 'data')
DATA_RAW = os.path.join(DATA_DIR, 'raw')
DATA_INTERIM = os.path.join(DATA_DIR, 'interim')
DATA_EXTERNAL = os.path.join(DATA_DIR, 'external')
DATA_PROCESSED = os.path.join(DATA_DIR, 'processed')

# Source folder under <ROOT>/src.
SRC = os.path.join(ROOT, 'src')

import sys

# Add the source folder to the import path. The membership check keeps
# sys.path free of duplicates when this cell is executed more than once.
if SRC not in sys.path:
    sys.path.append(SRC)

import src.data.preprocessing as pp
import src.data.success_dataset as sd

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 1. Get the data and show it <a id='data'></a>
[Top](#top)

In [7]:
# Load the offer-success dataset: train+validation and test features/targets,
# plus the encoder returned alongside them.
# NOTE(review): drop_time=False presumably keeps the time-dependent columns so
# that the time-based split below can use them -- confirm in success_dataset.
(X_train_val, X_test,
 y_train_val, y_test,
 encoder) = sd.get_success_data(drop_time=False)

# The test features are not split any further, so their time-dependent
# columns are removed right away.
X_test = sd.drop_time_dependent(X_test)

# Split train+validation into a training part and a validation part by time.
# NOTE(review): the unit of time_limit=370 is not visible here -- confirm
# against sd.time_split.
X_train, X_val, y_train, y_val = sd.time_split(
    X_train_val, y_train_val, time_limit=370)

In [11]:
# Training set: print the feature and target shapes (one per line), then
# display the first five feature rows as the cell output.
print(X_train.shape, y_train.shape, sep='\n')
X_train.head(n=5)

(38030, 13)
(38030,)


Unnamed: 0,age,gender,income,missing_demographics,member_epoch_days,difficulty,duration,offer_type,reward_t,channel_mobile,channel_web,channel_social,channel_email
0,33.0,M,72000.0,0,17277,0.0,3.0,informational,0.0,1.0,0.0,1.0,1.0
1,33.0,M,72000.0,0,17277,0.0,4.0,informational,0.0,1.0,1.0,0.0,1.0
5,,,,1,17646,5.0,5.0,bogo,5.0,1.0,1.0,1.0,1.0
7,40.0,O,57000.0,0,17540,0.0,4.0,informational,0.0,1.0,1.0,0.0,1.0
8,40.0,O,57000.0,0,17540,7.0,7.0,discount,3.0,1.0,1.0,1.0,1.0


In [12]:
# Validation set: print the feature and target shapes (one per line), then
# display the first five feature rows as the cell output.
print(X_val.shape, y_val.shape, sep='\n')
X_val.head(n=5)

(12778, 13)
(12778,)


Unnamed: 0,age,gender,income,missing_demographics,member_epoch_days,difficulty,duration,offer_type,reward_t,channel_mobile,channel_web,channel_social,channel_email
2,33.0,M,72000.0,0,17277,5.0,5.0,bogo,5.0,1.0,1.0,1.0,1.0
10,40.0,O,57000.0,0,17540,20.0,10.0,discount,5.0,0.0,1.0,0.0,1.0
15,59.0,F,90000.0,0,16864,10.0,5.0,bogo,10.0,1.0,1.0,1.0,1.0
19,24.0,F,60000.0,0,17116,0.0,3.0,informational,0.0,1.0,0.0,1.0,1.0
24,26.0,F,73000.0,0,17338,10.0,10.0,discount,2.0,1.0,1.0,1.0,1.0


In [13]:
# Test set: print the feature and target shapes (one per line), then
# display the first five feature rows as the cell output.
print(X_test.shape, y_test.shape, sep='\n')
X_test.head(n=5)

(25469, 13)
(25469,)


Unnamed: 0,age,gender,income,missing_demographics,member_epoch_days,difficulty,duration,offer_type,reward_t,channel_mobile,channel_web,channel_social,channel_email
3,33.0,M,72000.0,0,17277,10.0,10.0,discount,2.0,1.0,1.0,1.0,1.0
4,33.0,M,72000.0,0,17277,10.0,7.0,discount,2.0,1.0,1.0,0.0,1.0
6,,,,1,17646,5.0,5.0,bogo,5.0,1.0,1.0,1.0,1.0
11,40.0,O,57000.0,0,17540,5.0,7.0,bogo,5.0,1.0,1.0,0.0,1.0
16,59.0,F,90000.0,0,16864,0.0,3.0,informational,0.0,1.0,0.0,1.0,1.0


## 2. Create the model <a id='model'></a>
[Top](#top)

## 3. Evaluate the model <a id='eval'></a>
[Top](#top)

## 4. Analysis and Conclusions <a id='conclusions'></a>
[Top](#top)