# Import libraries

In [181]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier

import os

In [182]:
# Show every float in rendered DataFrames with exactly two decimal places.
pd.set_option('display.float_format', '{:.2f}'.format)

In [183]:
# The cleaned CSV files are expected in a `clean_data` folder inside the parent
# of the current working directory. os.path.join inserts the separator of the
# host OS, so the path is valid on Windows, Linux and macOS alike (a literal
# backslash only works on Windows).
loc = os.path.normpath(os.path.join(os.getcwd(), os.pardir))
clean_data_loc = os.path.join(loc, 'clean_data')

# Import dataset

In [184]:
# Load the cleaned customer profiles. os.path.join avoids the hard-coded
# (and doubled) backslash separator, which only Windows tolerates.
profile_df = pd.read_csv(os.path.join(clean_data_loc, 'clean_profile.csv'))

# Left-closed age brackets: [18, 30), [30, 50), [50, 65), [65, inf).
# NOTE: the last label reads ">65" but, with right=False, the bracket starts
# at 65 inclusive (a 65-year-old is labelled ">65").
bins = [18, 30, 50, 65, np.inf]
labels = ["18-29", "30-49", "50-64", ">65"]

# Ages that fall outside the bins (below 18, or missing) get NaN as age_group.
profile_df['age_group'] = pd.cut(profile_df['age'], bins=bins, right=False, labels=labels)
profile_df

Unnamed: 0,gender,age,id,became_member_on,income,age_group
0,F,55,0610b486422d4921ae7d2bf64640c50b,2017-07-15,112000.00,50-64
1,F,75,78afa995795e4d85b5d9ceeca43f5fef,2017-05-09,100000.00,>65
2,M,68,e2127556f4f64592b11af22de27a7932,2018-04-26,70000.00,>65
3,M,65,389bc3fa690240e798340f5a15918d5c,2018-02-09,53000.00,>65
4,M,58,2eeac8d8feae4a8cad5a6af0499a211d,2017-11-11,51000.00,50-64
...,...,...,...,...,...,...
16995,F,43,5c686d09ca4d475a8f750f2ba07e0440,2016-09-01,69000.00,30-49
16996,F,64,d9ca82f550ac4ee58b6299cf1e5c824a,2016-04-15,60000.00,50-64
16997,M,68,ca45ee1883624304bac1e4c8a114f045,2018-03-05,59000.00,>65
16998,F,58,a9a20fa8b5504360beb4e7c8712f8306,2016-01-16,60000.00,50-64


In [185]:
# Encode each profile attribute as a small integer code.
# Missing / unmatched values are encoded as -1 by pandas categoricals.
membership_dates = pd.to_datetime(profile_df['became_member_on'])

code_columns = {
    # Position of the age bracket within the ordered age_group categories
    'age_group_code': profile_df['age_group'].cat.codes,
    # Fixed mapping: O -> 0, F -> 1, M -> 2
    'gender_code': pd.Categorical(profile_df['gender'], categories=['O', 'F', 'M']).codes,
    # Four equal-width income bands spanning the observed income range
    'income_code': pd.cut(profile_df['income'], bins=4).cat.codes,
    # Four equal-width bands spanning the observed sign-up dates
    'loyalty_code': pd.cut(membership_dates, bins=4).cat.codes,
}
for column_name, codes in code_columns.items():
    profile_df[column_name] = codes

# Customer id followed by the four encoded attributes
customer_columns = ['id'] + list(code_columns)
customer_df = profile_df[customer_columns]
customer_df.head()

Unnamed: 0,id,age_group_code,gender_code,income_code,loyalty_code
0,0610b486422d4921ae7d2bf64640c50b,2,1,3,3
1,78afa995795e4d85b5d9ceeca43f5fef,3,1,3,3
2,e2127556f4f64592b11af22de27a7932,3,2,1,3
3,389bc3fa690240e798340f5a15918d5c,3,2,1,3
4,2eeac8d8feae4a8cad5a6af0499a211d,2,2,0,3


In [186]:
# Load the cleaned event log and the offer catalogue. os.path.join avoids the
# hard-coded (and doubled) backslash separator, which only Windows tolerates.
transcript_df = pd.read_csv(os.path.join(clean_data_loc, 'clean_transcript.csv'))
portfolio_df = pd.read_csv(os.path.join(clean_data_loc, 'clean_portfolio.csv'))

# Label the offers "offer 1" ... "offer N" in file order. The range is sized
# from the frame so the assignment cannot fail on a length mismatch if the
# catalogue ever holds a number of offers other than ten.
portfolio_df['offer_name'] = ['offer {}'.format(i) for i in range(1, len(portfolio_df) + 1)]

# Attach the offer attributes to every event. With a left join, events whose
# 'offer id' matches no catalogue row are kept and get NaN offer attributes.
# The two join keys are dropped afterwards; 'offer_name' identifies the offer.
transcript_df = (
    transcript_df
    .merge(portfolio_df, how='left', left_on='offer id', right_on='id')
    .drop(columns=['id', 'offer id'])
)
transcript_df.head()

Unnamed: 0,person,time,transaction,offer received,offer viewed,offer completed,amount,reward,difficulty,duration,offer_type,email,mobile,social,web,offer_name
0,78afa995795e4d85b5d9ceeca43f5fef,0,0,1,0,0,,5.0,5.0,7.0,bogo,1.0,1.0,0.0,1.0,offer 4
1,a03223e636434f42ac4c3df47e8bac43,0,0,1,0,0,,5.0,20.0,10.0,discount,1.0,0.0,0.0,1.0,offer 5
2,e2127556f4f64592b11af22de27a7932,0,0,1,0,0,,2.0,10.0,7.0,discount,1.0,1.0,0.0,1.0,offer 10
3,8ec6ce2a7e7949b1bf142def7d0e0586,0,0,1,0,0,,2.0,10.0,10.0,discount,1.0,1.0,1.0,1.0,offer 7
4,68617ca6246f4fbc85e91a2a49552598,0,0,1,0,0,,10.0,10.0,5.0,bogo,1.0,1.0,1.0,1.0,offer 2


In [187]:
# One indicator column per offer label. Columns come out sorted as strings,
# which is why "offer 10" sits between "offer 1" and "offer 2".
offer_labels = transcript_df['offer_name']
dummies = pd.get_dummies(offer_labels)
dummies.head()

Unnamed: 0,offer 1,offer 10,offer 2,offer 3,offer 4,offer 5,offer 6,offer 7,offer 8,offer 9
0,0,0,0,0,1,0,0,0,0,0
1,0,0,0,0,0,1,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,1,0,0
4,0,0,1,0,0,0,0,0,0,0


In [188]:
# Person and event-type flags, placed side by side with the offer indicators.
event_columns = ['person', 'transaction', 'offer received', 'offer viewed', 'offer completed']
event_offer_df = pd.concat([transcript_df[event_columns], dummies], axis='columns')
event_offer_df.head()

Unnamed: 0,person,transaction,offer received,offer viewed,offer completed,offer 1,offer 10,offer 2,offer 3,offer 4,offer 5,offer 6,offer 7,offer 8,offer 9
0,78afa995795e4d85b5d9ceeca43f5fef,0,1,0,0,0,0,0,0,1,0,0,0,0,0
1,a03223e636434f42ac4c3df47e8bac43,0,1,0,0,0,0,0,0,0,1,0,0,0,0
2,e2127556f4f64592b11af22de27a7932,0,1,0,0,0,1,0,0,0,0,0,0,0,0
3,8ec6ce2a7e7949b1bf142def7d0e0586,0,1,0,0,0,0,0,0,0,0,0,1,0,0
4,68617ca6246f4fbc85e91a2a49552598,0,1,0,0,0,0,1,0,0,0,0,0,0,0


# Create classification model

In [189]:
def offer_rec(offer, model_type, random_state=42):
    """
    Train a classifier that predicts whether a customer completes a given
    offer at least once, and measure its accuracy on a held-out split.

    Input:
        offer: name of the offer indicator column in event_offer_df
            (e.g. 'offer 1')
        model_type: name of the classifier to train; one of
            'LogisticRegression', 'SVC', 'GaussianNB', 'Perceptron',
            'LinearSVC', 'SGDClassifier', 'RandomForestClassifier',
            'DecisionTreeClassifier'
        random_state: seed used for the train/test split and for every
            estimator that accepts one, so repeated calls give the same
            result. Pass None to get a fresh random split on each call.
    Output:
        model: model after training
        accuracy: fraction of correct predictions on the test split
    Raises:
        KeyError: if model_type is not one of the supported names, or if
            offer is not a column of event_offer_df
    """
    # Map names to classes (not instances) so only the requested estimator
    # is ever constructed.
    model_classes = {
        'LogisticRegression': LogisticRegression,
        'SVC': SVC,
        'GaussianNB': GaussianNB,
        'Perceptron': Perceptron,
        'LinearSVC': LinearSVC,
        'SGDClassifier': SGDClassifier,
        'RandomForestClassifier': RandomForestClassifier,
        'DecisionTreeClassifier': DecisionTreeClassifier,
    }
    # Fail fast, before any data preparation, with a message listing the options.
    if model_type not in model_classes:
        raise KeyError(
            "Unknown model_type {!r}; expected one of {}".format(model_type, sorted(model_classes))
        )

    # Keep only the events that concern this offer
    offer_events = event_offer_df[event_offer_df[offer] == 1]

    # One row per person: how often the offer was received / completed,
    # joined with that person's encoded profile attributes.
    offer_df = (
        offer_events
        .groupby('person')[['offer received', 'offer completed']]
        .sum()
        .reset_index()
        .merge(customer_df, how='left', left_on='person', right_on='id')
        .drop(columns='id')
    )

    # is_used -> 1 when the customer completed the offer at least once, else 0
    offer_df['is_used'] = (offer_df['offer completed'] > 0).astype(int)

    # Features and target. age_group_code is available but not used as a feature.
    X = offer_df[['offer received', 'gender_code', 'income_code', 'loyalty_code']]
    y = offer_df['is_used']

    # Seeded split so the reported accuracy is reproducible
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)

    model = model_classes[model_type]()
    # Not every estimator is stochastic (GaussianNB has no random_state),
    # so only seed the ones that expose the parameter.
    if 'random_state' in model.get_params():
        model.set_params(random_state=random_state)

    # Fit
    model.fit(X_train, y_train)

    # Predict
    y_pred = model.predict(X_test)

    # Evaluation
    accuracy = (y_pred == y_test).mean()

    return model, accuracy

In [190]:
# Offers to evaluate ('offer 3' and 'offer 8' are not part of this list)
lst = [
    'offer 1', 'offer 2', 'offer 4', 'offer 5',
    'offer 6', 'offer 7', 'offer 9', 'offer 10',
]

# Classifier names understood by offer_rec
models = [
    'LogisticRegression',
    'SVC',
    'GaussianNB',
    'Perceptron',
    'LinearSVC',
    'SGDClassifier',
    'RandomForestClassifier',
    'DecisionTreeClassifier',
]

# Train every classifier on every offer and keep only the accuracy.
# Result layout: one row per model, one column per offer.
accuracy_by_offer = {}
for offer in lst:
    accuracy_by_offer[offer] = [offer_rec(offer, model_name)[1] for model_name in models]

result_df = pd.DataFrame(accuracy_by_offer, index=models)
result_df

Unnamed: 0,offer 1,offer 2,offer 4,offer 5,offer 6,offer 7,offer 9,offer 10
LogisticRegression,0.66,0.65,0.66,0.65,0.71,0.71,0.63,0.63
SVC,0.67,0.68,0.66,0.7,0.7,0.73,0.66,0.67
GaussianNB,0.67,0.69,0.62,0.65,0.71,0.73,0.62,0.64
Perceptron,0.63,0.5,0.57,0.58,0.71,0.71,0.43,0.59
LinearSVC,0.67,0.67,0.66,0.65,0.71,0.73,0.66,0.66
SGDClassifier,0.66,0.63,0.6,0.63,0.72,0.67,0.61,0.63
RandomForestClassifier,0.68,0.69,0.64,0.66,0.71,0.73,0.66,0.67
DecisionTreeClassifier,0.68,0.68,0.66,0.68,0.71,0.71,0.66,0.65


In [191]:
def rec(customer_id, offer, model_type):
    """
    Predict whether a customer would complete a given offer.

    A model is trained from scratch by offer_rec on every call, then applied
    to this customer's attributes.

    Input:
        customer_id: id of the customer
        offer: type of offer (indicator column name, e.g. 'offer 1')
        model_type: type of model
    Output:
        pred: array of predicted is_used labels (1 = completes the offer,
            0 = does not), one entry per profile row matching customer_id
    Raises:
        ValueError: if customer_id is not present in profile_df
    """
    # The feature row must use exactly the columns offer_rec trains on,
    # in the same order; keep this list in sync with offer_rec.
    feature_columns = ['offer received', 'gender_code', 'income_code', 'loyalty_code']

    customer = profile_df.loc[
        profile_df['id'] == customer_id,
        ['gender_code', 'income_code', 'loyalty_code'],
    ]
    # Fail before the (expensive) training step with a readable message.
    if customer.empty:
        raise ValueError("Unknown customer_id {!r}: not found in profile_df".format(customer_id))

    # How many times this customer has received this offer so far.
    received_mask = (event_offer_df['person'] == customer_id) & (event_offer_df[offer] == 1)
    times_received = int(event_offer_df.loc[received_mask, 'offer received'].sum())
    # NOTE(review): a customer who never received the offer is scored as if it
    # were sent once -- confirm this matches the intended use.
    times_received = max(times_received, 1)

    features = customer.assign(**{'offer received': times_received})[feature_columns]

    model = offer_rec(offer, model_type)[0]

    pred = model.predict(features)

    return pred