# Machine Learning Engineer Capstone Project
## Starbucks Challenge
### by Mark-Danney Oonk

---

This notebook describes:
- The training and refinement of the model
- Justification of the model against the benchmark
- Discussion of outcomes and possible improvements

Running the code in this notebook results in:
- model/???.??? with the trained neural network that is able to make offer recommendations based on profile data

In [15]:
# imports

import joblib
import keras_tuner as kt
import logging
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler

# constants
SEED = 42
ACCOUNT_PROPERTIES = ['age', 'income', 'gender_F', 'gender_M', 'gender_O', 'gender_U', 'account_age']
OFFER_CHANNELS = ['channels_email', 'channels_mobile', 'channels_social', 'channels_web']
OFFER_TYPES = ['offer_type_bogo', 'offer_type_discount', 'offer_type_informational']
OFFER_PARAMETERS = ['reward', 'difficulty', 'duration']

In [16]:
# load scaler and data

scaler: MinMaxScaler = joblib.load('./model/scaler.gz')
SCALED_COLS = joblib.load('./model/SCALED_COLS')

successes: pd.DataFrame = pd.read_csv('./data/successes.csv')

display(successes)

Unnamed: 0,reward,difficulty,duration,channels_email,channels_mobile,channels_social,channels_web,offer_type_bogo,offer_type_discount,offer_type_informational,successful,amount,age,income,gender_F,gender_M,gender_O,gender_U,account_age
0,0.2,0.50,0.571429,1,1,0,1,0.0,1.0,0.0,0,0.000000,0.180723,0.466667,0.0,1.0,0.0,0.0,0.252880
1,0.0,0.00,0.142857,1,1,0,1,0.0,0.0,1.0,1,0.074148,0.180723,0.466667,0.0,1.0,0.0,0.0,0.252880
2,0.0,0.00,0.000000,1,1,1,0,0.0,0.0,1.0,1,0.191729,0.180723,0.466667,0.0,1.0,0.0,0.0,0.252880
3,0.5,0.25,0.285714,1,1,1,1,1.0,0.0,0.0,0,0.000000,0.180723,0.466667,0.0,1.0,0.0,0.0,0.252880
4,0.2,0.50,1.000000,1,1,1,1,0.0,1.0,0.0,0,0.000000,0.180723,0.466667,0.0,1.0,0.0,0.0,0.252880
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62827,0.5,0.25,0.571429,1,1,0,1,1.0,0.0,0.0,0,0.000000,0.192771,0.044444,0.0,1.0,0.0,0.0,0.301152
62828,0.5,0.25,0.285714,1,1,1,1,1.0,0.0,0.0,1,0.165686,0.192771,0.044444,0.0,1.0,0.0,0.0,0.301152
62829,0.5,1.00,1.000000,1,0,0,1,0.0,1.0,0.0,1,0.197958,0.325301,0.355556,1.0,0.0,0.0,0.0,0.333516
62830,0.5,0.25,0.571429,1,1,0,1,1.0,0.0,0.0,0,0.000000,0.325301,0.355556,1.0,0.0,0.0,0.0,0.333516


In [17]:
# Build data for offer success prediction

successes_X = successes[ACCOUNT_PROPERTIES + OFFER_CHANNELS + OFFER_TYPES + OFFER_PARAMETERS]
successes_y = successes[['successful', 'amount']]

display(successes_X)
display(successes_y)

Unnamed: 0,age,income,gender_F,gender_M,gender_O,gender_U,account_age,channels_email,channels_mobile,channels_social,channels_web,offer_type_bogo,offer_type_discount,offer_type_informational,reward,difficulty,duration
0,0.180723,0.466667,0.0,1.0,0.0,0.0,0.252880,1,1,0,1,0.0,1.0,0.0,0.2,0.50,0.571429
1,0.180723,0.466667,0.0,1.0,0.0,0.0,0.252880,1,1,0,1,0.0,0.0,1.0,0.0,0.00,0.142857
2,0.180723,0.466667,0.0,1.0,0.0,0.0,0.252880,1,1,1,0,0.0,0.0,1.0,0.0,0.00,0.000000
3,0.180723,0.466667,0.0,1.0,0.0,0.0,0.252880,1,1,1,1,1.0,0.0,0.0,0.5,0.25,0.285714
4,0.180723,0.466667,0.0,1.0,0.0,0.0,0.252880,1,1,1,1,0.0,1.0,0.0,0.2,0.50,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62827,0.192771,0.044444,0.0,1.0,0.0,0.0,0.301152,1,1,0,1,1.0,0.0,0.0,0.5,0.25,0.571429
62828,0.192771,0.044444,0.0,1.0,0.0,0.0,0.301152,1,1,1,1,1.0,0.0,0.0,0.5,0.25,0.285714
62829,0.325301,0.355556,1.0,0.0,0.0,0.0,0.333516,1,0,0,1,0.0,1.0,0.0,0.5,1.00,1.000000
62830,0.325301,0.355556,1.0,0.0,0.0,0.0,0.333516,1,1,0,1,1.0,0.0,0.0,0.5,0.25,0.571429


Unnamed: 0,successful,amount
0,0,0.000000
1,1,0.074148
2,1,0.191729
3,0,0.000000
4,0,0.000000
...,...,...
62827,0,0.000000
62828,1,0.165686
62829,1,0.197958
62830,0,0.000000


In [18]:
# Build data for offer proposal and amount spent prediction

# Get best offer for money spent per account
idx = successes.groupby(ACCOUNT_PROPERTIES, sort=False)['amount'].transform(max) == successes['amount']
top_spending = successes[idx].groupby(ACCOUNT_PROPERTIES, sort=False).first().reset_index()

top_spending_X = top_spending[ACCOUNT_PROPERTIES]
top_spending_y = top_spending[OFFER_CHANNELS + OFFER_TYPES + OFFER_PARAMETERS + ['amount']]

display(top_spending.head())
display(top_spending.describe())

Unnamed: 0,age,income,gender_F,gender_M,gender_O,gender_U,account_age,reward,difficulty,duration,channels_email,channels_mobile,channels_social,channels_web,offer_type_bogo,offer_type_discount,offer_type_informational,successful,amount
0,0.180723,0.466667,0.0,1.0,0.0,0.0,0.25288,0.0,0.0,0.0,1,1,1,0,0.0,0.0,1.0,1,0.191729
1,0.26506,0.3,0.0,0.0,1.0,0.0,0.108612,0.5,1.0,1.0,1,0,0,1,0.0,1.0,0.0,1,0.190777
2,0.493976,0.666667,1.0,0.0,0.0,0.0,0.47943,0.2,0.5,1.0,1,1,1,1,0.0,1.0,0.0,1,0.152535
3,0.072289,0.333333,1.0,0.0,0.0,0.0,0.341196,0.0,0.0,0.0,1,1,1,0,0.0,0.0,1.0,1,0.509344
4,0.096386,0.477778,1.0,0.0,0.0,0.0,0.219419,0.2,0.5,1.0,1,1,1,1,0.0,1.0,0.0,1,0.194411


Unnamed: 0,age,income,gender_F,gender_M,gender_O,gender_U,account_age,reward,difficulty,duration,channels_email,channels_mobile,channels_social,channels_web,offer_type_bogo,offer_type_discount,offer_type_informational,successful,amount
count,15752.0,15752.0,15752.0,15752.0,15752.0,15752.0,15752.0,15752.0,15752.0,15752.0,15752.0,15752.0,15752.0,15752.0,15752.0,15752.0,15752.0,15752.0,15752.0
mean,0.438446,0.39334,0.388268,0.537963,0.013459,0.06031,0.288213,0.456044,0.412919,0.533193,1.0,0.904393,0.710576,0.780536,0.39906,0.443753,0.157186,0.72067,0.203406
std,0.203088,0.232666,0.487372,0.498573,0.115232,0.238067,0.22882,0.353854,0.260264,0.318924,0.0,0.294061,0.453509,0.413897,0.489721,0.496842,0.363988,0.448684,0.174807
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.313253,0.222222,0.0,0.0,0.0,0.0,0.115195,0.2,0.25,0.285714,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.082713
50%,0.438476,0.393389,0.0,1.0,0.0,0.0,0.199122,0.3,0.5,0.571429,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.16906
75%,0.578313,0.533333,1.0,1.0,0.0,0.0,0.439386,1.0,0.5,0.571429,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.272106
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [19]:
# Create a scaler just for amount, so we can invert scale that

amount_col = SCALED_COLS.index('amount')
scaler_amount = MinMaxScaler()
scaler_amount.min_, scaler_amount.scale_ = scaler.min_[amount_col], scaler.scale_[amount_col]

In [20]:
logger = tf.get_logger()
logger.setLevel(logging.ERROR)
print('Using:')
print('\t\u2022 TensorFlow version:', tf.__version__)
print('\t\u2022 tf.keras version:', tf.keras.__version__)
print('\t\u2022 Running on GPU' if tf.test.is_gpu_available() else '\t\u2022 GPU device not found. Running on CPU')

Using:
	• TensorFlow version: 2.6.0
	• tf.keras version: 2.6.0
	• Running on GPU


## Constructing the datasets

Since the amount of rows is relatively small, we'll use a K-Fold cross-validation method to split the dataset.

In [21]:
# Create hyperband parameter tuning parameters

# Hyperband tuning doesn't support batch size tuning out of the box, so make it
class HyperbandWithBatchTuning(kt.Hyperband):
    def run_trial(self, trial, *args, **kwargs):
        kwargs['batch_size'] = trial.hyperparameters.Int('batch_size', 16, 64, step=16)
        return super(HyperbandWithBatchTuning, self).run_trial(trial, *args, **kwargs)

def success_model_builder(hp: kt.HyperParameters):
    hp_dropout = hp.Float('dropout', min_value=0.2, max_value=0.4, step=0.1)
    hp_units = hp.Int('units', min_value=64, max_value=128, step=32)
    hp_units_second = hp.Int('units_2', min_value=64, max_value=128, step=32)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model = Sequential()
    model.add(Input(shape=successes_X.shape))
    model.add(Dropout(rate=hp_dropout, seed=SEED))
    model.add(Dense(units=hp_units, activation='relu'))
    model.add(Dropout(rate=hp_dropout, seed=SEED))
    model.add(Dense(units=hp_units_second, activation='relu'))
    model.add(Dropout(rate=hp_dropout, seed=SEED))
    model.add(Dense(successes_y.shape[1], activation='softmax'))

    model.compile(optimizer=keras.optimizers.Adadelta(learning_rate=hp_learning_rate),
                loss=keras.losses.CategoricalCrossentropy(),
                metrics=['accuracy'])
    return model

def spending_model_builder(hp: kt.HyperParameters):
    hp_dropout = hp.Float('dropout', min_value=0.2, max_value=0.4, step=0.1)
    hp_units = hp.Int('units', min_value=32, max_value=128, step=32)
    hp_units_second = hp.Int('units_2', min_value=32, max_value=128, step=32)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model = Sequential()
    model.add(Input(shape=top_spending_X.shape))
    model.add(Dropout(rate=hp_dropout, seed=SEED))
    model.add(Dense(units=hp_units, activation='relu'))
    model.add(Dropout(rate=hp_dropout, seed=SEED))
    model.add(Dense(units=hp_units_second, activation='relu'))
    model.add(Dropout(rate=hp_dropout, seed=SEED))
    model.add(Dense(top_spending_y.shape[1], activation='softmax'))

    model.compile(optimizer=keras.optimizers.Adadelta(learning_rate=hp_learning_rate),
                loss=keras.losses.MeanSquaredLogarithmicError(),
                metrics=['accuracy'])
    return model

In [22]:
tuner_successes = HyperbandWithBatchTuning(success_model_builder,
                     objective='val_accuracy',
                     max_epochs=15,
                     factor=3,
                     directory='model',
                     project_name='starbucks_successes',
                     seed=SEED)

tuner_spendings = HyperbandWithBatchTuning(spending_model_builder,
                     objective='val_accuracy',
                     max_epochs=15,
                     factor=3,
                     directory='model',
                     project_name='starbucks_spending',
                     seed=SEED)

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

In [23]:
tuner_successes.search(successes_X, successes_y, epochs=50, validation_split=0.2, callbacks=[stop_early])

best_hps_successes=tuner_successes.get_best_hyperparameters()[0]
model_successes = tuner_successes.hypermodel.build(best_hps_successes)

print(best_hps_successes.values)

Trial 30 Complete [00h 04m 20s]
val_accuracy: 0.8635314702987671

Best val_accuracy So Far: 0.8635314702987671
Total elapsed time: 00h 21m 03s
{'dropout': 0.30000000000000004, 'units': 64, 'units_2': 96, 'learning_rate': 0.001, 'tuner/epochs': 2, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0, 'batch_size': 16}


In [24]:
tuner_spendings.search(top_spending_X, top_spending_y, epochs=50, validation_split=0.2, callbacks=[stop_early])

best_hps_spendings=tuner_spendings.get_best_hyperparameters()[0]
model_spendings = tuner_spendings.hypermodel.build(best_hps_spendings)

print(best_hps_spendings.values)

Trial 30 Complete [00h 00m 20s]
val_accuracy: 1.0

Best val_accuracy So Far: 1.0
Total elapsed time: 00h 05m 28s
{'dropout': 0.4000000000000001, 'units': 32, 'units_2': 96, 'learning_rate': 0.01, 'batch_size': 32, 'tuner/epochs': 15, 'tuner/initial_epoch': 5, 'tuner/bracket': 2, 'tuner/round': 2, 'tuner/trial_id': 'bfbb02a86b485d2e6a84f2eafd27f3e9'}


In [25]:
# history_model = history_model_builder(best_hps_history)

# kfold = KFold(n_splits=3, shuffle=True, random_state=SEED)

# for fold_no, (train, test) in enumerate(kfold.split(history_X, history_y)):
#     history_model.fit(x=history_X.iloc[train], y=history_y.iloc[train], epochs=3)
#     scores = history_model.evaluate(history_X.iloc[test], history_y.iloc[test])
#     print(f'Score for fold {fold_no}: {history_model.metrics_names[0]} of {scores[0]}; {history_model.metrics_names[1]} of {scores[1]*100}%')

In [26]:
# success_model = success_model_builder(best_hps_successes)

# for fold_no, (train, test) in enumerate(kfold.split(successes_X, successes_y)):
#     success_model.fit(x=successes_X.iloc[train], y=successes_y.iloc[train], epochs=3)
#     scores = success_model.evaluate(successes_X.iloc[test], successes_y.iloc[test])
#     print(f'Score for fold {fold_no}: {success_model.metrics_names[0]} of {scores[0]}; {success_model.metrics_names[1]} of {scores[1]*100}%')

In [27]:
# #model.fit(x=history_X, y=history_y)
# scores = model.evaluate(history_X, history_y)
# print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')

In [28]:
import random
pd.DataFrame(columns=history_y.columns, data=model.predict(np.array([history_X.iloc[random.randint(0, history_X.shape[0])]]))).sort_values(by=0, axis=1, ascending=False)
#pd.DataFrame(columns=history_y.columns, data=scaler_amount.inverse_transform(model.predict(history_X))).sort_values(by='offer_type_discount_channels_mobile', axis=0, ascending=False)#.sort_values(by=0, axis=1, ascending=False)

NameError: name 'history_y' is not defined