# ML Optimisation Strategy

## Bayes Optimisation

Bayesian optimisation is a better approach to this problem as it is able to map non-linear relationships. I will integrate my machine learning model into a Bayesian model so that it can interact with the environment and allocate OfferAmt.

In [1]:
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
import warnings
from bayes_opt import BayesianOptimization
from bayes_opt.util import UtilityFunction
from skopt import Optimizer
from skopt.space import Real
from skopt.utils import use_named_args
import mlflow

import sys


def _format_six_decimals(value):
    """Render a float with exactly six digits after the decimal point."""
    return '%.6f' % value


# Display floats in DataFrames in fixed-point form with six decimals.
pd.set_option('display.float_format', _format_six_decimals)

# Silence all warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Extend the import search path so the `src` package used by the following
# cells can be imported.
# NOTE(review): this is an absolute, machine-specific path — confirm it
# matches the local checkout before running.
sys.path.append('C:/projects/python/coupon-optimisation')

In [2]:
from src.models.XGBoost import XGBoostTrainer
from src.models.NeuralNetwork import NeuralNetworkTrainer
from src.models.LogisticRegression import LogisticRegressionTrainer
from src.models.GradientBoostClassifier import GBMTrainer
from src.models.CatBoost import CatBoostTrainer
from src.models.LightGradientBoost import LightGBMTrainer
from src.optimisers.SKOPT_Optimiser import optimize_offers_skopt
from src.optimisers.Bayes_Opt import optimize_offers

Below are the features selected by Optuna.

In [3]:
# Columns fed to the model, in the order they are handed to the optimiser
# as `feature_names`. Keep the order stable.
features = [
    'TotalOfferAmtRedeemed',
    'TotalCouponRedeem',
    'OfferAmt',
    'AverageOfferAmtPerVisit',
    'AverageSpendPerVisit',
    'TotalOfferAmtReceived',
    'n-1_spend',
    'n-3_spend',
    'n-4_spend',
    'MinOfferAmt',
    'Year',
    'Month',
    'weekNum',
]

# Candidate columns currently excluded from the feature set:
#   CouponUsageRate, TotalVisits, TotalSpendAmt, n-2_spend, n-5_spend,
#   MaxSpend, sensitivity_gradient, MaxOfferAmt, MedianSpendPerOfferAmt,
#   PatronID

Next we want to filter the test data based on the date we are interested in.

In [4]:
# Load the processed test set.
optimised_data = pd.read_csv('../data/processed/processed-test-data.csv')

# Parse 'UseStartDate' as datetimes. With errors='coerce', values that cannot
# be parsed become NaT, which never equals the target date below.
optimised_data['UseStartDate'] = pd.to_datetime(optimised_data['UseStartDate'], errors='coerce')

# The single offer start date to optimise for. Both names hold the same value;
# `start_date` is kept so any cell that refers to it keeps working.
specific_date = "2019-07-01"
start_date = specific_date

# Row selection: 'UseStartDate' exactly equal to the target date (not "on or
# after" it) AND 'TotalCouponRedeem' of at least 5.
on_target_date = optimised_data['UseStartDate'] == pd.to_datetime(specific_date)
enough_redemptions = optimised_data['TotalCouponRedeem'] >= 5

# Take an explicit copy so the clean-up below works on an independent frame
# instead of modifying a boolean-indexed selection in place (the pattern that
# triggers pandas' chained-assignment warning, which this notebook silences).
filtered_data = optimised_data[on_target_date & enough_redemptions].copy()

# Replace every remaining missing value with 0.
filtered_data = filtered_data.fillna(0)

# Renumber rows from 0. Without drop=True the previous row labels are kept in
# a new 'index' column.
# NOTE(review): confirm whether that 'index' column is needed downstream; if
# not, reset_index(drop=True) would keep it out of the saved results.
filtered_data = filtered_data.reset_index()


### Learnings from Model building

This model has been improved through:

1) Reducing the size of the dataframe being manipulated
2) Using SKOPT (scikit-optimize, which is built on scikit-learn)
3) Taking an iterative approach rather than setting bounds
4) Setting the initial amounts using x0 and y0

In [5]:
# Alternative used for quick experiments: only the first 20 rows, minus row 10.
# data_to_optimize = filtered_data.head(20).drop(index=10)

# Optimise over the whole filtered set. This is an alias, not a copy: both
# names refer to the same DataFrame object.
data_to_optimize = filtered_data

# Display the total of the current 'OfferAmt' column; the optimisation cell
# passes this same sum as its budget.
data_to_optimize['OfferAmt'].sum()

113757.0

In [6]:
trainer = GBMTrainer()

rounds = 100
increment_value = 10

trainer.load_model()

# The total of the existing offers is the budget handed to the optimiser.
# Compute it once so the logged parameter and the value actually passed to
# the optimiser are guaranteed to be the same number.
budget = data_to_optimize['OfferAmt'].sum()

mlflow.set_experiment('Bayes Optimisation')

# The `with` block ends the MLflow run when it exits, so no explicit
# mlflow.end_run() call is needed inside it.
with mlflow.start_run(run_name='BO GBM Test'):

    # Log the settings of this run.
    mlflow.log_params({
        'rounds': rounds,
        'budget': budget,
        'increment_value': increment_value
    })

    # Run the optimiser; the result replaces `optimised_data` and is the
    # frame that the later cells save and summarise.
    optimised_data = optimize_offers_skopt(
        data=data_to_optimize,
        model=trainer,
        feature_names=features,
        budget=budget,
        rounds=rounds,
        increment_amount=increment_value
    )

    # Difference between the spend reported for the optimised offers and the
    # recorded 'Spend' column, per row.
    optimised_data['Residual'] = optimised_data['BayesOptExpSpend'] - optimised_data['Spend']

    offer_amt_sum = optimised_data['BayesOptOfferAmt'].sum()
    exp_spend_sum = optimised_data['BayesOptExpSpend'].sum()

    # Expected spend per unit of offer amount. The guard must be on the
    # divisor: a zero offer total would otherwise produce inf/nan, which
    # would then be logged and embedded in the results file name.
    metric_value = exp_spend_sum / offer_amt_sum if offer_amt_sum != 0 else 0

    # Log outcomes, reusing the sums computed above.
    mlflow.log_metrics({
        'BayesOptOfferAmt_sum': offer_amt_sum,
        'BayesOptExpSpend_sum': exp_spend_sum,
        'OfferAmt_sum': optimised_data['OfferAmt'].sum(),
        'Residuals_sum': optimised_data['Residual'].sum(),
        'Spend_sum': optimised_data['Spend'].sum(),
        'Ratio': metric_value
    })


Model and scaler loaded.
Offer: 22642.760000000002
Budget: 113757.0
Offer: 28305.760000000002
Budget: 113757.0
Offer: 33840.759999999995
Budget: 113757.0
Offer: 39146.759999999995
Budget: 113757.0
Offer: 44300.75999999999
Budget: 113757.0
Offer: 49235.75999999999
Budget: 113757.0
Offer: 53982.75999999999
Budget: 113757.0
Offer: 58534.75999999999
Budget: 113757.0
Offer: 62956.75999999999
Budget: 113757.0
Offer: 67168.75999999998
Budget: 113757.0
Offer: 71160.75999999998
Budget: 113757.0
Offer: 74979.75999999998
Budget: 113757.0
Offer: 78628.76
Budget: 113757.0
Offer: 82147.76
Budget: 113757.0
Offer: 85480.76
Budget: 113757.0
Offer: 88636.76
Budget: 113757.0
Offer: 91645.65999999999
Budget: 113757.0
Offer: 94558.65999999999
Budget: 113757.0
Offer: 97381.65999999999
Budget: 113757.0
Offer: 100126.65999999999
Budget: 113757.0
Offer: 102756.65999999999
Budget: 113757.0
Offer: 105316.65999999999
Budget: 113757.0
Offer: 107784.65999999999
Budget: 113757.0
Offer: 110174.65999999999
Budget: 113

In [8]:
import os
# Import the class, not the module: a plain `import datetime` here would
# rebind the notebook-wide name `datetime` (imported as the class in the
# first cell) to the module and break any cell that calls `datetime.now()`.
from datetime import datetime

# Format the metric value to two decimal places
metric_value_formatted = f"{metric_value:.2f}"

# Get the current date in YYYYMMDD format
current_date = datetime.now().strftime("%Y%m%d")

# Construct the file name: <model>_<ratio>_<date>_<rounds>.csv
model_name = "GBM_SKOPT"  # Simplified model name
file_name = f"{model_name}_{metric_value_formatted}_{current_date}_{rounds}.csv"

# Save the DataFrame to this file name, creating the target folder first so
# the write does not fail on a fresh checkout.
csv_file_path = f"../results/predictions/{file_name}"
os.makedirs(os.path.dirname(csv_file_path), exist_ok=True)
optimised_data.to_csv(csv_file_path, index=False)


In [9]:
# Sum of the 'Spend' column across all optimised rows.
total_spend = optimised_data['Spend'].sum()
print(total_spend)

225375.69
