# Coupon Probability Model

Here I would like to run a model for every customer in a given week, based on the maximum and minimum offer that it is possible to provide them. The output I want is a probability that they are going to accept the offer.

First I need to import the sanitised data.

In [1]:
import mlflow
import mlflow.sklearn
import os
import pandas as pd
import numpy as np
import torch

In [2]:
# Read the three input tables in one pass: the test split, the historical
# training split, and the per-customer sensitivity metrics.
csv_sources = (
    "../data/raw/TestingData.csv",
    "../data/raw/HistoricalTrainingData.csv",
    "../data/processed/customer_sensitivity_metrics.csv",
)
df_test, df_train, df_gradient = map(pd.read_csv, csv_sources)

In [3]:
# Choose the torch device: the first CUDA GPU when one is usable, else the CPU.
cuda_ready = torch.cuda.is_available()

if not cuda_ready:
    # No usable GPU: fall back to the CPU.
    print("CUDA (GPU support) is not available. Using CPU.")
    device = torch.device("cpu")
else:
    print("CUDA (GPU support) is available.")

    # Report how many GPUs torch can see.
    num_gpus = torch.cuda.device_count()
    print(f"Number of available GPUs: {num_gpus}")

    # Report the name of GPU index 0, which is the one selected below.
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU Name: {gpu_name}")

    device = torch.device("cuda:0")

CUDA (GPU support) is available.
Number of available GPUs: 1
GPU Name: NVIDIA GeForce RTX 3060 Laptop GPU


# Pre-Processing

In [4]:
import os

# Show the kernel's working directory; the relative paths used in this
# notebook (e.g. "../data/...") are resolved against it.
working_dir = os.getcwd()
print(working_dir)

C:\projects\python\coupon-optimisation\workbooks


In [5]:
import os
import sys

# Make the project root importable so that the `src` package resolves.
# The root is derived from the working directory (this notebook runs from the
# `workbooks/` folder, one level below the root) rather than from a
# hard-coded, machine-specific absolute path.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:  # re-running the cell must not append duplicates
    sys.path.append(project_root)

from src.preprocessing import preprocess_and_split

# Build the train/test matrices and labels from the raw frames plus the
# customer sensitivity metrics. `features` is presumably the list of feature
# names used by the models; confirm against src.preprocessing.
X_train, X_test, y_train, y_test, features = preprocess_and_split(df_train, df_test, df_gradient)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  agg_features['MedianSpendPerOfferAmt'].replace([np.inf, -np.inf], 1, inplace=True)


# Machine Learning Model Imports

In [6]:
from src.models.XGBoost import XGBoostTrainer
from src.models.NeuralNetwork import NeuralNetworkTrainer
from src.models.LogisticRegression import LogisticRegressionTrainer
from src.models.GradientBoostClassifier import GBMTrainer
from src.models.CatBoost import CatBoostTrainer
from src.models.LightGradientBoost import LightGBMTrainer
from src.models.BayesianTrainer import BayesianModelTrainer
from src.models.KNN import KNNTrainer
from src.models.ElasticNet import ElasticNetTrainer

Import the evaluation helpers and define the path to the saved scaler

In [7]:
# Evaluation helpers shared by every model cell below: evaluate_model turns
# (y_test, probabilities) into a metrics dict and log_metrics records it.
from src.util.models_util import evaluate_model, log_metrics
# Path of the scaler artefact, relative to the notebook's working directory
# (presumably the fitted feature scaler; confirm).
# NOTE(review): scaler_path is not referenced by any cell visible in this
# notebook; confirm whether it is still needed.
scaler_path = '../models/scalers/scaler.joblib'

## Neural Network

In [18]:
# Close any MLflow run left active by an earlier, interrupted execution so the
# run started below is not nested under it. This is a no-op when nothing is active.
mlflow.end_run()

nn_model_path = '../models/'

# Ensure the models directory exists.
# NOTE(review): nn_model_path is not passed to the trainer in this cell; where
# save_model() writes is decided inside NeuralNetworkTrainer.
os.makedirs(os.path.dirname(nn_model_path), exist_ok=True)

# The network's input width is the number of feature columns.
trainer = NeuralNetworkTrainer(num_features=X_train.shape[1])

# Track the experiment as one MLflow run. The `with` block closes the run on
# exit (FINISHED, or FAILED if an exception escapes), so no explicit
# mlflow.end_run() is needed inside it.
# NOTE(review): the last recorded execution of this cell raised
# FileNotFoundError for 'models/scalers/scaler.joblib' (a path relative to the
# working directory); check the scaler path used by NeuralNetworkTrainer.
with mlflow.start_run(run_name='Neural Network PyTorch n-1/5'):
    # Fit on the training split.
    trainer.train(X_train, y_train)

    # Score the test split, then persist the trained model.
    probabilities = trainer.predict(X_test)
    trainer.save_model()

    # Compute the evaluation metrics and record them.
    metrics = evaluate_model(y_test, probabilities)
    log_metrics(metrics)

    print("Training and evaluation completed.")
    print("Metrics:", metrics)


Epoch 1/25, Loss: 0.43273434042930603
Epoch 2/25, Loss: 0.2942943871021271
Epoch 3/25, Loss: 0.32934999465942383
Epoch 4/25, Loss: 0.41253894567489624
Epoch 5/25, Loss: 0.3412223756313324
Epoch 6/25, Loss: 0.16870488226413727
Epoch 7/25, Loss: 0.38206928968429565
Epoch 8/25, Loss: 0.3434986472129822
Epoch 9/25, Loss: 0.44097232818603516
Epoch 10/25, Loss: 0.2828368544578552
Epoch 11/25, Loss: 0.4011697769165039
Epoch 12/25, Loss: 0.26904433965682983
Epoch 13/25, Loss: 0.3970431089401245
Epoch 14/25, Loss: 0.33614271879196167
Epoch 15/25, Loss: 0.45306816697120667
Epoch 16/25, Loss: 0.32941967248916626
Epoch 17/25, Loss: 0.26086458563804626
Epoch 18/25, Loss: 0.31626707315444946
Epoch 19/25, Loss: 0.4740070700645447
Epoch 20/25, Loss: 0.6471055150032043
Epoch 21/25, Loss: 0.3181081712245941
Epoch 22/25, Loss: 0.4514879584312439
Epoch 23/25, Loss: 0.485196590423584
Epoch 24/25, Loss: 0.453777939081192
Epoch 25/25, Loss: 0.3362463116645813


FileNotFoundError: [Errno 2] No such file or directory: 'models/scalers/scaler.joblib'

## Logistic Regression

In [12]:
# Build the trainer with its default configuration (no arguments are passed).
trainer = LogisticRegressionTrainer()

# Track the experiment as one MLflow run. The `with` block closes the run on
# exit (FINISHED, or FAILED if an exception escapes), so no explicit
# mlflow.end_run() is needed inside it.
with mlflow.start_run(run_name='Logistic Regression + n1/5'):
    # Fit on the training split.
    trainer.train(X_train, y_train)

    # Score the test split, then persist the trained model.
    probabilities = trainer.predict(X_test)
    trainer.save_model()

    # Compute the evaluation metrics and record them.
    metrics = evaluate_model(y_test, probabilities)
    log_metrics(metrics)

    print("Training and evaluation completed.")
    print("Metrics:", metrics)


Model trained.
Model saved to ../models/trained-models/LogisticRegressionTrainer_model.joblib
Training and evaluation completed.
Metrics: {'precision': 0.685544250335957, 'recall': 0.6430758148748424, 'roc_auc': 0.8289644351614488, 'f1': 0.6636312952982718, 'TP': 3571, 'TN': 8656, 'FP': 1638, 'FN': 1982}


## XGBoost Random Forest

In [8]:
# Build the trainer with its default configuration (no arguments are passed).
trainer = XGBoostTrainer()

# Track the experiment as one MLflow run. The `with` block closes the run on
# exit (FINISHED, or FAILED if an exception escapes), so no explicit
# mlflow.end_run() is needed inside it.
with mlflow.start_run(run_name='XGBoost + n1/5'):
    # Fit on the training split.
    trainer.train(X_train, y_train)

    # Score the test split, then persist the trained model.
    probabilities = trainer.predict(X_test)
    trainer.save_model()

    # Compute the evaluation metrics and record them.
    metrics = evaluate_model(y_test, probabilities)
    log_metrics(metrics)

    print("Training and evaluation completed.")
    print("Metrics:", metrics)




Model trained.
Model saved to ../models/trained-models/XGBoostTrainer_model.joblib
Training and evaluation completed.
Metrics: {'precision': 0.7050359712230215, 'recall': 0.68827660723933, 'roc_auc': 0.8533935450991349, 'f1': 0.6965554948059047, 'TP': 3822, 'TN': 8695, 'FP': 1599, 'FN': 1731}


## Gradient Boosted Machine

In [8]:
# Build the trainer with its default configuration (no arguments are passed).
trainer = GBMTrainer()

# Track the experiment as one MLflow run. The `with` block closes the run on
# exit (FINISHED, or FAILED if an exception escapes), so no explicit
# mlflow.end_run() is needed inside it.
with mlflow.start_run(run_name='GBM Optuna F1-Score'):
    # Fit on the training split.
    trainer.train(X_train, y_train)

    # Score the test split, then persist the trained model.
    probabilities = trainer.predict(X_test)
    trainer.save_model()

    # Compute the evaluation metrics and record them.
    metrics = evaluate_model(y_test, probabilities)
    log_metrics(metrics)

    print("Training and evaluation completed.")
    print("Metrics:", metrics)


Model trained.
Model saved to ../models/trained-models/GBMTrainer_model.joblib
Training and evaluation completed.
Metrics: {'precision': 0.7272897546356996, 'recall': 0.6992616603637674, 'roc_auc': 0.8664968895211906, 'f1': 0.7130003672420125, 'TP': 3883, 'TN': 8838, 'FP': 1456, 'FN': 1670}


## CatBoost

In [15]:
# Build the trainer with its default configuration (no arguments are passed).
trainer = CatBoostTrainer()

# Track the experiment as one MLflow run. The `with` block closes the run on
# exit (FINISHED, or FAILED if an exception escapes), so no explicit
# mlflow.end_run() is needed inside it.
with mlflow.start_run(run_name='Cat Boost + n1/5'):
    # Fit on the training split.
    trainer.train(X_train, y_train)

    # Score the test split, then persist the trained model.
    probabilities = trainer.predict(X_test)
    trainer.save_model()

    # Compute the evaluation metrics and record them.
    metrics = evaluate_model(y_test, probabilities)
    log_metrics(metrics)

    print("Training and evaluation completed.")
    print("Metrics:", metrics)


CatBoost model trained.
Model saved to ../models/trained-models/CatBoostTrainer_model.joblib
Training and evaluation completed.
Metrics: {'precision': 0.7034344902780302, 'recall': 0.697100666306501, 'roc_auc': 0.8559421266170237, 'f1': 0.7002532561505066, 'TP': 3871, 'TN': 8662, 'FP': 1632, 'FN': 1682}


## Light Gradient Boost

In [8]:
# Build the trainer with its default configuration (no arguments are passed).
trainer = LightGBMTrainer()

# Track the experiment as one MLflow run. The `with` block closes the run on
# exit (FINISHED, or FAILED if an exception escapes), so no explicit
# mlflow.end_run() is needed inside it.
with mlflow.start_run(run_name='Light GBM Feature Selection .5'):
    # Fit on the training split.
    trainer.train(X_train, y_train)

    # Score the test split, then persist the trained model.
    probabilities = trainer.predict(X_test)
    trainer.save_model()

    # Compute the evaluation metrics and record them.
    metrics = evaluate_model(y_test, probabilities)
    log_metrics(metrics)

    print("Training and evaluation completed.")
    print("Metrics:", metrics)


[LightGBM] [Info] Number of positive: 43346, number of negative: 66914
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000802 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2284
[LightGBM] [Info] Number of data points in the train set: 110260, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.393125 -> initscore=-0.434194
[LightGBM] [Info] Start training from score -0.434194
LightGBM model trained.
Model saved to ../models/trained-models/LightGBMTrainer_model.joblib
Training and evaluation completed.
Metrics: {'precision': 0.6995355484101465, 'recall': 0.7052043940212498, 'roc_auc': 0.852842109196537, 'f1': 0.7023585328670076, 'TP': 3916, 'TN': 8612, 'FP': 1682, 'FN': 1637}


## Elastic Net Trainer

In [11]:
# Build the trainer with its default configuration (no arguments are passed).
trainer = ElasticNetTrainer()

# Track the experiment as one MLflow run. The `with` block closes the run on
# exit (FINISHED, or FAILED if an exception escapes), so no explicit
# mlflow.end_run() is needed inside it.
# NOTE(review): the recorded output of this cell shows TP=0, FP=0 and
# roc_auc=0.5, i.e. no positive predictions at all; the ElasticNetTrainer
# configuration needs checking before these metrics are compared with the
# other models.
with mlflow.start_run(run_name='Elastic Net Trainer'):
    # Fit on the training split.
    trainer.train(X_train, y_train)

    # Score the test split, then persist the trained model.
    probabilities = trainer.predict(X_test)
    trainer.save_model()

    # Compute the evaluation metrics and record them.
    metrics = evaluate_model(y_test, probabilities)
    log_metrics(metrics)

    print("Training and evaluation completed.")
    print("Metrics:", metrics)


Elastic Net model trained.
Model saved to ../models/trained-models/ElasticNetTrainer_model.joblib
Training and evaluation completed.
Metrics: {'precision': 0.0, 'recall': 0.0, 'roc_auc': 0.5, 'f1': 0.0, 'TP': 0, 'TN': 10294, 'FP': 0, 'FN': 5553}


  _warn_prf(average, modifier, msg_start, len(result))


## Gaussian Naive Bayes

In [12]:
# Build the trainer with its default configuration (no arguments are passed).
trainer = BayesianModelTrainer()

# Track the experiment as one MLflow run. The `with` block closes the run on
# exit (FINISHED, or FAILED if an exception escapes), so no explicit
# mlflow.end_run() is needed inside it.
with mlflow.start_run(run_name='Gaussian Naive Bayes'):
    # Fit on the training split.
    trainer.train(X_train, y_train)

    # Score the test split, then persist the trained model.
    probabilities = trainer.predict(X_test)
    trainer.save_model()

    # Compute the evaluation metrics and record them.
    metrics = evaluate_model(y_test, probabilities)
    log_metrics(metrics)

    print("Training and evaluation completed.")
    print("Metrics:", metrics)


Bayesian model trained.
Model saved to ../models/trained-models/BayesianModelTrainer_model.joblib
Training and evaluation completed.
Metrics: {'precision': 0.7606232294617564, 'recall': 0.2901134521880065, 'roc_auc': 0.790997903138805, 'f1': 0.4200234649980446, 'TP': 1611, 'TN': 9787, 'FP': 507, 'FN': 3942}


## KNN

In [17]:
# Build the trainer with its default configuration (no arguments are passed).
trainer = KNNTrainer()

# Track the experiment as one MLflow run. The `with` block closes the run on
# exit (FINISHED, or FAILED if an exception escapes), so no explicit
# mlflow.end_run() is needed inside it.
with mlflow.start_run(run_name='KNN + n1/5'):
    # Fit on the training split.
    trainer.train(X_train, y_train)

    # Score the test split, then persist the trained model.
    probabilities = trainer.predict(X_test)
    trainer.save_model()

    # Compute the evaluation metrics and record them.
    metrics = evaluate_model(y_test, probabilities)
    log_metrics(metrics)

    print("Training and evaluation completed.")
    print("Metrics:", metrics)


KNN model trained.
Model saved to ../models/trained-models/KNNTrainer_model.joblib
Training and evaluation completed.
Metrics: {'precision': 0.6747879444143656, 'recall': 0.6733297316765712, 'roc_auc': 0.8113719653181516, 'f1': 0.6740580493960698, 'TP': 3739, 'TN': 8492, 'FP': 1802, 'FN': 1814}
