In [1]:
import sys
import os
from pathlib import Path

root_path = str(Path(os.getcwd()).parents[3])
sys.path.append(root_path)

# Import libraries

In [2]:
import gymnasium as gym
import numpy as np
import pandas as pd

import tensorflow as tf
from mercury.rl.agents import ConservativeDQLAgent
from mercury.rl.environment import ENV

# Download the Dataset

https://www.kaggle.com/datasets/gibrano/marketing-communication

**About the dataset:**

This is a **fake, simulated marketing dataset**, designed purely for **example purposes** in the context of **Offline Reinforcement Learning (Offline RL)**. This dataset is **not real** but mimics a realistic scenario in which a bank or similar institution conducts marketing campaigns to engage with potential customers over time.

**Motivation for Offline RL in Marketing**
Traditional marketing strategies often rely on **A/B testing** or **rule-based heuristics** to optimize outreach. However, these approaches may not efficiently leverage **historical interactions** to maximize long-term engagement.  

Offline RL provides an alternative by learning a policy **purely from logged data**, without additional exploration. Given a dataset of past marketing actions and their outcomes, an RL model can **identify optimal action sequences** that maximize customer conversion while minimizing unnecessary interactions.

# Feature selection

Our dataset is structured around a **Markov Decision Process (MDP)** formulation, where:  
- **The state** consists of customer attributes (age, job, balance, loan status, etc.),  
- **The actions** represent different marketing communication methods (e.g., `"no_contact"`, `"call"`, `"email"`, `"push_notification"`),  
- **The reward** consists of the purchase value.

In [3]:
# Location of the raw marketing CSV, given relative to the notebook's working directory.
data_path = "../marketing_communication.csv"

In [4]:
# Load the whole CSV into a DataFrame; later cells select columns and sample rows from it.
df = pd.read_csv(data_path)

In [5]:
# Bare expression: the frame is rendered as this cell's output.
df

Unnamed: 0,balance,customer_id,age,job,marital,education,housing,loan,date,contact,campaign,previous,poutcome,duration,subscribed,purchase
0,50.496928,1,57,admin.,divorced,secondary,no,no,2024-01-02,telephone,2,3,nonexistent,230,no,no
1,747.071134,1,57,admin.,divorced,secondary,no,no,2024-01-23,telephone,8,3,nonexistent,60,yes,no
2,464.772894,1,57,admin.,divorced,secondary,no,no,2024-05-24,cellular,8,0,nonexistent,240,yes,no
3,1794.200009,1,57,admin.,divorced,secondary,no,no,2024-06-05,cellular,9,0,success,31,yes,no
4,172.692606,1,57,admin.,divorced,secondary,no,no,2024-06-11,cellular,1,1,nonexistent,568,yes,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54886,6184.573462,9999,69,services,divorced,primary,no,no,2024-03-09,telephone,2,1,success,100,no,no
54887,1041.047722,9999,69,services,divorced,primary,no,no,2024-03-31,cellular,7,4,success,202,no,no
54888,219.369680,9999,69,services,divorced,primary,no,no,2024-08-28,telephone,2,4,nonexistent,244,no,no
54889,626.981628,10000,60,technician,divorced,primary,no,yes,2024-06-16,cellular,4,0,failure,342,no,no


In [6]:
# Columns carried into the training set: episode key, ordering key, state
# features, the action (`contact`) and the reward (`purchase_value`).
cols = [
    'customer_id', 'date',
    'balance', 'age', 'campaign', 'duration',
    'loan', 'previous', 'housing', 'subscribed',
    'contact', 'purchase_value',
]

# Fail early and list what the CSV actually contains when the schema does not
# match (e.g. a file that ships `purchase` rather than `purchase_value`).
missing_cols = [c for c in cols if c not in df.columns]
if missing_cols:
    raise KeyError(
        f"{missing_cols} not in index; available columns: {list(df.columns)}"
    )

# Draw at most 100,000 rows: DataFrame.sample raises when asked for more rows
# than the frame holds (without replacement). The fixed seed makes the sampled
# subset identical on every Restart & Run All.
sample_size = min(100_000, len(df))
train = df[cols].sample(n=sample_size, random_state=42)

KeyError: "['purchase_value'] not in index"

In [7]:
def _encode_column(values, mapping):
    """Translate category labels into integer codes, rejecting unknown values.

    Args:
        values: pandas Series holding the raw labels (or already-encoded codes).
        mapping: dict from label to integer code.

    Returns:
        An int64 Series with every label replaced by its code.

    Raises:
        ValueError: if any value (including NaN) is neither a known label nor
            a valid code. A plain ``Series.map(dict)`` would silently turn such
            values into NaN instead.
    """
    # Values already equal to a code pass through untouched, so running this
    # cell a second time leaves the column unchanged instead of wiping it.
    encoded = values.map(lambda v: mapping.get(v, v))
    unexpected = set(encoded.unique()) - set(mapping.values())
    if unexpected:
        raise ValueError(
            f"Column {values.name!r} has values outside {sorted(mapping)}: "
            f"{sorted(map(str, unexpected))}"
        )
    return encoded.astype("int64")


yes_no_codes = {"yes": 1, "no": 0}
contact_codes = {"no_contact": 0, "push_notification": 1, "email": 2, "call": 3}

# Binary flags become 0/1; the communication channel becomes the action id 0-3.
train = train.assign(
    loan=_encode_column(train['loan'], yes_no_codes),
    housing=_encode_column(train['housing'], yes_no_codes),
    subscribed=_encode_column(train['subscribed'], yes_no_codes),
    contact=_encode_column(train['contact'], contact_codes),
)

In [8]:
# Print the column dtypes and non-null counts of the encoded training frame.
train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 100000 entries, 3524534 to 630176
Data columns (total 12 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   customer_id     100000 non-null  int64  
 1   date            100000 non-null  object 
 2   balance         100000 non-null  float64
 3   age             100000 non-null  int64  
 4   campaign        100000 non-null  int64  
 5   duration        100000 non-null  int64  
 6   loan            100000 non-null  int64  
 7   previous        100000 non-null  int64  
 8   housing         100000 non-null  int64  
 9   subscribed      100000 non-null  int64  
 10  contact         100000 non-null  int64  
 11  purchase_value  100000 non-null  int64  
dtypes: float64(1), int64(10), object(1)
memory usage: 9.9+ MB


In [9]:
# Write the prepared training set to <root>/data/train.csv (row index omitted).
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before writing.
train_csv_path = Path(root_path) / "data" / "train.csv"
train_csv_path.parent.mkdir(parents=True, exist_ok=True)
train.to_csv(train_csv_path, index=False)

# Creating the offline environment.

In [10]:
# Settings handed to the offline environment: the training CSV and the column
# that plays each role (state features, action, reward, episode key, ordering).
data_path = f"{root_path}/data/train.csv"
states_cols = [
    'balance', 'age', 'campaign', 'duration',
    'loan', 'previous', 'housing', 'subscribed',
]
action_col = 'contact'
reward_col = 'purchase_value'
episode_col_id = 'customer_id'
order_col = 'date'

In [11]:
# Wrap the logged CSV in the offline environment used by the training loop.
offline_env = ENV(
    data_path,
    states_cols,
    action_col,
    reward_col,
    episode_col_id,
    order_col,
    batch_size=64,
    shuffle=True,
)

# Training

In [13]:
# Conservative Q-learning agent: one network input per state feature and four
# discrete actions.
agent = ConservativeDQLAgent(
    learning_rate=0.01,
    gamma=0.99,
    num_states=len(states_cols),
    n_actions=4,
)

#agent.MEMORY_SIZE = 50000
history_reward = []

offline_env.reset()

# Train on at most 2000 replays (fewer when the environment holds fewer episodes).
episodes = min(offline_env.env.episodes, 2000)

for batch_id in range(episodes):
    episode_ids, sequence, states, actions, rewards = offline_env.get_replay(batch_id)

    # Successor of step t is step t+1; the final step is paired with itself
    # and flagged as terminal below.
    # NOTE(review): this assumes one replay holds a single episode in time
    # order - confirm against ENV.get_replay.
    next_states = np.concatenate([states[1:], [states[-1]]])

    terminal_flags = np.zeros(len(states), dtype=bool)
    terminal_flags[-1] = True

    # Push every logged transition of this replay into the agent's memory.
    for step in range(len(episode_ids)):
        agent.store_transition(
            states[step],
            actions[step],
            next_states[step],
            rewards[step],
            terminal_flags[step],
        )

    # One learning update per replay.
    agent.learn()

    # Track the total reward logged in this replay.
    history_reward.append(sum(rewards))

    print("Epoch:", batch_id, "episode:", episode_ids[0], "Loss:", agent.loss.numpy())

Epoch: 0 episode: 7841 Loss: 205019.39
Epoch: 1 episode: 2991 Loss: 81083.08
Epoch: 2 episode: 8638 Loss: 67999.305
Epoch: 3 episode: 1086 Loss: 106250.7
Epoch: 4 episode: 2792 Loss: 130310.95
Epoch: 5 episode: 5309 Loss: 125917.28
Epoch: 6 episode: 5725 Loss: 144229.7
Epoch: 7 episode: 6100 Loss: 135196.56
Epoch: 8 episode: 5356 Loss: 139276.92
Epoch: 9 episode: 3327 Loss: 131542.89
Epoch: 10 episode: 9189 Loss: 130114.19
Epoch: 11 episode: 7965 Loss: 131114.53
Epoch: 12 episode: 8772 Loss: 121162.664
Epoch: 13 episode: 772 Loss: 112922.516
Epoch: 14 episode: 1884 Loss: 110974.6
Epoch: 15 episode: 4490 Loss: 104164.266
Epoch: 16 episode: 512 Loss: 108205.914
Epoch: 17 episode: 2072 Loss: 106830.12
Epoch: 18 episode: 5599 Loss: 106849.37
Epoch: 19 episode: 9433 Loss: 109448.055
Epoch: 20 episode: 9813 Loss: 109349.84
Epoch: 21 episode: 7017 Loss: 108137.86
Epoch: 22 episode: 3576 Loss: 103906.67
Epoch: 23 episode: 3637 Loss: 103208.664
Epoch: 24 episode: 8288 Loss: 99490.7
Epoch: 25 ep

In [14]:
# Store the trained Q-network as an .h5 file in the project's models folder.
model_file = f"{root_path}/models/marketing_cql_model_env.h5"
agent.q_network.save(model_file)



# Results

This dataset is randomly generated and does not come from actual business data. As a result, any RL model trained on it will also be purely hypothetical and should not be used for real-world decision-making. The randomness in the data prevents the model from learning meaningful marketing policies, but it serves to show how offline RL can be applied to this kind of problem, or similar ones, and the impact it could have. 🚀
