# Preamble

In [1]:
# Imports
## General
import numpy as np
import os
import sys
import warnings
warnings.filterwarnings('ignore')

## In order to run calculations on AWS GPU, need to explicitly specify CUDA lib directory in the environment variables
os.environ["XLA_FLAGS"]="--xla_gpu_cuda_data_dir=/home/sagemaker-user/.conda/envs/mlds_gpu"

## Data manipulation and preprocessing
import pandas as pd
import boto3
from tensorflow.keras.layers import StringLookup, Normalization

## Visualization
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, Image

## Modelling
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, EarlyStopping
import tensorflow as tf

## Import DeepCTR code
## This is done by cloning the github repository instead of installing with pip. This is because of an incompatibility issue
## with TF 2.14 that I had to manually fix in the DeepCTR code
deepctr_path = '/home/sagemaker-user/drl-ad-personalization/DeepCTR'
if deepctr_path not in sys.path:
    sys.path.append(deepctr_path)

from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names
from deepctr.models.deepfm import DeepFM

## We want to be able to query the list of available adverts from Athena, so we need a PyAthena connection.
## `conn` is a module-level connection object; `RLenv.newSession` passes it to `pd.read_sql`.
from pyathena import connect
# Only the S3 staging directory and the region are given here; no credentials are passed explicitly.
conn = connect(s3_staging_dir='s3://mlds-final-project-bucket/athena_output/',
               region_name='eu-west-2')

2024-08-10 17:47:30.197230: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-10 17:47:30.549174: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-10 17:47:30.549218: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-10 17:47:30.549224: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-10 17:47:30.705186: I tensorflow/core/platform/cpu_feature_g

# Preprocessing

In [2]:
# Categorical (ID-like) columns of the dataset
categorical_columns = [
    'DisplayURL',
    'AdID',
    'AdvertiserID',
    'QueryID',
    'KeywordID',
    'TitleID',
    'DescriptionID',
    'UserID'
]

# For every categorical field, read its value-count CSV and build a StringLookup layer
# from the byte-encoded values of the CSV's 'field' column.
# vocab_lengths stores len(vocab) + 1 (presumably the extra slot is the out-of-vocabulary index).
stringlookups = {}
vocab_lengths = {}
for field in categorical_columns:
    value_counts = pd.read_csv(f'./data/kdd12/categorical_value_counts/{field}.csv')
    vocab = value_counts['field'].astype(str).str.encode('utf-8').to_list()
    stringlookups[field] = StringLookup(vocabulary=vocab, mask_token=None)
    vocab_lengths[field] = len(vocab) + 1

# Numerical feature columns
numerical_columns = [
    'Depth',
    'Position'
]
# Build one Normalization layer per row of the statistics file, using the stored
# mean and variance (columns read: 'field', 'mean', 'variance').
dist_stats = pd.read_csv('./data/kdd12/means_variances.csv')
scalers = {}
for field, mean, variance in zip(dist_stats['field'], dist_stats['mean'], dist_stats['variance']):
    scaler = Normalization(mean=mean, variance=variance)
    # Build explicitly so the layer can be called straight away
    scaler.build((1,))
    scalers[field] = scaler

In [3]:
## Define feature mappings: one SparseFeat (embedding_dim=4) per categorical column,
## followed by one DenseFeat of dimension 1 per numerical column. Feature names are lower-cased.
sparse_feats = [
    SparseFeat(name.lower(), vocabulary_size=vocab_lengths[name], embedding_dim=4)
    for name in categorical_columns
]
dense_feats = [DenseFeat(name.lower(), 1) for name in numerical_columns]
kdd12_fixlen_feature_columns = sparse_feats + dense_feats

## The dnn and linear parts of the model use the very same feature column list
kdd12_dnn_feature_columns = kdd12_fixlen_feature_columns
kdd12_linear_feature_columns = kdd12_fixlen_feature_columns

# Load Model

In [4]:
# Early-stopping callback: watches 'val_loss', starts from epoch 5, waits 5 epochs
# and restores the best weights when it stops.
earlystopping = EarlyStopping(
    monitor='val_loss',
    start_from_epoch=5,
    patience=5,
    restore_best_weights=True,
)
# Precision / recall at a 0.5 decision threshold, plus AUC
precision = tf.keras.metrics.Precision(name='precision', thresholds=0.5)
recall = tf.keras.metrics.Recall(name='recall', thresholds=0.5)
auc = tf.keras.metrics.AUC(name='auc')

In [5]:
# Define function that returns compiled model
def get_model(
    dnn_hidden_units=[400,300,200],
    dnn_dropout=0.6,
    l2_reg_dnn=0.005,
    l2_reg_linear = 0.005,
    l2_reg_embedding=0.005,
    dnn_use_bn=True
):
    """Build and compile a DeepFM model over the KDD12 feature columns.

    The model uses `kdd12_linear_feature_columns` and `kdd12_dnn_feature_columns`
    and is compiled with the "adam" optimizer and "binary_crossentropy" loss.

    Args:
        dnn_hidden_units: sizes of the hidden layers of the DNN part.
        dnn_dropout: dropout value forwarded to DeepFM.
        l2_reg_dnn: L2 regularisation value forwarded to DeepFM for the DNN part.
        l2_reg_linear: L2 regularisation value forwarded to DeepFM for the linear part.
        l2_reg_embedding: L2 regularisation value forwarded to DeepFM for the embeddings.
        dnn_use_bn: forwarded to DeepFM as `dnn_use_bn`.

    Returns:
        The compiled DeepFM model.
    """
    model = DeepFM(
        kdd12_linear_feature_columns,
        kdd12_dnn_feature_columns,
        # Hand over a copy so the shared default list can never be mutated through a model
        dnn_hidden_units=list(dnn_hidden_units),
        dnn_dropout=dnn_dropout,
        l2_reg_dnn=l2_reg_dnn,
        l2_reg_linear=l2_reg_linear,
        l2_reg_embedding=l2_reg_embedding,
        dnn_use_bn=dnn_use_bn
    )

    # Metric objects are stateful. This function is called several times (base, target
    # and explore models), so every model gets its own instances instead of sharing
    # module-level ones, which would mix their accumulated state.
    model.compile(
        "adam",
        "binary_crossentropy",
        metrics=[
            'binary_crossentropy',
            'binary_accuracy',
            tf.keras.metrics.Precision(thresholds=0.5, name='precision'),
            tf.keras.metrics.Recall(thresholds=0.5, name='recall'),
            tf.keras.metrics.AUC(name='auc'),
        ],
    )

    return model

In [58]:
# Build the base model with the default hyperparameters of get_model()
model = get_model()

In [59]:
# Load the saved checkpoint weights into the base model
model.load_weights('models/final_rl_model/rl_model.ckpt')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f05a34e2d40>

In [60]:
# Target model: a second model from get_model(), initialised with a copy of the base model's current weights
target_model = get_model()
target_model.set_weights(model.get_weights())

# Define Reinforcement Learning environment

In [9]:
# Define RL env object that simulates the Ad search platform
class RLenv:
    """
    Base class for Reinforcement Learning environment that simulates the search session.

    Each session corresponds to one (userid, queryid) pair read from
    `kdd12.offline_rl_queries`; the candidate ads and their logged clicks and
    impressions are read from `kdd12.offline_rl_testing`. Both queries go
    through the module-level `conn` connection.
    """

    # An ad counts as "clicked" when clicks / impression reaches this value
    CLICK_THRESHOLD = 0.5

    def __init__(self,):
        # Zero-based index of the current session; -1 means no session has started yet
        self.session_no = -1
        self.userid = ""
        self.queryid = ""
        # Ads of the current session, including the 'clicks' ratio used to compute rewards
        self.adlist = pd.DataFrame()
        self.max_clicks = 0

    def newSession(self,):
        """Load the next session and return its candidate ads.

        Returns:
            A tuple `(ads, max_clicks)`: `ads` is the session's ad list sorted by
            descending click ratio, with the 'clicks' and 'impression' columns
            removed and a fresh 0..n-1 index; `max_clicks` is the number of ads
            whose click ratio is at least `CLICK_THRESHOLD`.

        Raises:
            IndexError: if there is no query row for the next session number.
        """
        # Only commit the new session number once both queries have succeeded, so a
        # failed or interrupted query does not silently skip a session.
        next_session = self.session_no + 1

        # Values are bound as query parameters instead of being pasted into the SQL text
        query_input = pd.read_sql(
            "select userid, queryid from kdd12.offline_rl_queries where rn=%(rn)s",
            conn,
            params={"rn": next_session + 1},
        )
        if query_input.empty:
            raise IndexError(
                f"no row in kdd12.offline_rl_queries for rn={next_session + 1}"
            )
        userid = query_input["userid"].values[0]
        queryid = query_input["queryid"].values[0]

        ad_list_df = pd.read_sql(
            "select * from kdd12.offline_rl_testing "
            "where userid=%(userid)s and queryid=%(queryid)s",
            conn,
            # Bound as strings, matching the quoted literals the query has always used
            params={"userid": str(userid), "queryid": str(queryid)},
        )
        # Replace the raw click count by the click ratio (clicks per impression)
        ad_list_df['clicks'] = ad_list_df.clicks/ad_list_df.impression
        max_clicks = np.where(ad_list_df.clicks >= self.CLICK_THRESHOLD, 1.0, 0.0).sum()
        adlist = (
            ad_list_df.drop(columns=['impression'])
            .sort_values(by=['clicks'], ascending=[False])
            .reset_index(drop=True)
        )

        self.session_no = next_session
        self.userid = userid
        self.queryid = queryid
        self.max_clicks = max_clicks
        self.adlist = adlist
        # drop() returns a new frame, so callers can modify it without touching self.adlist
        return self.adlist.drop(columns=['clicks']), self.max_clicks

    def showAd(self, ad_index):
        """Return the reward for showing the ad at index label `ad_index` of the current session.

        The reward is 1.0 when the ad's click ratio is at least `CLICK_THRESHOLD`, else 0.0.
        """
        ctr = self.adlist.loc[ad_index, 'clicks']
        return 1. if ctr >= self.CLICK_THRESHOLD else 0.

# Define preprocessing function

In [40]:
# Encode and Scale the datasets
def encode_scale(element):
    """Return a copy of a feature dict with its model inputs encoded and scaled.

    Categorical fields (keyed by the lower-cased names in `categorical_columns`)
    are passed through their StringLookup layer; numerical fields (keyed by the
    lower-cased names in `numerical_columns`) are passed through their
    Normalization layer and the last axis is squeezed away. Other keys are
    copied unchanged.
    """
    result = element.copy()
    for name in categorical_columns:
        key = name.lower()
        result[key] = stringlookups[name](element[key])
    for name in numerical_columns:
        key = name.lower()
        scaled = scalers[name](element[key])
        result[key] = tf.squeeze(scaled, axis=-1)
    return result

In [42]:
# Define function for preprocessing the ad list
def preprocess(ad_list,batch_size=1):
    """Turn an ad list DataFrame into a batched tf.data.Dataset of encoded features.

    'position' and 'depth' are cast to float32 on a copy of `ad_list` (the input
    frame is left untouched), each row is mapped through `encode_scale`, and the
    result is batched with `batch_size`.
    """
    # assign() works on a copy and keeps the column order
    as_float = ad_list.assign(
        position=ad_list.position.astype('float32'),
        depth=ad_list.depth.astype('float32'),
    )
    rows = tf.data.Dataset.from_tensor_slices(dict(as_float))
    return rows.map(encode_scale).batch(batch_size)

In [43]:
# Encode and Scale the datasets
def encode_scale_2(element,labels):
    """Encode and scale a feature dict and pass its label through unchanged.

    Labelled counterpart of `encode_scale`, for datasets of `(features, labels)`
    pairs. The feature transformation is delegated to `encode_scale` so the
    labelled and unlabelled pipelines cannot drift apart.

    Returns:
        A tuple `(encoded_features, labels)`.
    """
    return encode_scale(element), labels

In [44]:
# Define function for preprocessing the ad list
def preprocess_2(ad_list,labels,batch_size=1):
    """Turn an ad list and its labels into a batched tf.data.Dataset of (features, label) pairs.

    Works on a copy of `ad_list`: 'position' and 'depth' are cast to float32,
    rows are paired with `labels`, mapped through `encode_scale_2` and batched
    with `batch_size`.
    """
    frame = ad_list.copy()
    position_f32 = frame.position.astype('float32')
    frame['position'] = position_f32
    depth_f32 = frame.depth.astype('float32')
    frame['depth'] = depth_f32
    pairs = tf.data.Dataset.from_tensor_slices((dict(frame), labels))
    encoded_pairs = pairs.map(encode_scale_2)
    return encoded_pairs.batch(batch_size)

# Minor Update Algorithm

In [19]:
# Set hyperparameters and loop state for the update algorithm below
H = 1200 # Episode time horizon: the loop runs until this many ads have been shown in total
alpha = 1.0 # Explore network noise scale: multiplies the uniform(-1, 1) noise added to each weight
gamma = 0.1 # Future value discount: weight of the next row's click in the reward
L = 6 # List length: number of ads shown per session
current_episode = 0 # Running count of ads shown so far
N = 1000 # Max dataset length (NOTE(review): not referenced in the visible code)
memory = pd.DataFrame() # Accumulates the shown-ad records of every session
C = 12 # Target model update (NOTE(review): not referenced in the visible code — presumably an update interval; confirm)
current_session = 0 # Running count of sessions started

# Initialize the RL env
rl_env = RLenv()

In [20]:
# Number of sessions needed to show H ads in lists of L: H / L rounded up
total_sessions = H//L + (1 if H%L>0 else 0)

In [22]:
# Feature columns recorded for every shown ad (loop-invariant, so defined once)
features = [
    'displayurl',
    'adid',
    'advertiserid',
    'position',
    'depth',
    'keywordid',
    'titleid',
    'descriptionid',
    'queryid',
    'userid'
]

# A single explore model is reused for all sessions: its weights are reset from the
# base model at the start of each session, so building a new model every time would
# only add memory without changing the result.
explore_model = get_model()

while current_episode<H:
    current_session += 1
    # Ads still to be shown before the horizon is reached; the last session is cut short
    # so that exactly H ads are shown in total.
    current_list = min(L, H - current_episode)
    print(f"Session {current_session} of {total_sessions}")    
    print("Current list length: ",current_list)
    
    # Initialize new session
    session_ad_list, session_max_clicks = rl_env.newSession()
    
    # Explore model = base model with multiplicative noise: w <- w + alpha * U(-1, 1) * w
    explore_model.set_weights(model.get_weights())
    for weight in explore_model.trainable_weights:
        unit_noise = tf.random.uniform(shape=weight.shape,minval=-1., maxval=1.)
        noise = tf.multiply(tf.multiply(alpha,unit_noise),weight)
        weight.assign_add(noise)
    
    # Add base, target and explore model scoring to the session ad list
    session_ad_ds = preprocess(session_ad_list)
    session_ad_list['base_score'] = model.predict(session_ad_ds, verbose=False)
    session_ad_list['explore_score'] = explore_model.predict(session_ad_ds, verbose=False)
    session_ad_list['target_score'] = target_model.predict(session_ad_ds, verbose=False)
    
    actions = []
    clicks = []
    Q_futures = []
    selection_models = []
    list_data = pd.DataFrame(columns=features)
    # A session may hold fewer ads than the list length; never try to show more than exist
    ads_to_show = min(current_list, len(session_ad_list))
    for pos in range(ads_to_show):
        # Index label of the remaining ad with the highest base score (first one on ties)
        best_base_ad = session_ad_list.base_score.idxmax()
        # Target-model score of the ad the base model currently ranks first
        Q_futures.append(session_ad_list.loc[best_base_ad, 'target_score'])
        # Pick the policy for this position uniformly at random
        selection_model = np.random.choice(['base','explore'],size=1)[0]
        selection_models.append(selection_model)
        if selection_model == 'base':
            next_action = best_base_ad
        else:
            next_action = session_ad_list.explore_score.idxmax()
        actions.append(next_action)
        list_data.loc[pos] = session_ad_list.loc[next_action]
        clicks.append(rl_env.showAd(next_action))
        # A shown ad cannot be shown again within the same session
        session_ad_list = session_ad_list.drop(index=next_action)
        current_episode += 1
    
    list_data['action']=actions
    list_data['click']=clicks
    list_data['Q_future']=Q_futures
    list_data['selection_model']=selection_models
    list_data = list_data.reset_index(names='list_pos')

    # Adopt the explore weights when the explore picks earned a higher mean click than the
    # base picks. If either policy was never chosen its mean is NaN, the comparison is
    # False and the base weights are kept.
    explore_click_rate = list_data[list_data.selection_model == "explore"].click.mean()
    base_click_rate = list_data[list_data.selection_model == "base"].click.mean()
    if explore_click_rate > base_click_rate:
        model.set_weights(explore_model.get_weights())
    
    # Appended once per session so that an interrupted run keeps everything collected so far
    memory = pd.concat([memory,list_data],ignore_index=True)

Session 1 of 200
Current list length:  6
Session 2 of 200
Current list length:  6
Session 3 of 200
Current list length:  6
Session 4 of 200
Current list length:  6
Session 5 of 200
Current list length:  6
Session 6 of 200
Current list length:  6
Session 7 of 200
Current list length:  6
Session 8 of 200
Current list length:  6
Session 9 of 200
Current list length:  6
Session 10 of 200
Current list length:  6
Session 11 of 200
Current list length:  6
Session 12 of 200
Current list length:  6
Session 13 of 200
Current list length:  6
Session 14 of 200
Current list length:  6
Session 15 of 200
Current list length:  6
Session 16 of 200
Current list length:  6
Session 17 of 200
Current list length:  6
Session 18 of 200
Current list length:  6
Session 19 of 200
Current list length:  6
Session 20 of 200
Current list length:  6
Session 21 of 200
Current list length:  6
Session 22 of 200
Current list length:  6
Session 23 of 200
Current list length:  6
Session 24 of 200
Current list length:  6
S

KeyboardInterrupt: 

In [27]:
# Reward = (1 - gamma) * the row's own click + gamma * the click of the next row in memory
# (0.0 for the last row, which has no successor).
# NOTE(review): the shift runs over the whole memory, so the last ad of one session is paired
# with the first ad of the next session, and Q_future is not used here — confirm this is intended.
next_click = memory.click.shift(-1).fillna(0.0)
memory['reward'] = (1. - gamma)*memory.click + gamma*next_click

In [45]:
# Draw a random training batch of up to 100 rows from memory. The size is capped at the
# number of rows available, because sampling more rows than exist (without replacement)
# raises a ValueError, e.g. when the collection loop was stopped early.
memory_sample = memory.sample(n=min(100, len(memory)))

In [56]:
# Columns of memory that are fed to the model as input features
# (same names and order as the `features` list defined in the update loop cell)
pred_features = [
    'displayurl',
    'adid',
    'advertiserid',
    'position',
    'depth',
    'keywordid',
    'titleid',
    'descriptionid',
    'queryid',
    'userid'
]

In [57]:
# Build the training dataset: sampled features with the computed reward as label, in batches of 100
model_input_ds = preprocess_2(memory_sample[pred_features],memory_sample.reward.to_numpy(),batch_size=100)

In [51]:
# Display the first trainable weight of the base model (compare with the same display after model.fit below)
model.trainable_weights[0]

<tf.Variable 'sparse_emb_displayurl/embeddings:0' shape=(23640, 4) dtype=float32, numpy=
array([[-1.85666238e-07, -6.72157885e-11,  8.21891888e-10,
         3.20157767e-09],
       [ 2.88693584e-04, -2.97798283e-06,  1.69755745e-04,
        -1.32294135e-05],
       [ 5.84430836e-06, -6.29715942e-07, -2.14173415e-05,
         1.04697774e-05],
       ...,
       [-1.28967672e-08,  7.12399367e-12, -7.74402487e-11,
         1.45269094e-15],
       [ 1.64311124e-13,  3.25400644e-11,  1.47464560e-12,
        -1.60811035e-13],
       [ 8.82552191e-12, -2.92302424e-13,  3.29343266e-06,
        -2.44636159e-15]], dtype=float32)>

In [61]:
# Fit the base model on the sampled memory batch; no epochs or callbacks are specified here
model.fit(model_input_ds)



<keras.src.callbacks.History at 0x7f05a34e30a0>

In [62]:
# Display the first trainable weight again to check that fitting changed it
model.trainable_weights[0]

<tf.Variable 'sparse_emb_displayurl/embeddings:0' shape=(23640, 4) dtype=float32, numpy=
array([[ 3.3015866e-08,  4.8710739e-08,  2.9408472e-08, -7.6759132e-09],
       [ 1.3179771e-04, -1.8346390e-04, -4.5647437e-05, -6.3166684e-05],
       [ 6.4008855e-05, -1.3873050e-05,  5.0293769e-05,  9.1004491e-05],
       ...,
       [ 1.3352013e-07, -1.6821988e-11,  3.1733621e-10, -8.5256590e-14],
       [-1.4564137e-13, -5.1319088e-12, -1.5141150e-11, -9.5001162e-10],
       [ 1.4453264e-10,  9.2324694e-14, -2.5518762e-05,  8.1510948e-13]],
      dtype=float32)>

In [173]:
# Scratch check of the base policy's selection step.
# The previous form called .head() on the selected index label (a scalar), which raises
# AttributeError; the ads tied for the highest base score are displayed instead.
top_base_ads = session_ad_list[session_ad_list.base_score == session_ad_list.base_score.max()]
# The first of the tied ads is the one the base policy would show
next_action = top_base_ads.index.values[0]
top_base_ads.head()

Unnamed: 0,displayurl,adid,advertiserid,position,depth,keywordid,titleid,descriptionid,queryid,userid,base_score,explore_score,target_score
4,14340390157469404125,3126839,23777,1,1,19254486,13232,20347,19562,42487,0.048997,0.011077,0.048997
6,14340390157469404125,3126839,23777,1,1,11140198,13232,20347,19562,42487,0.048997,0.011077,0.048997
8,14340390157469404125,3126839,23777,1,1,19504124,13232,20347,19562,42487,0.048997,0.011077,0.048997
9,14340390157469404125,3126839,23777,1,1,19504086,13232,20347,19562,42487,0.048997,0.011077,0.048997
10,14340390157469404125,3126839,23777,1,1,23321680,13232,20347,19562,42487,0.048997,0.011077,0.048997


In [184]:
# Scratch: empty frame with only the feature columns, mirroring `list_data` in the update loop
df = pd.DataFrame(columns=features)

In [186]:
# Scratch: confirm the frame has the feature columns and no rows yet
df.head()

Unnamed: 0,displayurl,adid,advertiserid,position,depth,keywordid,titleid,descriptionid,queryid,userid


In [187]:
# Scratch: show the ads still remaining in the current session, with their three model scores
session_ad_list

Unnamed: 0,displayurl,adid,advertiserid,position,depth,keywordid,titleid,descriptionid,queryid,userid,base_score,explore_score,target_score
4,14340390157469404125,3126839,23777,1,1,19254486,13232,20347,19562,42487,0.048997,0.011077,0.048997
6,14340390157469404125,3126839,23777,1,1,11140198,13232,20347,19562,42487,0.048997,0.011077,0.048997
8,14340390157469404125,3126839,23777,1,1,19504124,13232,20347,19562,42487,0.048997,0.011077,0.048997
9,14340390157469404125,3126839,23777,1,1,19504086,13232,20347,19562,42487,0.048997,0.011077,0.048997
10,14340390157469404125,3126839,23777,1,1,23321680,13232,20347,19562,42487,0.048997,0.011077,0.048997


In [188]:
# Scratch: take the ad with index label 10 as a Series (includes the three score columns)
new_row =session_ad_list.loc[10]

In [189]:
# Scratch: display the selected row
new_row

displayurl       14340390157469404125
adid                          3126839
advertiserid                    23777
position                            1
depth                               1
keywordid                    23321680
titleid                         13232
descriptionid                   20347
queryid                         19562
userid                          42487
base_score                   0.048997
explore_score                0.011077
target_score                 0.048997
Name: 10, dtype: object

In [190]:
# Scratch: write the row into the feature-only frame, as the update loop does for list_data.
# new_row also carries score entries that are not columns of df — presumably they are
# dropped by label alignment; TODO confirm.
df.loc[0] = new_row

In [215]:
# Scratch: check that 'userid' is one of the frame's columns
df.columns.isin(['userid']).any()

True