# Preamble

In [1]:
# Imports
## General
import numpy as np
import os
import sys
import warnings
warnings.filterwarnings('ignore')

## In order to run calculations on AWS GPU, need to explicitly specify CUDA lib directory in the environment variables
os.environ["XLA_FLAGS"]="--xla_gpu_cuda_data_dir=/home/sagemaker-user/.conda/envs/mlds_gpu"

## Data manipulation and preprocessing
import pandas as pd
import boto3
from tensorflow.keras.layers import StringLookup, Normalization

## Visualization
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, Image

## Modelling
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, EarlyStopping
import tensorflow as tf

## Import DeepCTR code
## This is done by cloning the github repository instead of installing with pip. This is because of an incompatibility issue
## with TF 2.14 that I had to manually fix in the DeepCTR code
deepctr_path = '/home/sagemaker-user/drl-ad-personalization/DeepCTR'
if deepctr_path not in sys.path:
    sys.path.append(deepctr_path)

from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names
from deepctr.models.deepfm import DeepFM

## We want to be able to query the list of available adverts from athena, so we need a PyAthena connection
from pyathena import connect
conn = connect(s3_staging_dir='s3://mlds-final-project-bucket/athena_output/',
               region_name='eu-west-2')

2024-08-10 13:13:05.855963: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-10 13:13:05.856565: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-10 13:13:05.856580: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-10 13:13:06.011997: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Preprocessing

In [2]:
# Names of the categorical columns (lower-cased when used as feature keys)
categorical_columns = [
    'DisplayURL', 'AdID', 'AdvertiserID', 'QueryID',
    'KeywordID', 'TitleID', 'DescriptionID', 'UserID',
]

# Read each field's stored vocabulary and build one StringLookup layer per field
stringlookups = {}
vocab_lengths = {}
for field in categorical_columns:
    df = pd.read_csv(f'./data/kdd12/categorical_value_counts/{field}.csv')
    # The lookup vocabulary is passed as bytes, so every value is stringified and encoded
    vocab = list(map(str.encode, df['field'].astype(str).to_list()))
    lookup = StringLookup(vocabulary=vocab, mask_token=None)
    stringlookups[field] = lookup
    # One slot more than the vocabulary itself
    # (presumably StringLookup's out-of-vocabulary index -- TODO confirm)
    vocab_lengths[field] = len(vocab) + 1

# Names of the numerical columns
numerical_columns = ['Depth', 'Position']

# Build one Normalization layer per row of the stored mean/variance table
dist_stats = pd.read_csv('./data/kdd12/means_variances.csv')
scalers = {}
for i in range(len(dist_stats)):
    field, mean, variance = (dist_stats[col][i] for col in ('field', 'mean', 'variance'))
    scaler = Normalization(mean=mean, variance=variance)
    scaler.build((1,))
    scalers[field] = scaler

In [3]:
## Define feature mappings: embedded categorical features first, then the dense numerical ones
kdd12_fixlen_feature_columns = [
    SparseFeat(feat.lower(), vocabulary_size=vocab_lengths[feat], embedding_dim=4)
    for feat in categorical_columns
]
kdd12_fixlen_feature_columns.extend(DenseFeat(feat.lower(), 1) for feat in numerical_columns)

## The dnn and linear feature columns are the very same list object
kdd12_dnn_feature_columns = kdd12_linear_feature_columns = kdd12_fixlen_feature_columns

# Load Model

In [4]:
# Early stopping callback: watches val_loss with a patience of 5, restores the best
# weights, and is configured with start_from_epoch=5
earlystopping = EarlyStopping(
    start_from_epoch=5,
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)

# Precision and recall share the 0.5 decision threshold; AUC keeps its defaults
precision, recall = (
    metric_cls(thresholds=0.5, name=metric_name)
    for metric_cls, metric_name in (
        (tf.keras.metrics.Precision, 'precision'),
        (tf.keras.metrics.Recall, 'recall'),
    )
)
auc = tf.keras.metrics.AUC(name='auc')

In [5]:
# Define function that returns compiled model
def get_model(
    dnn_hidden_units=(400, 300, 200),
    dnn_dropout=0.6,
    l2_reg_dnn=0.005,
    l2_reg_linear=0.005,
    l2_reg_embedding=0.005,
    dnn_use_bn=True
):
    """
    Build and compile a DeepFM model on the KDD12 feature columns.

    Parameters
    ----------
    dnn_hidden_units : sequence of int
        Hidden layer sizes, forwarded to DeepFM as a list.
    dnn_dropout, l2_reg_dnn, l2_reg_linear, l2_reg_embedding, dnn_use_bn
        Forwarded unchanged to DeepFM under the same keyword names.

    Returns
    -------
    The DeepFM model compiled with the "adam" optimizer, "binary_crossentropy"
    loss and binary cross-entropy, binary accuracy, precision, recall and AUC metrics.
    """
    model = DeepFM(
        kdd12_linear_feature_columns,
        kdd12_dnn_feature_columns,
        # The default is an immutable tuple (no shared mutable default argument);
        # DeepFM still receives a fresh list, as it did before.
        dnn_hidden_units=list(dnn_hidden_units),
        dnn_dropout=dnn_dropout,
        l2_reg_dnn=l2_reg_dnn,
        l2_reg_linear=l2_reg_linear,
        l2_reg_embedding=l2_reg_embedding,
        dnn_use_bn=dnn_use_bn
    )

    # Compile the model.
    # Metric objects are stateful, so every model gets its own instances. Reusing
    # one set of module-level metric objects for several models would make the
    # base, target and explore models accumulate into the same metric state.
    model.compile(
        "adam",
        "binary_crossentropy",
        metrics=[
            'binary_crossentropy',
            'binary_accuracy',
            tf.keras.metrics.Precision(thresholds=0.5, name='precision'),
            tf.keras.metrics.Recall(thresholds=0.5, name='recall'),
            tf.keras.metrics.AUC(name='auc'),
        ],
    )

    return model

In [6]:
# Get the model
# Builds and compiles the base DeepFM through get_model() with its default hyperparameters.
model = get_model()

2024-08-10 13:15:20.044077: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 43113296 exceeds 10% of free system memory.
2024-08-10 13:15:20.227710: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 43113296 exceeds 10% of free system memory.
2024-08-10 13:15:20.257418: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 43113296 exceeds 10% of free system memory.
2024-08-10 13:15:20.446919: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 71459936 exceeds 10% of free system memory.
2024-08-10 13:15:20.706214: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 71459936 exceeds 10% of free system memory.


In [76]:
# Load the weights
# Restores the checkpoint stored at models/final_rl_model/rl_model.ckpt into the base model.
model.load_weights('models/final_rl_model/rl_model.ckpt')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f02791cef20>

In [77]:
# Create the target model: a second get_model() instance that starts from a copy
# of the base model's current weights.
target_model = get_model()
target_model.set_weights(model.get_weights())

In [78]:
# Create the explore model: starts as a copy of the base model ...
explore_model = get_model()
explore_model.set_weights(model.get_weights())

# ... and then every trainable weight w is shifted by alpha * U(-1, 1) * w,
# i.e. noise proportional to the weight's own value.
alpha = 1.
for layer in explore_model.trainable_weights:
    uniform_draw = tf.random.uniform(shape=layer.shape, minval=-1., maxval=1.)
    noise = tf.multiply(tf.multiply(alpha, uniform_draw), layer)
    layer.assign_add(noise)

# Define Reinforcement Learning environment

In [110]:
# Define RL env object that simulates the Ad search platform
class RLenv:
    """
    Reinforcement Learning environment that simulates the search session.

    Each session is one (userid, queryid) pair read from
    ``kdd12.offline_rl_queries``; the candidate adverts for that pair are read
    from ``kdd12.offline_rl_testing``. Both queries run through the module-level
    connection ``conn``.

    Parameters
    ----------
    click_threshold : float, default 0.5
        An advert whose clicks/impression ratio is at least this value counts
        as clicked (reward 1.0); anything below yields reward 0.0.
    """

    def __init__(self, click_threshold=0.5):
        # Single source of truth for the click cut-off used by newSession and showAd
        self.click_threshold = click_threshold
        # Starts at -1 so that the first newSession() call moves to session 0
        self.session_no = -1
        self.userid = ""
        self.queryid = ""
        self.adlist = pd.DataFrame()
        self.max_clicks = 0

    def newSession(self,):
        """
        Advance to the next session and load its candidate adverts.

        Returns
        -------
        (adlist, max_clicks)
            ``adlist`` is the session's advert table without the ``clicks`` and
            ``impression`` columns; ``max_clicks`` is the number of adverts whose
            click ratio reaches ``click_threshold``.

        Raises
        ------
        IndexError
            If no query row exists for the new session number.
        """
        self.session_no += 1
        # The query table is addressed by rn = session_no + 1
        rn = self.session_no + 1
        query_input = pd.read_sql(f"select userid, queryid from kdd12.offline_rl_queries where rn={rn}",conn)
        if query_input.empty:
            # Same exception type the bare `.values[0]` would raise, but with a usable message
            raise IndexError(f"no row in kdd12.offline_rl_queries for rn={rn}")
        self.userid = query_input["userid"].values[0]
        self.queryid = query_input["queryid"].values[0]
        # NOTE(review): the ids interpolated here come from the query table above,
        # not from user input; switch to driver-side parameters if that ever changes.
        ad_list_df = pd.read_sql(f"select * from kdd12.offline_rl_testing where userid='{self.userid}' and queryid='{self.queryid}'",conn)
        # Replace the raw click count by the click ratio (clicks per impression)
        ad_list_df['clicks'] = ad_list_df.clicks/ad_list_df.impression
        self.max_clicks = np.where(ad_list_df.clicks>=self.click_threshold,1.0,0.0).sum()
        # Keep the adverts sorted by click ratio with a fresh 0..n-1 index; showAd
        # looks adverts up by that index.
        self.adlist = ad_list_df.drop(columns=['impression']).sort_values(by=['clicks'],ascending=[False]).reset_index(drop=True)
        # drop() already returns a new frame, so the caller never sees the click ratios
        return self.adlist.drop(columns=['clicks']), self.max_clicks

    def showAd(self, ad_index):
        """
        Return the reward for showing the advert at index label ``ad_index``.

        The reward is 1.0 when the advert's click ratio is at least
        ``click_threshold`` and 0.0 otherwise.
        """
        ctr = self.adlist.loc[ad_index].clicks
        if ctr>=self.click_threshold:
            ctr_reward = 1.
        else:
            ctr_reward = 0.

        # return the binarized CTR
        return ctr_reward

# Define preprocessing function

In [40]:
# Encode and Scale the datasets
def encode_scale(element):
    """
    Return a copy of ``element`` with its feature values transformed.

    Categorical fields are passed through their StringLookup layer; numerical
    fields are passed through their Normalization layer and the trailing axis
    of the result is squeezed away. Keys are the lower-cased column names.
    """
    out = element.copy()
    for name in categorical_columns:
        key = name.lower()
        out[key] = stringlookups[name](element[key])
    for name in numerical_columns:
        key = name.lower()
        normalized = scalers[name](element[key])
        out[key] = tf.squeeze(normalized, axis=-1)
    return out

In [216]:
# Define function for preprocessing the ad list
def preprocess(ad_list):
    """
    Turn an advert DataFrame into an encoded, scaled tf.data.Dataset with batch size 1.

    Parameters
    ----------
    ad_list : pandas.DataFrame
        Must contain ``position`` and ``depth`` columns. An optional ``reward``
        column is split off and used as the label.

    Returns
    -------
    tf.data.Dataset
        Yields feature dicts when ``ad_list`` has no ``reward`` column, and
        ``(features, reward)`` pairs when it has one. The input frame is not modified.
    """
    has_reward = 'reward' in ad_list.columns
    out_df = ad_list.copy()
    # If reward is in the columns, then separate it from the features
    if has_reward:
        rewards = out_df.reward.to_numpy()
        out_df = out_df.drop(columns=['reward'])
    # Convert position and depth to floats
    out_df['position'] = out_df.position.astype('float32')
    out_df['depth'] = out_df.depth.astype('float32')
    # Convert to tf dataset and apply categorical encoding and numerical scaling
    if has_reward:
        tf_dataset = tf.data.Dataset.from_tensor_slices((dict(out_df), rewards))
        # The mapped function must return the (features, label) pair. The earlier
        # `map(lambda x, y: encode_scale(x), y)` passed an undefined `y` as map's
        # second argument and dropped the label.
        tf_dataset = tf_dataset.map(lambda x, y: (encode_scale(x), y))
    else:
        tf_dataset = tf.data.Dataset.from_tensor_slices(dict(out_df))
        tf_dataset = tf_dataset.map(encode_scale)
    # Add batch dim
    return tf_dataset.batch(1)

# Minor Update Algorithm

In [79]:
# Set hyperparameters
H, L = 24, 6             # Episode time horizon / list length
alpha, gamma = 1.0, 0.1  # Explore network noise variable / future value discount
N, C = 1000, 12          # Max dataset length / target model update

# Mutable run state: episode counter and the (initially empty) memory frame
current_episode = 0
memory = pd.DataFrame()


In [43]:
# Walk through the horizon H one list at a time. Every list has length L except
# the last one, which is cut down to whatever is left of the horizon.
while current_episode < H:
    # min() yields L for full lists and the remainder for the final one. The
    # earlier `H % current_episode` raised ZeroDivisionError when L >= H and
    # evaluated to 0 (an endless loop) when H == 2 * L.
    current_list = min(L, H - current_episode)
    print(current_list)
    current_episode += current_list

6
6
6
6


In [93]:
# Initialize the RL env
# RLenv.__init__ takes no `L` argument, so the earlier RLenv(L=6) raised TypeError.
rl_env = RLenv()

In [52]:
# Walk through the horizon H one list at a time. Every list has length L except
# the last one, which is cut down to whatever is left of the horizon.
while current_episode < H:
    # min() yields L for full lists and the remainder for the final one. The
    # earlier `H % current_episode` raised ZeroDivisionError when L >= H and
    # evaluated to 0 (an endless loop) when H == 2 * L.
    current_list = min(L, H - current_episode)
    print("Current list length: ",current_list)
    current_episode += current_list

In [217]:
# Feature columns that describe one advert impression
features = [
    'displayurl', 'adid', 'advertiserid', 'position', 'depth',
    'keywordid', 'titleid', 'descriptionid', 'queryid', 'userid',
]

# Initialize the RL env and start its first session
rl_env = RLenv()
session_ad_list, session_max_clicks = rl_env.newSession()

# Initialize explore model: shift every trainable weight w by alpha * U(-1, 1) * w
for layer in explore_model.trainable_weights:
    uniform_draw = tf.random.uniform(shape=layer.shape, minval=-1., maxval=1.)
    noise = tf.multiply(tf.multiply(alpha, uniform_draw), layer)
    layer.assign_add(noise)

# Add base, explore and target model scoring to the session ad list.
# The dataset is built once, before any score column exists, and reused for all three models.
session_ad_ds = preprocess(session_ad_list)
for score_column, scoring_model in (
    ('base_score', model),
    ('explore_score', explore_model),
    ('target_score', target_model),
):
    session_ad_list[score_column] = scoring_model.predict(session_ad_ds, verbose=False)

In [218]:
# Build one displayed ad list of (at most) `current_list` positions. For every
# position a coin flip decides whether the base or the explore model picks the
# ad; the chosen ad is shown to the environment and removed from the candidates.
actions = []
clicks = []
Q_futures = []
selection_models = []
list_data = pd.DataFrame(columns=features)
for pos in range(current_list):
    # A session may hold fewer candidate ads than list positions; stop instead
    # of failing with IndexError on an empty candidate frame.
    if session_ad_list.empty:
        break
    # Index label of the ad the base model scores highest (first one on ties)
    best_base_action = session_ad_list.base_score.idxmax()
    # Target-model score of that ad, recorded before the action is taken
    Q_future = session_ad_list.loc[best_base_action, 'target_score']
    Q_futures.append(Q_future)
    selection_model = np.random.choice(['base','explore'],size=1)[0]
    selection_models.append(selection_model)
    if selection_model == 'base':
        next_action = best_base_action
    else:
        next_action = session_ad_list.explore_score.idxmax()
    actions.append(next_action)
    # Assignment aligns on column names, so only the `features` columns are kept
    list_data.loc[pos] = session_ad_list.loc[next_action]
    click = rl_env.showAd(next_action)
    clicks.append(click)
    # An ad can be shown only once per list
    session_ad_list = session_ad_list.loc[session_ad_list.index != next_action]

list_data['action']=actions
list_data['click']=clicks
list_data['Q_future']=Q_futures
list_data['selection_model']=selection_models
list_data = list_data.reset_index(names='list_pos')

# Promote the explore weights to the base model when the explore picks were
# clicked more often. If either model picked nothing its mean is NaN, the
# comparison is False and the base model stays as it is.
explore_click_rate = list_data[list_data.selection_model == "explore"].click.mean()
base_click_rate = list_data[list_data.selection_model == "base"].click.mean()
if explore_click_rate > base_click_rate:
    model.set_weights(explore_model.get_weights())

In [219]:
# Append the rows of the list just played to the memory frame, renumbering the index.
# NOTE(review): N ("Max dataset length") is not enforced here -- confirm whether
# the memory should be truncated.
memory = pd.concat([memory,list_data],ignore_index=True)

In [220]:
memory.sample(2)

Unnamed: 0,list_pos,displayurl,adid,advertiserid,position,depth,keywordid,titleid,descriptionid,queryid,userid,action,click,Q_future,selection_model
8,2,14340390157469404125,3126839,23777,1,1,19504146,13232,20347,19562,42487,0,1.0,0.048997,explore
0,0,14340390157469404125,3126839,23777,1,1,1027972,13232,20347,19562,42487,9,0.0,0.061145,base


In [113]:
ctr_reward = rl_env.showAd(next_action)

In [114]:
ctr_reward

0.0

In [173]:
# Index label of the first ad with the highest base score. The trailing
# `.head()` was removed: `.index.values[0]` is a scalar, which has no `.head()`.
next_action = session_ad_list[session_ad_list.base_score == session_ad_list.base_score.max()].head(1).index.values[0]

Unnamed: 0,displayurl,adid,advertiserid,position,depth,keywordid,titleid,descriptionid,queryid,userid,base_score,explore_score,target_score
4,14340390157469404125,3126839,23777,1,1,19254486,13232,20347,19562,42487,0.048997,0.011077,0.048997
6,14340390157469404125,3126839,23777,1,1,11140198,13232,20347,19562,42487,0.048997,0.011077,0.048997
8,14340390157469404125,3126839,23777,1,1,19504124,13232,20347,19562,42487,0.048997,0.011077,0.048997
9,14340390157469404125,3126839,23777,1,1,19504086,13232,20347,19562,42487,0.048997,0.011077,0.048997
10,14340390157469404125,3126839,23777,1,1,23321680,13232,20347,19562,42487,0.048997,0.011077,0.048997


In [184]:
df = pd.DataFrame(columns=features)

In [186]:
df.head()

Unnamed: 0,displayurl,adid,advertiserid,position,depth,keywordid,titleid,descriptionid,queryid,userid


In [187]:
session_ad_list

Unnamed: 0,displayurl,adid,advertiserid,position,depth,keywordid,titleid,descriptionid,queryid,userid,base_score,explore_score,target_score
4,14340390157469404125,3126839,23777,1,1,19254486,13232,20347,19562,42487,0.048997,0.011077,0.048997
6,14340390157469404125,3126839,23777,1,1,11140198,13232,20347,19562,42487,0.048997,0.011077,0.048997
8,14340390157469404125,3126839,23777,1,1,19504124,13232,20347,19562,42487,0.048997,0.011077,0.048997
9,14340390157469404125,3126839,23777,1,1,19504086,13232,20347,19562,42487,0.048997,0.011077,0.048997
10,14340390157469404125,3126839,23777,1,1,23321680,13232,20347,19562,42487,0.048997,0.011077,0.048997


In [188]:
new_row =session_ad_list.loc[10]

In [189]:
new_row

displayurl       14340390157469404125
adid                          3126839
advertiserid                    23777
position                            1
depth                               1
keywordid                    23321680
titleid                         13232
descriptionid                   20347
queryid                         19562
userid                          42487
base_score                   0.048997
explore_score                0.011077
target_score                 0.048997
Name: 10, dtype: object

In [190]:
df.loc[0] = new_row

In [215]:
('userid' == df.columns.to_numpy()).any()

True