### Library Requirements

In [1201]:
# !pip3 install -r requirements.txt

In [1202]:
import pandas as pd 
import numpy as np 
import random
import json
from copy import deepcopy

import seaborn as sns
import matplotlib.pyplot as plt
from ipywidgets import interact, IntSlider

import pickle
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score

import warnings
warnings.filterwarnings("ignore")

In [1203]:
def prepare_data(data, data_info, shuffle=True):
    """Concatenate the input frames and optionally reorder them group by group.

    With ``shuffle=True`` the distinct values of the ``data_info["groups"]``
    column are put in random order (``np.random.shuffle``) and the rows of each
    group are emitted contiguously, sorted by ``data_info["sort_by"]``. Only
    the order of the groups is random; rows whose group value is NaN never
    match a group and are therefore dropped in this mode.

    With ``shuffle=False`` the concatenated rows keep their order and the
    global NumPy random state is not touched.

    Args:
        data (list): List of pandas DataFrames to be concatenated.
        data_info (dict): Data setup. The keys "groups" (grouping column) and
            "sort_by" (column(s) handed to ``sort_values``) are read when
            ``shuffle`` is true.
        shuffle (bool, optional): Whether to randomise the group order.
            Defaults to True.

    Returns:
        pandas.DataFrame: The combined dataset with a fresh 0..n-1 index and
        the column dtypes of the concatenated input.
    """
    df = pd.concat(list(data))

    if not shuffle:
        # reset_index returns a new frame, so the caller never aliases `df`.
        return df.reset_index(drop=True)

    group_col = data_info["groups"]
    sort_col = data_info["sort_by"]

    group_ids = df[group_col].unique()
    np.random.shuffle(group_ids)

    # Collect the per-group slices and concatenate once: appending to a growing
    # frame inside the loop is quadratic and degrades dtypes to object.
    pieces = [
        df[df[group_col] == group_id].sort_values(sort_col)
        for group_id in group_ids
    ]
    # pd.concat rejects an empty list; an empty slice keeps columns and dtypes.
    dataset = pd.concat(pieces) if pieces else df.iloc[0:0]

    return dataset.reset_index(drop=True)

def load_model_parameters(load_path="model_parameters.pkl"):
    """Deserialize and return the object pickled at ``load_path``.

    Args:
        load_path (str, optional): Path of the pickle file to read.
            Defaults to "model_parameters.pkl".

    Returns:
        object: Whatever object was stored in the file.
    """
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(load_path, "rb") as pickle_file:
        return pickle.load(pickle_file)

In [1204]:
# Read the run configuration; both sections below must be present in the file.
with open('config.json', 'r') as config_file:
    config_data = json.load(config_file)

PROCESSING_INFO, TRAIN_INFO = (
    config_data[section] for section in ('PROCESSING_INFO', 'TRAIN_INFO')
)

In [1205]:
# CSV files (relative to ./processed/) that make up the evaluation data.
PATHS = ["test_15-17.02.2024.csv"]

# One DataFrame per listed file, in the same order as PATHS.
DATA = list(map(pd.read_csv, (f"./processed/{path}" for path in PATHS)))
# The stored parameters are keyed by model name; only this entry is used here.
MODEL = load_model_parameters()["Gradient Boosting"]

In [1206]:
class Action:
    """Beta-distributed belief about an action, parameterised by success and failure counts."""

    def __init__(self, successes=1, failures=1):
        # The two counts are used directly as the Beta distribution's parameters.
        self.successes, self.failures = successes, failures

    def sample(self, num_samples=1):
        """Draw ``num_samples`` values from Beta(successes, failures).

        A single requested sample is returned as a scalar; any other count
        yields the array produced by NumPy.
        """
        draws = np.random.beta(self.successes, self.failures, size=num_samples)
        return draws[0] if num_samples == 1 else draws

    def update(self, success, failure):
        """Add the given ``success`` and ``failure`` amounts to the stored counts."""
        self.failures += failure
        self.successes += success

    def get_parameters(self):
        """Return the pair ``(successes, failures)``."""
        return (self.successes, self.failures)

    def __str__(self):
        """Render the counts in a compact, human-readable form."""
        return f"Action(s={self.successes}, f={self.failures})"


class AdSet:
    """Sequential reader over one ad set's rows, bundled with its label and Action."""

    def __init__(self, successful, data, action):
        self.action = action
        self._successful = successful
        # Index of the next row that get_record() will hand out.
        self._record = 0
        # Re-index from zero; reset_index returns a new frame, the input is untouched.
        self.data = data.reset_index(drop=True)

    def get_record(self):
        """Return the next unread row of ``data`` and advance the read position.

        Rows are addressed positionally, so reading past the last row raises
        the ``IndexError`` produced by ``iloc``.
        """
        position = self._record
        row = self.data.iloc[position, :]
        # Advance only after the read succeeded.
        self._record = position + 1
        return row

    def __str__(self):
        """Show the label flag and how many rows have been read so far."""
        return f"AdSet(successful={self._successful}, record={self._record})"


In [1207]:
def binarize(record, model=MODEL, train_info=TRAIN_INFO):
    """
    Turn one record or a table of records into the model's predicted label(s).

    Args:
        record (pd.Series or pd.DataFrame): A single row (1-D) or several rows (2-D).
        model: Object with a ``predict`` method; it is called with a 2-D numpy array.
        train_info (dict): Must provide "train_fields", the labels selected
        from ``record`` and passed to the model.

    Returns:
        The first element of the prediction for a 1-D input, the full
        prediction for a 2-D input, and None for any other dimensionality.
    """
    dims = record.ndim
    if dims not in (1, 2):
        return None

    fields = train_info["train_fields"]
    if dims == 2:
        return model.predict(record.loc[:, fields].to_numpy())

    # A single row has to be presented to the model as a 1 x n_features matrix.
    single_row = record.loc[fields].to_numpy().reshape(1, -1)
    return model.predict(single_row)[0]

In [1208]:
data = prepare_data(DATA, TRAIN_INFO)

TEST_FIELD = TRAIN_INFO["label_field"]
GROUPS = TRAIN_INFO["groups"]

# Group ids having at least one row with label 1 ("good") or label 0 ("bad"),
# each list in random order. The ids are sorted before sampling, so the
# resulting order depends only on the state of the `random` module.
good_ids = data.loc[data[TEST_FIELD] == 1, GROUPS]
bad_ids = data.loc[data[TEST_FIELD] == 0, GROUPS]
good = random.sample(sorted(good_ids.unique()), good_ids.nunique())
bad = random.sample(sorted(bad_ids.unique()), bad_ids.nunique())

# Number of simulated steps = row count of the largest group (non-null "hour" values).
max_timestamps = data.groupby(GROUPS)["hour"].count().max()

# "Bad" ad sets come first, then the "good" ones; every ad set starts from a fresh Action().
adsets = [AdSet(0, data[data[GROUPS] == ad], Action()) for ad in bad]
adsets += [AdSet(1, data[data[GROUPS] == ad], Action()) for ad in good]

# timestamp -> deep copy of all ad sets as they were *before* that step's update.
history = {}

for timestamp in range(max_timestamps):

    history[timestamp] = deepcopy(adsets)

    for ad in adsets:
        # A group with fewer rows than the largest one runs out of records
        # early; reading on would raise IndexError, so it just stops updating.
        if ad._record >= len(ad.data):
            continue
        record = ad.get_record()
        reward = binarize(record)
        # reward is the model's 0/1 prediction: 1 counts as a success, 0 as a failure.
        ad.action.update(reward, 1 - reward)

In [1211]:
# Additional Methods for visualization
def plot_distribution(timestamp, adsets):
    """Plot one density curve per ad set, estimated from samples of its Action.

    Args:
        timestamp (int): Hour index; only used in the figure title.
        adsets (iterable): Objects exposing ``action.sample(n)`` and a
            ``_successful`` flag.
    """
    plt.figure(figsize=(10, 6))
    # The title has to describe the colours used below: ad sets whose
    # _successful flag is 0 are drawn in red, all others in blue.
    plt.title(f'Distribution Plot at hour # {timestamp} (red - "unsuccessful", blue - "successful")')

    for ad in adsets:
        samples = ad.action.sample(10000)
        color = 'red' if ad._successful == 0 else 'blue'
        # The label is attached to the curve, but no legend is drawn in this function.
        sns.kdeplot(samples, fill=True, label=f'Adset {ad}', color=color)

    plt.xlabel('Success Probability')
    plt.ylabel('Density')
    plt.xlim(0, 1)
    plt.tight_layout()
    plt.show()

def interactive_plot(timestamp):
    """Plot the ad-set snapshot stored in ``history`` for the selected timestamp."""
    plot_distribution(timestamp, history[timestamp])

# Slider over every recorded timestamp, starting at the first one.
hour_slider = IntSlider(min=0, max=max_timestamps-1, step=1, value=0)
interact(interactive_plot, timestamp=hour_slider)

interactive(children=(IntSlider(value=0, description='timestamp', max=71), Output()), _dom_classes=('widget-in…

<function __main__.interactive_plot(timestamp)>

In [1216]:
# Ground truth is the flag each ad set was created with.
y_true = [ad._successful for ad in adsets]
y_pred = []

for ad in adsets:
    successes, failures = ad.action.get_parameters()
    # Mean of Beta(successes, failures); predicted positive when it exceeds 0.7.
    posterior_mean = successes / (successes + failures)
    y_pred.append(int(posterior_mean > 0.7))

# Labels keep their own tab padding so the printed percentages line up.
for label, metric in (
    ("precision_score:\t", precision_score),
    ("accuracy_score:\t\t", accuracy_score),
    ("f1_score:\t\t", f1_score),
):
    print(label, f"{metric(y_true, y_pred):.2%}")
print("confusion_matrix:\n", confusion_matrix(y_true, y_pred))

precision_score:	 100.00%
accuracy_score:		 97.67%
f1_score:		 97.14%
confusion_matrix:
 [[25  0]
 [ 1 17]]
