## Comparaison des algorithmes de l'échantillonnage de Thompson, l'Epsilon Greedy et l'A/B test pour la sélection des paramètres des bannières contenant des messages publics

### Chargement des libraries

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import beta
import plotly.graph_objs as go
import copy
import random

### Création d'une classe des bannières (messages)

In [2]:
class banner_class():
    def __init__(self,Id,p):
        # probabilité de gain d'une bannière
        self.p = p
        # statistiques et caractéristiques d'une bannière
        self.stat = {'id': Id,'a':0,'b':0,'conv':0,'p':self.p}
        # nombre de victoires, nombre total de tests
        self.a = 0
        self.b = 0

        
    # fonction de collecte de statistiques
    def get_stat(self,value):
        self.stat['b'] += 1
        if value == 1:
            self.stat['a'] +=1
        self.stat['conv'] = self.stat['a'] / self.stat['b']

        
    # fonction de simulation de conversion
    def conversion(self):
        if self.p > random.random():
            result = 1
        else:
            result = 0
        self.get_stat(result)

### Création d'une classe de générateur de bannières

In [3]:
class banners_generator():

    def __init__(self,a, b):
        global random
        self.a = a
        self.b = b
        

    def generate(self,n):
        banners = [banner_class(i,random.uniform(self.a, self.b)) for i in range(n)]
        return banners

### Création d'une classe pour l'algorithme l'Epsilon-Greedy

In [4]:
class EpsilonGreedy():
    def __init__(self, data, n, epsilon = 0.15):
        self.epsilon = epsilon # probabilité d'exploration
        self.data = data
        self.n = n
        self.rc = [i for i in range(len(data))]
    

    # fonction qui renvoie la bannière avec la conversion la plus élevée
    def get_conv(self):
        l = []
        for i in self.data:
            l.append(i.stat['conv'])
        return np.argmax(l)


    def start_test(self):
        for _ in range(self.n):
            if random.random() > self.epsilon:
                self.data[self.get_conv()].conversion()
            else:
                self.data[random.choice(self.rc)].conversion()
        return self.get_result()

    
    def get_result(self):
        sort_data = sorted(self.data, key=lambda d: d.stat['conv'],reverse=True)
        sort_data  = pd.DataFrame([sort_data[i].stat for i in range(len(sort_data))])
        return pd.DataFrame([i.stat for i in self.data])

### Création d'une classe pour l'algorithme de l'échantillonnage de Thompson

In [5]:
class thompson_sampling():    


    def __init__(self,data,l,r,n):
        self.length = len(data)
        self.data = data
        self.win_index = None
        self.win_list = []
        self.l = l
        self.r = r
        self.z = 0.0001
        self.n = n
        

    # fonction qui simule un test d'une bannière
    # la partie principale de l'algorithme
    def do_sample(self):
        # ajoute d'un gagnant à la liste des gagnants
        self.win_list.append(self.win_index)
        # impression d'une bannière
        self.data[self.win_index].conversion()
        # affinage des distributions et affichage du gagnant
        self.conv_list = []
        # création d'un nombre aléatoire pour chacune des distributions
        # puis le choix du maximum
        for i in range(self.length):
            conv = random.betavariate(1 + self.data[i].stat['a'], 
                                      1+ self.data[i].stat['b'])   
            self.conv_list.append(conv)
        self.win_index = np.argmax(self.conv_list)


    # création d'une fonction pour visualiser les indicateurs
    def print_data(self):
        result = [self.data[i].stat for i in range(self.length)]
        return pd.DataFrame(result)


    # création de la couleur pour une visualisation ultérieure
    def create_color(self):
        colors = []
        for _ in range(self.length):
            r1 = np.random.randint(0,255)
            r2 = np.random.randint(0,255)
            r3 = np.random.randint(0,255)   
            color = f'rgba({r1},{r2},{r3},.3)'
            colors.append(color)
        return colors


    # cette fonction est nécessaire pour obtenir les caractéristiques de distribution
    # elle sera utilisé à la fois pour la visualisation et les tests
    def get_distrib(self):
        x = np.arange(self.l,self.r,self.z)
        y_list = []
        self.quantiles = []
        for i in range(self.length):

            Id = self.data[i].stat['id']
            y = beta.pdf(x,1 + self.data[i].stat['a'],
                           1 + self.data[i].stat['b'])
            y_list.append(y)
        return y_list


    def start_test(self,plot = False):
        self.win_index = np.random.randint(0,self.length,1)[0]
        # réalisation d'un premier jeu pour obtenir une distribution initiale
        self.do_sample()
        self.get_distrib()
        # commencement de l'échantillonnage
        for _ in range(self.n):
            self.do_sample()
        # obtention des distributions y
        y_list = self.get_distrib()
        # obtention des couleurs pour les distributions
        colors = self.create_color()
        x = np.arange(self.l,self.r,self.z)
        fig = go.Figure(data=[go.Scatter(x = x,
                        y = y_list[a],
                        marker = dict(color=(colors[a])),
                        fillcolor = colors[a],
                        fill='tozeroy') for a in range(self.length)])
        fig.update_layout(
        title="Conversion Distribution",
        xaxis_title="Conversion value",
        yaxis_title="Probability Density",
        template='plotly_white')
        if plot == True:
            fig.show()
        else:
            pass
        return self.get_result()


    # obtention des données de test
    def get_result(self):
        sort_data = sorted(self.data, key = lambda d: d.stat['conv'],reverse=True)
        sort_data  = pd.DataFrame([sort_data[i].stat for i in range(len(sort_data))])
        return pd.DataFrame([i.stat for i in self.data])

### Création d'une classe pour l'A/B test

In [6]:
class ab_test_model():


    def __init__(self,data,n):
        self.data = data
        self.n = n // len(self.data)


    def start_test(self):
        for d in range(len(self.data)):
            for _ in range(self.n):
                self.data[d].conversion()
        return self.get_result()


    def get_result(self):
        sort_data = sorted(self.data, key=lambda d: d.stat['conv'],reverse=True)
        sort_data  = pd.DataFrame([sort_data[i].stat for i in range(len(sort_data))])
        return pd.DataFrame([i.stat for i in self.data])

### Création d'une classe pour la simulation

In [7]:
class test_algo():

    def __init__(self,models,l,r):
        self.models = models
        self.l = l
        self.r = r
        
    def compare(self,query):
        statistic = []
        log = []
        for q in query:
            n,num_ban,attempt = q

            # création d'un dictionnaire avec des valeurs de test
            results = {}
            for model in models:
                results[model.__name__] = 0

            # liste de stockage des résultats
            for _ in range(attempt):
                # génération des données pour chaque test
                data = banners_generator(self.l, self.r).generate(num_ban)
                # itération sur les modèles
                for model in self.models:
                    copy_data = copy.deepcopy(data)

                    if model.__name__ == 'thompson_sampling':
                        result = model(copy_data, self.l, self.r, n).start_test()
                        results[model.__name__] += np.sum(result["a"])

                    elif model.__name__ == 'EpsilonGreedy':
                        result = model(copy_data, n, 0.15).start_test()
                        results[model.__name__] += np.sum(result["a"])

                    else:
                        result = model(copy_data, n).start_test()
                        results[model.__name__] += np.sum(result["a"])

            # transformation des résultats en un tableau de données
            results = pd.DataFrame([results]).T.sort_values(0,ascending=False)

            log.append(f'Query: n={n}, num_banners={num_ban}, attempt={attempt}, winner - {results.index[0]}')
            print(log[-1])

            statistic.append(results)
        display(self.stat(statistic))

    def stat(self,data):
        ft = pd.DataFrame()

        for i in data:
            ft = ft.append(i)
        
        ft = ft.reset_index()
        ft = ft.groupby('index').sum()
        ft = ft.sort_values(by = 0,ascending=False)
        ft = ft.reset_index()

        gap = np.array(ft[0])
        for i in range(1,len(gap)):
            gap[i] = gap[0] - gap[i]
        gap = gap / gap[0]
        gap[0] = 0
        ft['Difference_from_the_winner'] = gap

        ft.columns = ['Algorithm','number_of_wins','Difference_from_the_winner']
        return ft


### Création d'une fonction pour la démonstration des résultats des tests

In [8]:
def create_report(data):
    data = data.iloc[:,[1,2]]
    banners=[f'message {i}' for i in range(len(data))]

    fig = go.Figure(data=[ 
        go.Bar(name='number of wins (conversion)', x=banners, y=data['a'], marker_color='indianred'),
        go.Bar(name='total number of tests', x=banners, y=data['b'], marker_color='lightsalmon'),
    ])
    fig.update_layout(barmode='group', template='plotly_white')
    fig.show()

## Simulation des algorithmes sur les données générées

In [9]:
models = [ab_test_model,EpsilonGreedy,thompson_sampling]

test = test_algo(models, 0.03, 0.5)

test.compare([
    [10000,10,1],
    [35000,30,1],
    [50000,50,1]
])

Query: n=10000, num_banners=10, attempt=1, winner - thompson_sampling
Query: n=35000, num_banners=30, attempt=1, winner - thompson_sampling
Query: n=50000, num_banners=50, attempt=1, winner - thompson_sampling



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Unnamed: 0,Algorithm,number_of_wins,Difference_from_the_winner
0,thompson_sampling,43145,0.0
1,EpsilonGreedy,42333,0.01882
2,ab_test_model,26293,0.39059


### Echantillonnage de Thompson

In [10]:
from copy import deepcopy

In [27]:
l = 0.03
r = 0.5
data = banners_generator(l, r).generate(5)

In [28]:
data_ts = deepcopy(data)
ts = thompson_sampling(data_ts,l,r,300)
result_ts = ts.start_test(plot = True)

In [29]:
result_ts

Unnamed: 0,id,a,b,conv,p
0,0,2,18,0.111111,0.176702
1,1,1,16,0.0625,0.23665
2,2,97,221,0.438914,0.417503
3,3,0,11,0.0,0.14227
4,4,8,35,0.228571,0.229105


In [30]:
create_report(result_ts)

### Epsilon-Greedy

In [31]:
data_g = deepcopy(data)
result_g = EpsilonGreedy(data_g, 300, 0.15).start_test()
result_g

Unnamed: 0,id,a,b,conv,p
0,0,2,21,0.095238,0.176702
1,1,45,221,0.20362,0.23665
2,2,1,6,0.166667,0.417503
3,3,0,8,0.0,0.14227
4,4,6,44,0.136364,0.229105


In [32]:
create_report(result_g)

### A/B test

In [33]:
data_ab = deepcopy(data)

result_ab = ab_test_model(data_ab, 500).start_test()
result_ab

Unnamed: 0,id,a,b,conv,p
0,0,24,100,0.24,0.176702
1,1,25,100,0.25,0.23665
2,2,44,100,0.44,0.417503
3,3,14,100,0.14,0.14227
4,4,18,100,0.18,0.229105


In [34]:
create_report(result_ab)