In [1]:
import os
import sys
import tqdm
import random
import numpy as np
import pandas as pd
from skopt import BayesSearchCV
from sklearn.base import BaseEstimator
from sklearn.metrics import mean_squared_error

sys.path.append('.')
sys.path.append('../../')
sys.path.append('../network_model/')

notebook_path = os.getcwd()
project_path = os.path.abspath(os.path.join(notebook_path, '..'))
sys.path.append(project_path)

from network_model.network_classes import Network
from networks_container import NetworksContainer
from estimator import Estimator

In [2]:
endorsers_percentage_list = pd.read_csv("../datasets/palin_endorsers_hours_percentage_list.csv", dtype=int, header=None)
endorsers_percentage_list = endorsers_percentage_list.values.tolist()
endorsers_percentage_list = [item for sublist in endorsers_percentage_list for item in sublist]

In [3]:
print(len(endorsers_percentage_list))

3634


In [4]:
# Tolgo 1 in quanto considero la prima epoca come lo stato iniziale
epochs = len(endorsers_percentage_list) - 1

In [5]:
df = pd.DataFrame(columns=['alpha','beta','homophily','prob_infection','prob_vaccination',
                           'prob_cure','prob_influencer','prob_echo','rmse'])

In [6]:
search_spaces = {
    'prob_influencer': (0.0, 1.0),
    'prob_infection': (0.0, 1.0),
    'prob_vaccination': (0.0, 1.0),
    'prob_cure': (0.0, 1.0),
    'prob_echo': (0.0, 1.0),
}

best_params = []
best_rmse = 1000

In [7]:
# Funzione per salvare i migliori parametri e il miglior RMSE a ogni passo di inferenza
def on_step_callback(result):
    global best_rmse, best_params
    length = len(result.func_vals)
    last_val = result.func_vals[length-1]
    if last_val < best_rmse:
        best_rmse = last_val
        best_params = result.x

In [8]:
# Funzione per verificare se i valori di alpha e beta sono corretti
def check_values(alpha, beta):
    if  alpha + beta > 1.0:
        return False
    # Con alpha = 0 e beta = 1 non vengono aggiunti nuovi nodi
    if alpha == 0 and beta == 1:
        return False
    return True

In [9]:
def run_simulations(alpha, beta, homophily):
    #print("Entrata simulazione")
    container = NetworksContainer(n_networks=3, n_nodes=50, 
                                  alpha=alpha, beta=beta, 
                                  delta_in=1, delta_out=1, 
                                  homophily=homophily, n_commons=45, 
                                  n_influencers=5, n_bots=0, n_fact_checkers=0,
                                  prob_complaint=0.1, 
                                  prob_infection=0.5, 
                                  prob_vaccination=0.1,
                                  prob_cure=0.1, 
                                  prob_influencer=0.1, 
                                  exp_decay=True, user_block=False, 
                                  prob_echo=0.0, epochs=epochs)
    container.create_network_list(n_initial_infected_nodes=5)
    #print("Finita creazione reti")
    estimator = Estimator()
    clf = BayesSearchCV(estimator, 
                        search_spaces=search_spaces,
                        cv=[(slice(None), slice(None))], # per evitare la cross-validation
                        verbose=0,
                        n_iter=10)
    #print("Inizio fit")
    clf.fit(np.array([container]), 
            np.array([endorsers_percentage_list]),
            callback=on_step_callback)
    #print("Fine fit")
    best_prob_cure = best_params[0]
    best_prob_echo = best_params[1]
    best_prob_influencer = best_params[2]
    best_prob_infection = best_params[3]
    best_prob_vaccination = best_params[4]
    
    return best_prob_infection, best_prob_vaccination, best_prob_cure, best_prob_influencer, best_prob_echo

In [10]:
values_alpha_beta = [0.0, 0.25, 0.33, 0.50, 0.75, 1.0]
values_homophily = [0.25, 0.5, 0.75]

In [11]:
for i in tqdm.tqdm((range(len(values_alpha_beta)))):
    alpha = values_alpha_beta[i]
    for j in tqdm.tqdm(range(len(values_alpha_beta))):
        beta = values_alpha_beta[j]
        checked_values = check_values(alpha, beta)
        if checked_values:
            for k in tqdm.tqdm(range(len(values_homophily))):
                homophily = values_homophily[k]
                prob_infection, prob_vaccination, prob_cure, prob_influencer, prob_echo = run_simulations(alpha, beta, homophily)
                #print("Finita simulazione")
                row = {'alpha':alpha,'beta':beta,
                       'homophily':homophily,
                       'prob_infection':prob_infection,
                       'prob_vaccination':prob_vaccination,
                       'prob_cure':prob_cure,
                       'prob_influencer':prob_influencer,
                       'prob_echo':prob_echo,
                       'rmse':best_rmse}
                row = pd.DataFrame([row])
                df = pd.concat([df, row], ignore_index=True)
                df.to_csv("parameters_optimization.csv", index=False)
                best_params = []
                best_rmse = 1000

  0%|          | 0/6 [00:00<?, ?it/s]
[A
[A
[A
100%|██████████| 3/3 [00:29<00:00,  9.88s/it]

[A
[A
[A
100%|██████████| 3/3 [00:33<00:00, 11.17s/it]

[A
[A
[A
100%|██████████| 3/3 [00:32<00:00, 10.93s/it]

[A
[A
[A
100%|██████████| 3/3 [00:36<00:00, 12.03s/it]

[A
[A
[A
100%|██████████| 3/3 [00:44<00:00, 14.86s/it]
100%|██████████| 6/6 [02:56<00:00, 29.44s/it]
 17%|█▋        | 1/6 [02:56<14:43, 176.62s/it]
[A
[A
[A
100%|██████████| 3/3 [00:29<00:00,  9.68s/it]

[A
[A
[A
100%|██████████| 3/3 [00:32<00:00, 10.85s/it]

[A
[A
[A
100%|██████████| 3/3 [00:31<00:00, 10.36s/it]

[A
[A
[A
100%|██████████| 3/3 [00:35<00:00, 11.82s/it]

[A
[A
[A
100%|██████████| 3/3 [00:46<00:00, 15.37s/it]
100%|██████████| 6/6 [02:54<00:00, 29.04s/it]
 33%|███▎      | 2/6 [05:50<11:40, 175.24s/it]
[A
[A
[A
100%|██████████| 3/3 [00:29<00:00,  9.75s/it]

[A
[A
[A
100%|██████████| 3/3 [00:31<00:00, 10.48s/it]

[A
[A
[A
100%|██████████| 3/3 [00:31<00:00, 10.54s/it]

[A
[A
[A