# Generating, Updating Networks and Polarization Experimentation

## The Imports

In [None]:
from src.classes.network import RandomNetwork, ScaleFreeNetwork
from src.classes.node import Node
from src.experimentation import generate_networks, read_and_load_networks, multiple_correlations_par
from src.viusalization import plot_cascade_animation, test_significance, calculate_average_per_gamma, plot_cascades_gamma, plot_cascade_dist_average, plot_cascade_power_law
from collections import defaultdict
from collections import defaultdict
import numpy as np


## Global Values

In [2]:
# Correlation values to sweep: 11 evenly spaced points in [-1, 1], rounded to one decimal.
correlations = np.round(np.linspace(-1, 1, 11), 1)
# 11 evenly spaced seed values between 13 and 1600 (presumably one per correlation value).
initial_seeds = np.linspace(13, 1600, 11)

num_runs = 30
num_nodes = 200
update_fraction = 0.1
average_degree = 8
# L / R ratio (not strictly necessary, but it might be interesting to vary)
starting_distribution = 0.5
# Passed as `p` to the random network: average degree divided by the number of other nodes.
p = average_degree / (num_nodes - 1)
# Number of update rounds applied to a network.
updates = 300000
# Passed as `m` to the scale-free network.
m = 4

# vary between random and scale_free
# what_net = "random"
what_net = "scale_free"

## Exemplary Network Generation

Generating and updating ScaleFreeNetwork

In [3]:
# # Use plot=True to see the initial plot of the distribution
# network = ScaleFreeNetwork(m=m, plot=True)

# for round in range(10000):
#     network.update_round()

# # Use this function to plot the distribution at the end
# network.verify_scale_free_distribution(plot=True)

Generating and Updating RandomNetwork

In [4]:
# Instead of calling Network(), you now call RandomNetwork() or ScaleFreeNetwork()
# # You can pass the same arguments as for Network(), such as p=0.1, k=8. If these are left empty, default values are used.

# random_network = RandomNetwork()
# for round in range(10000):
#     random_network.update_round()

## Developing and saving network
This function generates a network (scale-free or random), performs the specified number of updates and writes it out to a .txt file for easy further experimentation. This is done in a parallelized fashion, though it can still take 2 to 3 hours.

In [None]:
# # dummy values
# num_runs = 10
# updates=1000
# # scale-free
# generate_networks(correlations, initial_seeds, num_nodes=num_nodes, iterations=updates, how_many=num_runs, update_fraction=update_fraction, starting_distribution=starting_distribution, p=p, network_sort=what_net, m=m)

 ## Reading in and generating Network

This function reads in the networks from the .txt files in which they were saved. As the networks are fully seeded and thus reproducible, each network can be resimulated with the correct seed and connections.

As a check for the validity of the networks, the test boolean below can be set to True. This check takes ~5 minutes.

In [6]:
# # # dummy values
# # num_runs = 5
# # updates=1000

# This check only works if the networks that are read in are exactly the ones that were
# generated, so check the paths before running!
test = False

# Network type that is read from disk. The consistency check below rebuilds its reference
# network from this same value, so the loaded and the regenerated network always have the
# same type (previously the load was fixed to "random" while the check followed what_net).
# loaded_net = what_net
loaded_net = "random"

# Read in the networks and save them in a data structure
all_networks = read_and_load_networks(num_runs, num_nodes, update_fraction, average_degree, starting_distribution, correlations, whichtype=loaded_net)


def _edge_ids(network):
    """Return the connections of `network` as a set of (ID, ID) pairs."""
    return {(conn[0].ID, conn[1].ID) for conn in network.connections}


# Test for consistency of the saved network: regenerate the network for correlation -1.0
# with the first seed and compare its connections with the stored ones.
if test:
    used_seed = int(initial_seeds[0])
    if loaded_net == "scale_free":
        test_network = ScaleFreeNetwork(num_nodes=num_nodes, m=m, mean=0, correlation=-1.0, update_fraction=update_fraction, starting_distribution=starting_distribution, seed=used_seed)
    else:
        test_network = RandomNetwork(num_nodes=num_nodes, mean=0, correlation=-1.0, update_fraction=update_fraction, starting_distribution=starting_distribution, seed=used_seed, p=p)
    number_of_alterations = 0

    # Entry [0] of the stored pair is compared with the freshly generated network ...
    assert _edge_ids(all_networks[(-1.0, 0)][0]) == _edge_ids(test_network), "The networks that are generated should be the same at the start"

    for _ in range(updates):
        test_network.update_round()
        number_of_alterations += test_network.alterations
        test_network.clean_network()

    # ... and entry [1] with the network after all update rounds.
    assert _edge_ids(all_networks[(-1.0, 0)][1]) == _edge_ids(test_network), "The networks that are generated should be the same at the end"



# Experimentation (Cascades, Assortativity, Social Ties)

## Calculating the cascades (parallelized implementation)
Reading in network and saving everything in datastructures, structured by size and correlation value. 


In [None]:

save = True

# Data structures for the network type selected by what_net (scale-free by default).
cascades_before = defaultdict(lambda: defaultdict(list))
cascades_after = defaultdict(lambda: defaultdict(list))
cascades_before_averaged_sf = defaultdict(lambda: defaultdict(list))
cascades_after_averaged_sf = defaultdict(lambda: defaultdict(list))
sizes = defaultdict()
sizes_averaged = defaultdict()

# Data structures for the random network.
cascades_before_averaged_rand = defaultdict(lambda: defaultdict(list))
cascades_after_averaged_rand = defaultdict(lambda: defaultdict(list))
sizes_averaged_rand = defaultdict()


def _collect_averaged_cascades(network_type, sizes_out, before_out, after_out, label=""):
    """Run the cascade experiment for every correlation value and store the averaged results.

    Args:
        network_type: Network type string forwarded to multiple_correlations_par.
        sizes_out: Mapping filled with the averaged largest-size result per correlation.
        before_out: Mapping filled with the averaged cascades of the initial network.
        after_out: Mapping filled with the averaged cascades of the updated network.
        label: Text appended to the progress message (e.g. " (random)").
    """
    for corr in correlations:
        print(f"starting experimentation for correlation: {corr}{label}")
        print("-----------------------------------------------")

        # Only the averaged results are kept; the non-averaged collections are discarded.
        _, before_after_averaged, largest_sizes = multiple_correlations_par(
            corr, num_runs, num_nodes, update_fraction, average_degree, starting_distribution, network_type
        )
        before_averaged, after_averaged = before_after_averaged
        _, largest_size_averaged = largest_sizes

        # Everything is keyed by the correlation value.
        sizes_out[corr] = largest_size_averaged
        before_out[corr] = before_averaged
        after_out[corr] = after_averaged


# Run the cascades for the different correlations (for both the initial and the updated
# network), first for the network type selected by what_net ...
_collect_averaged_cascades(what_net, sizes_averaged, cascades_before_averaged_sf, cascades_after_averaged_sf)

# ... and then repeat the experiments for the random network.
_collect_averaged_cascades("random", sizes_averaged_rand, cascades_before_averaged_rand, cascades_after_averaged_rand, label=" (random)")



#### Animation of cascade size distribution with average polarization
Uses the averaged cascade size per sampled node. Animates the distribution per correlation value.
This is done for both the scale-free and the random network.

In [None]:

# Animate the averaged cascade distributions, stepping through the correlation values in
# reverse order: first for the what_net networks ...
plot_cascade_animation(
    cascades_before_averaged_sf,
    cascades_after_averaged_sf,
    list(reversed(correlations)),
    sizes_averaged,
    num_runs,
    what_net,
    save=True,
    averaged=True,
)
# ... then for the random networks.
plot_cascade_animation(
    cascades_before_averaged_rand,
    cascades_after_averaged_rand,
    list(reversed(correlations)),
    sizes_averaged_rand,
    num_runs,
    "random",
    save=True,
    averaged=True,
)


##### Summarizing all cascade info in one plot 
for both the random and scale-free network (before vs after), and random vs scale-free (after updating)

In [None]:
# One summary plot per comparison, in this order:
#   1. what_net networks, before vs after updating
#   2. random vs scale-free, both after updating
#   3. random networks, before vs after updating
for cascade_pair, net_label in (
    ((cascades_before_averaged_sf, cascades_after_averaged_sf), what_net),
    ((cascades_after_averaged_rand, cascades_after_averaged_sf), "both"),
    ((cascades_before_averaged_rand, cascades_after_averaged_rand), "random"),
):
    plot_cascades_gamma(cascade_pair, num_runs, net_label)

#### Phase transition at value 0.8

Visualization of the phase transition and fitting a power law. First the full distribution at correlation 0.8 is plotted, then we zoom in on cascade sizes >= 2.

In [None]:
# Correlation value at which the transition is inspected.
transition_corr = np.float64(0.8)

# First the raw distributions at the transition point, then the zoomed-in power law;
# each plot is drawn for the random network and then for the scale-free network.
for plot_at_transition in (plot_cascade_dist_average, plot_cascade_power_law):
    for cascades_by_corr, net_label, sizes_by_corr in (
        (cascades_after_averaged_rand, "random", sizes_averaged_rand),
        (cascades_after_averaged_sf, "scale_free", sizes_averaged),
    ):
        plot_at_transition(
            cascades_by_corr[transition_corr],
            "after",
            net_label,
            sizes_by_corr[transition_corr],
            num_runs,
            save,
            transition_corr,
        )



## Statistical testing 
Calculates the significance of the difference between random and scale-free (after the network is updated), random before vs after the network is updated, and scale-free before vs after the network is updated. This is done for the different correlation values. The results are saved in the folder designated for statistical testing.

In [12]:
# Pairs of cascade collections to compare; the order matches the labels in the loop below.
# The third pair previously referenced the undefined name `cascades_after_averaged`.
which_cas = [
    (cascades_before_averaged_sf, cascades_after_averaged_sf),      # scale_free: before vs after updating
    (cascades_before_averaged_rand, cascades_after_averaged_rand),  # random: before vs after updating
    (cascades_after_averaged_rand, cascades_after_averaged_sf),     # both: random vs scale-free, after updating
]

for what, cascade_pair in zip(["scale_free", "random", "both"], which_cas):
    values_bef, values_af, variance_bef, variance_af = calculate_average_per_gamma(cascade_pair, num_runs)
    results = test_significance(values_bef, values_af, variance_bef, variance_af, num_runs)
    output_file = f"statistics/cascades/results_bef_af_{what}.txt"

    # Write one section per gamma with the test statistics for cascade size and polarization.
    with open(output_file, "w") as f:
        f.write(f"Statistical significance for {what} network type (cascade experiments)\n")
        for gamma, res in results.items():
            f.write(f"Gamma = {gamma}:\n")
            f.write(f"  Size: t = {res['t_size']:.3f}, p = {res['p_size']:.3g}\n")
            f.write(f"  Polarization: t = {res['t_pol']:.3f}, p = {res['p_pol']:.3g}\n")
            f.write("--------------------------------------------------------\n")
