In [None]:
from src.classes.network import Network
from src.classes.node import Node
from multiprocessing import Pool
from src.viusalization import plot_cascade_dist, plot_cascade_dist_average
from collections import defaultdict
from concurrent.futures import ProcessPoolExecutor
from functools import partial
import os, sys
import numpy as np
from src.experimentation import generate_networks


#### Global Values

In [6]:
# Experiment-wide configuration shared by the cells below.

# Correlation values to sweep: 11 evenly spaced points from -1 to 1.
correlations = np.linspace(-1, 1, 11)
# 11 evenly spaced (float) seed values; cast to int where a seed is needed.
initial_seeds = np.linspace(13, 1600, 11)
# Number of stored networks that are read back per correlation value.
num_runs = 10
# Number of nodes in every generated network.
num_nodes = 20
# Passed straight through to Network / generate_networks as `update_fraction`.
update_fraction = 0.1
average_degree = 8
starting_distribution = 0.5     # L / R ratio (not strictly necessary, but might be interesting to vary)
# presumably the edge probability that yields `average_degree` expected neighbours per node — TODO confirm against Network
p = average_degree/(num_nodes-1) 
# Number of update rounds used when evolving a network.
updates = 100


#### Setting seed for fixed order for sets, for reproducibility purposes

In [7]:
# %env PYTHONHASHSEED=134

In [8]:
# def multiple_correlations(corr, start_seed = 39):

#     num_nodes = 200
#     correlation = corr
#     update_fraction = 0.1
#     average_degree = 8
#     starting_distribution = 0.5     # L / R ratio (niet per se nodig maar kan misschien leuk zijn om te varieern)

#     # average degree of 8
#     p = average_degree/(num_nodes-1)
#     seedje = start_seed
#     number_of_experiments = 10
#     collection_of_all_before = defaultdict(list)
#     collection_of_all_after = defaultdict(list)
#     largest_size_of_all = 0
#     save=True


#     def develop_network(num_nodes, correlation, update_fraction, starting_distribution, seedje, p):
#         network = Network(num_nodes, mean=0, correlation=correlation, update_fraction=update_fraction, starting_distribution=starting_distribution, seed=seedje, p=p)

#         number_of_iters=10000
#         data_before, average_data_before = create_data(number_of_iters, network)
#         largest_size = max(data_before.keys())

#         number_of_alterations = 0

        
#         for _ in range(1000000):
#             network.update_round()
#             number_of_alterations += network.alterations
#         print(number_of_alterations)

#         after_data, average_after_data = create_data(number_of_iters, network)
#         if max(after_data.keys()) > largest_size:
#             largest_size = max(after_data.keys())
#         return (data_before, average_data_before), (after_data, average_after_data), largest_size

#     for i in range(number_of_experiments):

#         seedje +=i
#         (before_data, averaged_before_data), (after_data, averaged_after_data), largest_size=develop_network(num_nodes, correlation, update_fraction, starting_distribution, seedje, p)
#         if largest_size > largest_size_of_all:
#             largest_size_of_all = largest_size
#         for size, polarizations in before_data.items():
#             collection_of_all_before[size].extend(polarizations)

#         for size, polarizations in after_data.items():
#             collection_of_all_after[size].extend(polarizations)
        

#     plot_cascade_dist_average(collection_of_all_before, "before", largest_size_of_all, number_of_experiments, save, correlation)
#     plot_cascade_dist_average(collection_of_all_after, "after", largest_size_of_all, number_of_experiments, save, correlation)

#### Developing and saving network

In [9]:


# num_runs = 10
# correlations = np.linspace(-1, 1, 10)
# initial_seeds = np.linspace(13, 130, 10)
# num_nodes = 200
# update_fraction = 0.1
# average_degree = 8
# starting_distribution = 0.5     # L / R ratio (niet per se nodig maar kan misschien leuk zijn om te varieern)
# p = average_degree/(num_nodes-1) 


# Evolve and store the networks for every correlation value.
# `how_many` is tied to `num_runs` instead of a literal 10 so that the number of
# networks written here cannot drift from the number read back per correlation
# further down (the loader iterates over `range(num_runs)`).
# NOTE(review): assumes `how_many` is the number of networks written per
# correlation — confirm against src.experimentation.generate_networks.
generate_networks(correlations, initial_seeds, num_nodes=num_nodes, iterations=updates, how_many=num_runs, update_fraction=update_fraction, starting_distribution=starting_distribution, p=p)

 #### Reading in and generating Network

In [None]:
def read_network_properties(file_path):
    """
    Reads network properties from a .txt file and converts them back
    into a dictionary with appropriate datatypes.

    The first two lines of the file are treated as a header and skipped.
    Every remaining non-blank line must have the form ``<key>: <value>``.
    Blank lines (for example a trailing newline at the end of the file)
    are ignored.

    Args:
        file_path (str): Path to the .txt file containing network properties.

    Returns:
        dict: Network properties with restored data types:
            - "Number of Nodes", "Number of Edges", "Seed" -> int
            - "Correlation", "P value", "Update fraction" -> float
            - "Connections" -> list of (int, int) tuples
            - "Nodes" -> list of (int, identity, float) tuples
            - any other key -> the raw string value

    Raises:
        ValueError: If a non-blank line has no ``": "`` separator, or a
            numeric value cannot be converted.
    """
    int_keys = ("Number of Nodes", "Number of Edges", "Seed")
    float_keys = ("Correlation", "P value", "Update fraction")

    properties = {}

    with open(file_path, "r") as file:
        lines = file.readlines()

    for line in lines[2:]:  # Skip the header lines
        entry = line.strip()
        if not entry:
            # A blank line carries no property; unpacking it below would fail.
            continue

        key, value = entry.split(": ", 1)
        if key in int_keys:
            properties[key] = int(value)
        elif key in float_keys:
            properties[key] = float(value)
        elif key == "Connections":
            # WARNING: eval() executes arbitrary Python and is NOT a safe parser.
            # Only read files that this project generated itself. If the stored
            # value is guaranteed to be a plain literal, ast.literal_eval is the
            # safe replacement.
            connections = eval(value)
            properties[key] = [(int(a), int(b)) for a, b in connections]
        elif key == "Nodes":
            # WARNING: same eval() caveat as for "Connections" above.
            nodes = eval(value)
            properties[key] = [
                (int(node_id), identity, float(threshold))
                for node_id, identity, threshold in nodes
            ]
        else:
            # Unknown keys are kept verbatim as strings.
            properties[key] = value
    return properties

def read_and_load_networks(num_runs, num_nodes, update_fraction, average_degree, starting_distribution, correlations):
    """
    Build a (before, after) pair of networks for every correlation and run index.

    For each stored file ``networks/random/<corr>/network_<i>.txt`` two
    ``Network`` objects are constructed with identical arguments, using the
    seed recorded in the file. The first one is left untouched ("before").
    The second one ("after") has its connections wiped and replaced by the
    connections listed in the file.

    Args:
        num_runs (int): Number of stored networks to load per correlation.
        num_nodes (int): Number of nodes in each network.
        update_fraction (float): Forwarded to ``Network``.
        average_degree (float): Used to derive ``p = average_degree / (num_nodes - 1)``.
        starting_distribution (float): Forwarded to ``Network``.
        correlations (iterable): Correlation values to load networks for.

    Returns:
        defaultdict: Maps ``(corr, i)`` to ``(before_network, after_network)``.
    """
    edge_probability = average_degree / (num_nodes - 1)
    networks = defaultdict(tuple)

    for corr in correlations:
        for run_index in range(num_runs):
            stored = read_network_properties(f"networks/random/{corr}/network_{run_index}.txt")

            # Both networks are created from exactly the same arguments and seed.
            shared_kwargs = dict(
                mean=0,
                correlation=corr,
                update_fraction=update_fraction,
                starting_distribution=starting_distribution,
                seed=stored["Seed"],
                p=edge_probability,
            )
            before_network = Network(num_nodes, "random", **shared_kwargs)
            after_network = Network(num_nodes, "random", **shared_kwargs)

            # Drop the freshly generated edges of the "after" network and index
            # its nodes by ID so the stored edges can be re-attached.
            after_network.connections = set()
            nodes_by_id = defaultdict(Node)
            for node in after_network.all_nodes:
                node.node_connections = set()
                nodes_by_id[node.ID] = node

            # Re-create every stored edge; only the first endpoint's
            # node_connections set receives the second endpoint.
            for first_id, second_id in stored["Connections"]:
                first_node = nodes_by_id[first_id]
                second_node = nodes_by_id[second_id]
                first_node.node_connections.add(second_node)
                after_network.connections.add((first_node, second_node))

            networks[(corr, run_index)] = (before_network, after_network)

    return networks
    

all_networks = read_and_load_networks(num_runs, num_nodes, update_fraction, average_degree, starting_distribution, correlations)


# Sanity check: re-evolve one network from scratch (correlation -1.0, seed
# initial_seeds[0] + 1) and verify that it ends up with the same edges as the
# "after" network that was loaded from disk for key (-1.0, 0).
used_seed = int(initial_seeds[0] +1)
test_network = Network(num_nodes, mean=0, correlation=-1.0, update_fraction=update_fraction, starting_distribution=starting_distribution, seed=used_seed, p=p)

number_of_alterations = 0
for _ in range(updates):
    test_network.update_round()
    number_of_alterations += test_network.alterations
    test_network.clean_network()

# Compare the edge sets by node ID, since the two networks hold distinct Node objects.
loaded_edge_ids = {(conn[0].ID, conn[1].ID) for conn in all_networks[(-1.0, 0)][1].connections}
regenerated_edge_ids = {(conn[0].ID, conn[1].ID) for conn in test_network.connections}
assert loaded_edge_ids == regenerated_edge_ids, "The networks that are generated should be the same"



#### Analyzing and plotting the Cascade distributions

In [6]:
def create_data(iters, network, number_of_samplers=20):
    """
    Repeatedly analyze a network and group cascade polarizations by cascade size.

    Args:
        iters (int): Number of times ``network.analyze_network()`` is called.
        network: Object exposing ``analyze_network()``, which must return a
            3-tuple ``(cascades, cascade_dist, cascade_polarization)`` where the
            last two are equally long sequences of sizes and polarizations.
        number_of_samplers (int): Divisor used to turn the summed cascade sizes
            of one round into the per-round average size. Defaults to 20, the
            value that used to be hard-coded here.

    Returns:
        tuple: ``(data, average_data)``, both ``defaultdict(list)``:
            - ``data`` maps each individual cascade size to the sorted list of
              polarizations observed for cascades of that size.
            - ``average_data`` maps each per-round average cascade size
              (``sum(cascade_dist) / number_of_samplers``) to the sorted list of
              per-round polarization totals.

    NOTE(review): the per-round "average" polarization is the plain sum of the
    round's polarizations; it is not divided by ``number_of_samplers``. This is
    kept as-is — confirm whether that is intended.
    """
    all_cascade_sizes = []
    all_polarizations = []
    average_cascade_per_round = []
    average_polarization_per_round = []

    for _ in range(iters):
        # The first element (the cascades themselves) is not needed here.
        _, cascade_dist, cascade_polarization = network.analyze_network()
        average_cascade_per_round.append(sum(cascade_dist) / number_of_samplers)
        average_polarization_per_round.append(sum(cascade_polarization))
        all_cascade_sizes += cascade_dist
        all_polarizations += cascade_polarization

    # Group every individual cascade's polarization under its size.
    data = defaultdict(list)
    for size, polarization in zip(all_cascade_sizes, all_polarizations):
        data[size].append(polarization)
    for size in data:
        data[size].sort()

    # Same grouping, but on the per-round aggregates.
    average_data = defaultdict(list)
    for size, polarization in zip(average_cascade_per_round, average_polarization_per_round):
        average_data[size].append(polarization)
    for size in average_data:
        average_data[size].sort()

    return data, average_data
    

In [11]:
def multiple_correlations(corr, all_networks, number_of_experiments=10, number_of_iters=10000, save=False):
    """
    Pool cascade data over several stored networks of one correlation and plot it.

    For every experiment index ``i`` the ``(before, after)`` network pair stored
    under ``all_networks[(corr, i)]`` is analyzed with ``create_data``; the
    resulting polarizations are pooled per cascade size across all experiments
    and handed to ``plot_cascade_dist_average`` once for "before" and once for
    "after".

    Args:
        corr: Correlation value; first component of the ``all_networks`` keys.
        all_networks (Mapping): Maps ``(corr, i)`` to ``(before_network, after_network)``.
        number_of_experiments (int): How many network pairs (``i`` in
            ``range(number_of_experiments)``) to pool. Defaults to 10.
        number_of_iters (int): Analysis iterations per network. Defaults to 10000.
        save (bool): Forwarded to ``plot_cascade_dist_average``. Defaults to False.

    Returns:
        tuple: ``(collection_of_all_before, collection_of_all_after)``, each a
        ``defaultdict(list)`` mapping cascade size to pooled polarizations.
    """
    collection_of_all_before = defaultdict(list)
    collection_of_all_after = defaultdict(list)
    largest_size_of_all = 0

    for i in range(number_of_experiments):

        before_network, after_network = all_networks[(corr, i)]

        # Only the per-cascade data is used; the per-round averages are discarded.
        before_data, _ = create_data(number_of_iters, before_network)
        after_data, _ = create_data(number_of_iters, after_network)

        # `default=0` keeps this from raising when a network produced no cascades.
        largest_size_of_all = max(
            largest_size_of_all,
            max(before_data, default=0),
            max(after_data, default=0),
        )

        for size, polarizations in before_data.items():
            collection_of_all_before[size].extend(polarizations)

        for size, polarizations in after_data.items():
            collection_of_all_after[size].extend(polarizations)

        print(f"finsihed cascade experimentation {i}")

    plot_cascade_dist_average(collection_of_all_before, "before", largest_size_of_all, number_of_experiments, save, corr)
    plot_cascade_dist_average(collection_of_all_after, "after", largest_size_of_all, number_of_experiments, save, corr)

    return collection_of_all_before, collection_of_all_after

In [None]:
# Pooled cascade data per correlation value:
# correlation -> (cascade size -> list of polarizations).
cascades_before = defaultdict(lambda: defaultdict(list))
cascades_after = defaultdict(lambda: defaultdict(list))

# Run the cascade experiment for every correlation and keep both result sets.
for corr in correlations:
    print(f"starting experimentation for correlation: {corr}")
    print("-----------------------------------------------")
    cs_before, cs_after = multiple_correlations(corr,all_networks)
    cascades_before[corr], cascades_after[corr] = cs_before, cs_after
