### **Installation and Loading**: Packages and Libraries

In [None]:
!pip install ndlib

In [None]:
import random
import json
import numpy as np
import pandas as pd
import networkx as nx
import ndlib.models.ModelConfig as mc
import ndlib.models.epidemics as ep
from ndlib.utils import multi_runs
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from pylab import rcParams

### **Cloning the Git Repository**

In [None]:
# Clone the data repositories into the current working directory.
# load_echochambers reads its CSV files from /content/EC_Reddit_CaseStudy/data,
# so this cell is expected to run with /content as the working directory.
# NOTE(review): re-running this cell after the folders exist makes git report an error.
!git clone https://github.com/virgiiim/EC_Reddit_CaseStudy
# NOTE(review): presumably the source of the *strategic_nodes_<topic>.json files
# opened later in the notebook — TODO confirm.
!git clone https://github.com/nicollasro/Echochamber

### **Structuring the Echo Chambers:** Reddit

In [None]:
def load_echochambers(topic,
                      data_dir="/content/EC_Reddit_CaseStudy/data",
                      snapshots=('2017-01-01_2017-07-01',
                                 '2017-07-01_2018-01-01',
                                 '2018-01-01_2018-07-01',
                                 '2018-07-01_2019-01-01',
                                 '2019-01-01_2019-07-01')):
  """Load one graph per snapshot for `topic`, reduced to its largest connected component.

  For every snapshot the function reads
  `<data_dir>/<topic>/<topic>_<snapshot>_edgelist.csv` (columns `from_id`,
  `to_id`, `weight`) and `<data_dir>/<topic>/<topic>_<snapshot>_nodelist.csv`
  (columns `id`, `leaning`). The `leaning` value is stored on each node as
  the `label` attribute.

  Parameters:
    topic: sub-folder and file-name prefix of the dataset.
    data_dir: root folder of the dataset; defaults to the Colab clone location.
    snapshots: snapshot identifiers to load, in the order they are returned.

  Returns:
    A list of graphs, one per snapshot, each an independent copy of the
    largest connected component.
  """
  networks = []
  for snapshot in snapshots:
    file_prefix = data_dir + '/' + topic + '/' + topic + '_' + snapshot
    df_edges = pd.read_csv(file_prefix + '_edgelist.csv', header=0)
    G = nx.from_pandas_edgelist(df_edges, 'from_id', 'to_id', ['weight'])
    dict_nodes_label = pd.read_csv(file_prefix + '_nodelist.csv', index_col='id')['leaning'].to_dict()
    nx.set_node_attributes(G, dict_nodes_label, 'label')
    # max() consumes the component generator directly; no intermediate list needed.
    largest_component = max(nx.connected_components(G), key=len)
    # .copy() turns the read-only subgraph view into a standalone graph, so the
    # full snapshot graph is not kept alive behind the view.
    networks.append(G.subgraph(largest_component).copy())
  return networks

### **Selection of Initial Nodes:** Random, Closeness, Degree, Betweenness, Information, Eigenvector

In [None]:
def find_initial_nodes(graph, nodes_quantity):
    """Select `nodes_quantity` seed nodes under each of five criteria.

    Returns a list of five node lists, always in this order: random sample,
    closeness, betweenness (edge attribute "weight"), degree, and
    information centrality (edge attribute "weight"). Each centrality list
    holds the highest-scoring nodes, best first.
    """

    def top_ranked(scores):
        # Descending sort on the score, then keep only the leading node ids.
        ranking = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
        return [node_id for node_id, _ in ranking[:nodes_quantity]]

    # Evaluated in the same order as they appear in the returned list.
    sampled = random.sample(list(graph.nodes()), nodes_quantity)
    by_closeness = top_ranked(nx.closeness_centrality(graph))
    by_betweenness = top_ranked(nx.betweenness_centrality(graph, weight="weight"))
    by_degree = top_ranked(nx.degree_centrality(graph))
    by_information = top_ranked(nx.information_centrality(graph, weight="weight"))
    return [sampled, by_closeness, by_betweenness, by_degree, by_information]

### **Models:** Independent Cascades Propagation, Profile and Profile-Threshold

In [None]:
def configura_probabilidades(G, model_name, initial_nodes, profile, blocked_nodes):
    """Build the NDlib `Configuration` for the requested diffusion model.

    Parameters:
      G: graph whose nodes/edges receive the configuration.
      model_name: "IndependentCascadesModel", "ProfileThresholdModel", or any
        other value (configured like the Profile model).
      initial_nodes: seed nodes of the diffusion.
      profile: per-node "profile" value (ignored by the Independent Cascades branch).
      blocked_nodes: nodes registered under the "Blocked" status (ignored by
        the Independent Cascades branch).

    Returns:
      The populated `mc.Configuration` object.

    Independent Cascades branch: every edge gets a high propagation value
    (0.9) and edges joining two seed nodes get a low one (0.1). The seed set
    itself is not stored in the configuration for this branch.
    """
    low_propagation_prob = 0.1
    high_propagation_prob = 0.9
    config = mc.Configuration()
    if model_name == "IndependentCascadesModel":
      # NDlib's IndependentCascadesModel reads its per-edge value from the
      # "threshold" parameter; values stored under any other name are ignored
      # and the model falls back to its default.
      seed_set = set(initial_nodes)  # O(1) membership for the neighbour test
      for edge in G.edges():
          config.add_edge_configuration("threshold", edge, high_propagation_prob)
      for node in initial_nodes:
          for neighbor in G.neighbors(node):
              if neighbor in seed_set:
                  # Both orientations get written because each seed visits the other.
                  config.add_edge_configuration("threshold", (node, neighbor), low_propagation_prob)
      return config

    elif model_name == "ProfileThresholdModel":
      print(model_name)
      config.add_model_initial_configuration("Blocked", blocked_nodes)
      print("Blocked Nodes: ", blocked_nodes)
      config.add_model_initial_configuration("Infected", initial_nodes)
      print("Initial Nodes: ", initial_nodes)
      # Threshold 0 is assigned to every node, together with the shared profile.
      threshold = 0
      for i in G.nodes():
          config.add_node_configuration("threshold", i, threshold)
          config.add_node_configuration("profile", i, profile)
      print(config.get_model_configuration())
      return config

    else:
      # Any other model name is configured as the plain Profile model.
      print(model_name)
      config.add_model_initial_configuration("Blocked", blocked_nodes)
      config.add_model_initial_configuration("Infected", initial_nodes)
      for i in G.nodes():
        config.add_node_configuration("profile", i, profile)
      print("Configuration: ", config.get_model_configuration())
      return config

def run_simulation(G, model_name, initial_nodes, config, execution_rounds, iterations, total_nodes):
    """Run `execution_rounds` independent simulations of the chosen model.

    Parameters:
      G: graph to simulate on.
      model_name: "IndependentCascadesModel", "ProfileThresholdModel", or any
        other value (simulated with the Profile model).
      initial_nodes: seed nodes; used directly only by the Independent
        Cascades branch (the other models take their seeds from `config`).
      config: NDlib configuration applied to every model instance.
      execution_rounds: number of independent executions.
      iterations: number of iterations per execution.
      total_nodes: divisor used to turn node counts into fractions.

    Returns:
      Independent Cascades: the raw output of `multi_runs`.
      Other models: a list with one entry per execution, each a list of
      per-iteration fractions as produced by `collect_node_counts`.
    """
    if model_name == "IndependentCascadesModel":
      model = ep.IndependentCascadesModel(G)
      model.set_initial_status(config)
      # multi_runs requires one seed set per execution, so the same seed list
      # is repeated for every round instead of passing the flat node list.
      infection_sets = [list(initial_nodes) for _ in range(execution_rounds)]
      trends = multi_runs(model, execution_number=execution_rounds, iteration_number=iterations,
                          infection_sets=infection_sets, nprocesses=4)
      return trends

    # Use the model that was actually requested: the threshold configuration is
    # only consumed by ProfileThresholdModel.
    if model_name == "ProfileThresholdModel":
      model_class = ep.ProfileThresholdModel
    else:
      model_class = ep.ProfileModel

    influence_evolution = []
    for _ in range(execution_rounds):
      # A fresh model per round keeps the executions independent.
      model = model_class(G)
      model.set_initial_status(config)
      iterations_set = model.iteration_bunch(iterations)
      influence_evolution.append(collect_node_counts(iterations_set, total_nodes))
    return influence_evolution

def collect_node_counts(iterations_set, total_nodes):
    """Convert iteration records into per-iteration fractions.

    For every record that has a 'node_count' entry, the value stored under
    key 1 of that entry is divided by `total_nodes`. Records without
    'node_count' are skipped. The result keeps the input order.
    """
    return [
        record['node_count'][1] / total_nodes
        for record in iterations_set
        if 'node_count' in record
    ]

def find_stability_position(lists):
    """Find, for each series, the index from which its values stop changing.

    For every series in `lists` the result is the first index `i` such that
    all values from `i` to the end are equal. A constant tail made of only
    the last element does not count as stable: in that case (and for empty
    or single-element series) the length of the series is returned.

    Returns:
      A list of positions, one per input series, in the same order.
    """
    positions = []
    for series in lists:
        length = len(series)
        # Walk backwards over the trailing run of equal values; a single pass
        # replaces re-checking the whole suffix for every candidate index.
        start = length - 1
        while start > 0 and series[start - 1] == series[start]:
            start -= 1
        if 0 <= start < length - 1:
            positions.append(start)
        else:
            # Tail of length <= 1: the series never settled.
            positions.append(length)
    return positions

### **Main Program:** No Blocking or Random Blocking

In [None]:
# --- Experiment setup ---------------------------------------------------------
# Dataset: topic_index selects the entry of `topics` to load.
topics = ['politics','guncontrol','minority']
topic_index = 2
networks = load_echochambers(topic = topics[topic_index])

# Diffusion model: model_num selects the entry of `model_name`.
model_name= ["IndependentCascadesModel",'ProfileThresholdModel', 'Profile']
model_num = 2
# Seed-selection criterion: inicial_nodes_type indexes both this list of names
# and the list returned by find_initial_nodes (same order).
inicial_nodes_logic = ["aleatory", "closeness","betweeness", "degree", "information"]
inicial_nodes_type = 4
print("Criterion: ", inicial_nodes_logic[inicial_nodes_type])

# Simulation size: iterations per run, runs per configuration, and the share of
# nodes used as seeds / blocked nodes.
iterations = 100
execution_rounds = 30
inicial_percentage = 0.025
# With block_percentage = 0 the sample below is empty, i.e. no node is blocked.
block_percentage = 0.05
print("Model: ", model_name[model_num])
print("Topic: ", topics[topic_index])

# One entry is appended per (profile, snapshot) pair, profile-major:
# indices 0-4 hold profile 0.9, indices 5-9 hold profile 0.5.
influence_percentage_list=[]
instability_iteration = []
for profile in [0.9, 0.5]:
  # load_echochambers returns five snapshots by default.
  for snapshot in range(5):
    n_nodes = networks[snapshot].nodes
    total_nodes = len(n_nodes)
    nodes_quantity = int(inicial_percentage * total_nodes)
    # All criteria are computed; only the selected one is used.
    initial_nodes_list = find_initial_nodes(networks[snapshot], nodes_quantity)
    initial_nodes = initial_nodes_list[inicial_nodes_type]

    # Blocked nodes are drawn at random among the nodes that are not seeds.
    available_nodes = list(set(n_nodes) - set(initial_nodes_list[inicial_nodes_type]))
    blocked_nodes_quantity = int(total_nodes*block_percentage)
    blocked_nodes = random.sample(available_nodes, blocked_nodes_quantity)
    print("Inicial Nodes: ", initial_nodes)
    print('Blocked Nodes Quantity:', len(blocked_nodes))
    config = configura_probabilidades(networks[snapshot], model_name[model_num], initial_nodes, profile, blocked_nodes)
    influence_evolution = run_simulation(networks[snapshot], model_name[model_num], initial_nodes, config, execution_rounds, iterations, total_nodes)
    print(influence_evolution)
    # Iteration index at which each run stopped changing.
    stability_positions = find_stability_position(influence_evolution)
    print(stability_positions)
    instability_iteration.append(stability_positions)
    influence_percentage_list.append(influence_evolution)
    # NOTE(review): prints the whole accumulated list on every pass, so the
    # output grows with each snapshot.
    print(influence_percentage_list)

### **Main Program:** Selective Blocking

In [None]:
# --- Experiment setup (same parameters as the random-blocking cell) -----------
# Dataset: topic_index selects the entry of `topics` to load.
topics = ['politics','guncontrol','minority']
topic_index = 2
networks = load_echochambers(topic = topics[topic_index])

# Diffusion model: model_num selects the entry of `model_name`.
model_name= ["IndependentCascadesModel",'ProfileThresholdModel', 'Profile']
model_num = 2
# Seed-selection criterion: inicial_nodes_type indexes both this list of names
# and the list returned by find_initial_nodes (same order).
inicial_nodes_logic = ["aleatory", "closeness","betweeness", "degree", "information"]
inicial_nodes_type = 4
print("Criterion: ", inicial_nodes_logic[inicial_nodes_type])

iterations = 100
execution_rounds = 30
inicial_percentage = 0.025
# Not referenced in this cell: the blocked nodes come from the JSON file below.
block_percentage = 0.05
print("Model: ", model_name[model_num])
print("Topic: ", topics[topic_index])

# Pre-computed blocked nodes, read relative to the working directory and
# indexed by snapshot position in the loop below.
# NOTE(review): presumably one list of node ids per snapshot — verify against the file.
with open("diff_strategic_nodes_"+topics[topic_index]+".json", 'r') as f:
  blocked_nodes = json.load(f)

# One entry is appended per (profile, snapshot) pair, profile-major:
# indices 0-4 hold profile 0.9, indices 5-9 hold profile 0.5.
influence_percentage_list=[]
instability_iteration = []
for profile in [0.9, 0.5]:
  # load_echochambers returns five snapshots by default.
  for snapshot in range(5):
    n_nodes = networks[snapshot].nodes
    total_nodes = len(n_nodes)
    nodes_quantity = int(inicial_percentage * total_nodes)
    # All criteria are computed; only the selected one is used.
    initial_nodes_list = find_initial_nodes(networks[snapshot], nodes_quantity)
    initial_nodes = initial_nodes_list[inicial_nodes_type]

    # NOTE(review): nothing here removes seed nodes from the blocked list —
    # confirm the JSON file never overlaps with the selected seeds.
    config = configura_probabilidades(networks[snapshot], model_name[model_num], initial_nodes, profile, blocked_nodes[snapshot])
    influence_evolution = run_simulation(networks[snapshot], model_name[model_num], initial_nodes, config, execution_rounds, iterations, total_nodes)
    print(influence_evolution)
    # Iteration index at which each run stopped changing.
    stability_positions = find_stability_position(influence_evolution)
    print(stability_positions)
    instability_iteration.append(stability_positions)
    influence_percentage_list.append(influence_evolution)
    # NOTE(review): prints the whole accumulated list on every pass, so the
    # output grows with each snapshot.
    print(influence_percentage_list)

In [None]:
# Per-series plot styling, indexed in the same order as influence_percentage_list:
# positions 0-4 are snapshots S1-S5 at profile 0.9, positions 5-9 the same
# snapshots at profile 0.5.
# Names of the seed-selection criteria; used to build the output file name.
inicial_nodes_logic =["aleatory", "closeness","betweeness", "degree", "information"]
nodes_labels = ["$S_{1} (Pfl=0.9)$", "$S_{2} (Pfl=0.9)$", "$S_{3} (Pfl=0.9)$", "$S_{4} (Pfl=0.9)$", "$S_{5} (Pfl=0.9)$","$S_{1} (Pfl=0.5)$", "$S_{2} (Pfl=0.5)$", "$S_{3} (Pfl=0.5)$", "$S_{4} (Pfl=0.5)$", "$S_{5} (Pfl=0.5)$"]
# Colour encodes the profile value (green: 0.9, orange: 0.5).
colors = ['green', 'green', 'green', 'green', 'green','orange', 'orange', 'orange', 'orange', 'orange']
# NOTE(review): `markers` is defined here but is not passed to plt.plot in this cell.
markers= ["o", "^", "s", "<", "*", "o", "^", "s", "<", "*"]
# Line style encodes the snapshot (one style per S1..S5, repeated for both profiles).
linestyles = ['-', '--', ':', '-.', (0, (3, 1, 1, 1, 1, 1)),'-', '--', ':', '-.', (0, (3, 1, 1, 1, 1, 1))]


def plot_lines(influence_percentage_list):
    """Plot the mean influence curve of every (profile, snapshot) series and save it as PDF.

    Parameters:
      influence_percentage_list: one entry per series; each entry is a list
        of runs and each run is a list of per-iteration values. The curve
        drawn for a series is the element-wise mean over its runs.

    Styling comes from the module-level `nodes_labels`, `colors` and
    `linestyles` lists; the output file name is built from `model_name`,
    `model_num`, `topics`, `topic_index`, `inicial_nodes_logic` and
    `inicial_nodes_type`. Labels are attached to the lines, but no legend is
    drawn by this function.
    """
    plt.figure(figsize=(7, 6))
    # Draw only the series that have both data and a complete style entry,
    # instead of assuming exactly ten of each.
    series_count = min(len(influence_percentage_list), len(nodes_labels),
                       len(colors), len(linestyles))
    for i in range(series_count):
      runs = influence_percentage_list[i]
      # zip(*runs) groups the values of all runs iteration by iteration.
      mean = [sum(step_values) / len(runs) for step_values in zip(*runs)]
      # The x axis follows the data length so it always matches `mean`.
      plt.plot(list(range(len(mean))), mean, label=nodes_labels[i], color=colors[i],
               linestyle=linestyles[i],
               )

    plt.xlabel('Iterations',fontsize=20)
    plt.ylabel('% of Network Affected',fontsize=20)
    plt.xticks(np.arange(0, iterations+1, 15))
    plt.grid(True)
    plt.tick_params(labelsize=20)

    # Thick black frame around the plotting area.
    ax = plt.gca()
    for axis in ['top','bottom','left','right']:
      ax.spines[axis].set_linewidth(2)
      ax.spines[axis].set_color('black')
    # Single layout call; left=0.2 is the value the original second call ended up with.
    plt.subplots_adjust(left=0.2, bottom=0.18, right=0.9, top=0.888, wspace=0.2, hspace=0.2)
    plt.savefig('/content/Evolutional_blocking_strategic_'+model_name[model_num]+'_'+topics[topic_index]+'_'+inicial_nodes_logic[inicial_nodes_type]+'.pdf',bbox_inches='tight')
    plt.show()

plot_lines(influence_percentage_list)

### **Nodes Triviality Analysis**

In [None]:
def get_sorted_nodes_by_centrality(G, centrality_func):
    """Return the nodes of `G` ordered from highest to lowest centrality.

    `centrality_func` is called with `G` and must return a mapping from node
    to score. Nodes with equal scores keep the order of that mapping.
    """
    scores = centrality_func(G)
    ranked_pairs = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return [node for node, _ in ranked_pairs]

def calculate_probability(nodes_to_check, sorted_nodes, percentile):
    """Fraction of `nodes_to_check` found among the top `percentile` of `sorted_nodes`.

    Parameters:
      nodes_to_check: nodes whose presence is tested (must be hashable).
      sorted_nodes: nodes ordered best-first.
      percentile: share of `sorted_nodes` considered "top", e.g. 0.05.

    Returns:
      A float in [0, 1]. 0.0 is returned when `nodes_to_check` is empty or
      when the top slice holds fewer nodes than `nodes_to_check`.
    """
    # Empty input previously ended in a division by zero.
    if len(nodes_to_check) == 0:
        return 0.0
    top_percentile_count = int(len(sorted_nodes) * percentile)
    if top_percentile_count < len(nodes_to_check):
        return 0.0
    # A set makes each membership test O(1) instead of scanning the slice.
    top_percentile_nodes = set(sorted_nodes[:top_percentile_count])
    present_count = sum(1 for node in nodes_to_check if node in top_percentile_nodes)
    return present_count / len(nodes_to_check)

def calculate_confidence_interval(probability, n, confidence=0.95):
    """Normal-approximation confidence interval for a proportion.

    Parameters:
      probability: observed proportion in [0, 1].
      n: sample size; must be positive.
      confidence: two-sided confidence level strictly between 0 and 1.

    Returns:
      `(lower_bound, upper_bound)`, both clipped to [0, 1].

    Raises:
      ValueError: if `n` is not positive.
      statistics.StatisticsError: if `confidence` is not strictly between 0 and 1.
    """
    # Local import keeps the function self-contained (standard library only).
    from statistics import NormalDist

    if n <= 0:
        raise ValueError("n must be a positive sample size")
    # Two-sided critical value, e.g. about 1.96 for confidence=0.95.
    z = NormalDist().inv_cdf((1 + confidence) / 2)
    interval = z * np.sqrt((probability * (1 - probability)) / n)
    lower_bound = np.clip(probability - interval, 0, 1)
    upper_bound = np.clip(probability + interval, 0, 1)
    return (lower_bound, upper_bound)

In [None]:
# Triviality analysis: for every snapshot, measure how many of the strategic
# (blocked) nodes fall within the top 5/10/15/20 % of common centrality
# rankings, and draw one grouped bar chart per snapshot.
topics = ['politics','guncontrol','minority']
topic_index = 2
# Strategic nodes are read relative to the working directory and indexed by
# snapshot position.
with open("strategic_nodes_"+topics[topic_index]+".json", 'r') as f:
  blocked_nodes = json.load(f)
networks = load_echochambers(topic = topics[topic_index])

# --- Definitions that do not depend on the snapshot (hoisted out of the loop) --
centrality_functions = {
    'Degree': nx.degree_centrality,
    'Closeness': nx.closeness_centrality,
    'Betweenness': nx.betweenness_centrality,
    'PageRank': lambda graph: nx.pagerank(graph, alpha=0.85),
    'Harmonic': nx.harmonic_centrality,
}
percentiles = [0.05, 0.10, 0.15, 0.20]
centralities = list(centrality_functions.keys())
num_centralities = len(centralities)
x_labels = [f"{int(p * 100)}%" for p in percentiles]
# One bar colour per centrality measure, in the order of `centralities`.
colors = ["lightgray","gray","green","goldenrod","darkkhaki"]
bar_width = 0.15
index = np.arange(len(percentiles))
tamanho = 25  # font size shared by tick labels and axis labels

for snapshot in range(5):
  G = networks[snapshot]
  nodes_to_check = blocked_nodes[snapshot]

  # One (centrality, percentile, probability %, margin of error %) row per
  # combination, centrality-major so the reshape below lines up.
  results = []
  for centrality_name, centrality_func in centrality_functions.items():
      sorted_nodes = get_sorted_nodes_by_centrality(G, centrality_func)
      for p in percentiles:
          probability = calculate_probability(nodes_to_check, sorted_nodes, p)
          conf_interval = calculate_confidence_interval(probability, len(nodes_to_check))
          # Upper half-width of the (clipped) interval, used as the error bar.
          margin_of_error = conf_interval[1] - probability
          results.append((centrality_name, p, probability*100, margin_of_error*100))

  probabilities = np.array([prob for _, _, prob, _ in results]).reshape(num_centralities, len(percentiles))
  errors = np.array([err for _, _, _, err in results]).reshape(num_centralities, len(percentiles))

  fig, ax = plt.subplots(figsize=(7, 6))
  for i, (centrality, color) in enumerate(zip(centralities, colors)):
      # Bars of one centrality are shifted by i bar widths within each percentile group.
      ax.bar(index + i * bar_width, probabilities[i], bar_width, yerr=errors[i], label=centrality, capsize=5, color=color)
  # Centre the tick under each group of bars.
  ax.set_xticks(index + bar_width * (num_centralities - 1) / 2)
  ax.set_xticklabels(x_labels)
  ax.tick_params(labelsize=tamanho)
  ax.set_xlabel('Percentil', fontsize=tamanho)
  ax.set_ylabel('Probability (%)', fontsize=tamanho)
  ax.set_ylim(0, 105)
  for axis in ['top','bottom','left','right']:
    ax.spines[axis].set_linewidth(2)
    ax.spines[axis].set_color('black')
  # Single layout call; left=0.2 is the value the original second call ended up with.
  fig.subplots_adjust(left=0.2, bottom=0.18, right=0.9, top=0.888, wspace=0.2, hspace=0.2)
  fig.savefig('/content/Percentil_'+topics[topic_index]+'_'+str(snapshot)+'.pdf',bbox_inches='tight')

  plt.show()
  # Release the figure so repeated snapshots do not accumulate open figures.
  plt.close(fig)
