In [None]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

In [None]:
# Read the two CSV inputs: the edge list (columns id_1 / id_2 are used below)
# and the per-node table (columns id / ml_target are used below).
edges_csv = "../data/musae_git_edges.csv"
nodes_csv = "../data/musae_git_target.csv"

df_edges = pd.read_csv(edges_csv)
df_nodes = pd.read_csv(nodes_csv)

In [None]:
# Preview the first five rows of the node table
df_nodes.head(n=5)

In [None]:
# Preview the first five rows of the edge table
df_edges.head(n=5)

In [None]:
# Build the undirected graph in bulk rather than with row-by-row iterrows()
# loops, which are slow on large tables and box every row into a Series.
G = nx.Graph()

# Add every node in one call; the `ml_target` column becomes the node's
# initial `status` attribute. `.tolist()` converts the column values to
# native Python scalars so node keys have the same type in both tables.
G.add_nodes_from(
    (node_id, {'status': status})
    for node_id, status in zip(df_nodes['id'].tolist(), df_nodes['ml_target'].tolist())
)

# Add every edge in one call
G.add_edges_from(zip(df_edges['id_1'].tolist(), df_edges['id_2'].tolist()))

In [None]:
def run_simulation(G, beta, gamma, max_iter=None, seed=None):
    """Run a one-node-per-step SIR contagion on ``G`` and record its trajectory.

    Node state is read from and written to the ``status`` node attribute:
    0 = susceptible, 1 = infected, 2 = recovered. Nodes with any other status
    are ignored. At every step one infected node is drawn uniformly at random;
    each of its susceptible neighbours becomes infected with probability
    ``beta``, and then the drawn node recovers with probability ``gamma``.

    Parameters
    ----------
    G : graph
        Graph whose nodes all carry a ``status`` attribute. It is modified in
        place (``status`` is updated, ``influence`` is reset to 0 and then
        counts the infections each node caused), so pass ``G.copy()`` to keep
        the original untouched.
    beta : float
        Per-neighbour infection probability.
    gamma : float
        Recovery probability of the drawn node.
    max_iter : int, optional
        Maximum number of steps. ``None`` (or ``False``) means no cap: the run
        continues until no infected node is left, which requires ``gamma > 0``
        to terminate once at least one node is infected.
    seed : int, optional
        If given, a private ``np.random.RandomState(seed)`` drives the run,
        making it reproducible. Otherwise numpy's global random state is used.

    Returns
    -------
    tuple
        ``(G, num_infected, num_susceptible, num_recovered, n_steps, beta, gamma)``.
        The three lists hold the compartment sizes recorded at the start of
        every step. When the run ends because no infected node remains, a
        final snapshot is recorded too, so the lists then contain
        ``n_steps + 1`` entries; when it ends at ``max_iter`` they contain
        exactly ``n_steps`` entries.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)

    # `False == 0` is True in Python, so comparing the step counter against a
    # `False` sentinel would stop the run before its first step. Treat
    # None/False explicitly as "no cap" instead.
    no_limit = max_iter is None or max_iter is False

    num_infected = []
    num_recovered = []
    num_susceptible = []

    # Scan the graph once to reset `influence` and to seed the compartment
    # bookkeeping; afterwards the compartments are maintained incrementally
    # instead of re-scanning every node at every step.
    infected = []
    n_susceptible = 0
    n_recovered = 0
    for node in G.nodes:
        attrs = G.nodes[node]
        attrs['influence'] = 0  # initially, no node has influenced anyone
        status = attrs['status']
        if status == 1:
            infected.append(node)
        elif status == 0:
            n_susceptible += 1
        elif status == 2:
            n_recovered += 1

    step = 0
    while no_limit or step < max_iter:
        # Record the compartment sizes for plotting
        num_infected.append(len(infected))
        num_susceptible.append(n_susceptible)
        num_recovered.append(n_recovered)

        if not infected:  # epidemic has died out
            break

        # Draw one currently infected node uniformly at random
        pick = rng.randint(len(infected))
        node = infected[pick]

        # Infect each susceptible neighbour with probability beta. The strict
        # `<` makes the event probability exactly beta (never fires for 0).
        for neighbor in G.neighbors(node):
            if G.nodes[neighbor]['status'] == 0 and rng.random() < beta:
                G.nodes[neighbor]['status'] = 1
                G.nodes[node]['influence'] += 1  # credit the infecting node
                infected.append(neighbor)
                n_susceptible -= 1

        # The drawn node recovers with probability gamma
        if rng.random() < gamma:
            G.nodes[node]['status'] = 2
            # O(1) removal: overwrite the slot with the last entry, then pop
            infected[pick] = infected[-1]
            infected.pop()
            n_recovered += 1

        step += 1

    return G, num_infected, num_susceptible, num_recovered, step, beta, gamma

In [None]:
# (beta, gamma) pairs to simulate, in the order the results are stored
param_grid = [
    (0.3, 0.2),
    (0.5, 0.2),
    (0.5, 0.3),
    (0.7, 0.3),
    (0.7, 0.5),
    (1.0, 0.5),
]

# Each run gets its own copy of G, because run_simulation mutates the graph it is given
result_groups = [
    run_simulation(G.copy(), beta, gamma, max_iter=50000)
    for beta, gamma in param_grid
]

In [None]:
# Scratch data: each entry is (x values, first y series, second y series)
aa = [([1,2,3,4], [5,4,3,2], [2,3,4,5]), ([1,2,3,4], [1,2,3,4], [1, 5, 6, 9])]

# Draw one figure per entry, overlaying both series on the same axes
for results in aa:
    x_points, first_series, second_series = results
    fig, ax = plt.subplots()

    for y_points, line_color, line_label in (
        (first_series, 'red', 'lab1'),
        (second_series, 'blue', 'lab2'),
    ):
        ax.plot(x_points, y_points, color=line_color, label=line_label)

    ax.legend()
    ax.set_title("Plot 1")

# Render every figure created above
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Plot the S / I / R trajectories of every simulated scenario, one figure each.
# Looping over `result_groups` keeps this cell independent of whatever an
# earlier cell may have left bound to `results`.
for results in result_groups:
    sim_graph, num_infected, num_susceptible, num_recovered, n_iter, beta, gamma = results

    # Size the x axis from the series itself: it holds one more sample than
    # `n_iter` when the run ended because no infected node was left.
    x_vals = range(len(num_infected))

    fig, ax = plt.subplots()

    # Plotting the three lines with different colors
    ax.plot(x_vals, num_infected, color='red', label='Infected')
    ax.plot(x_vals, num_susceptible, color='green', label='Susceptible')
    ax.plot(x_vals, num_recovered, color='blue', label='Recovered')

    # Adding labels and title (beta/gamma tell the scenarios apart)
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Number of nodes')
    ax.set_title(f'S vs I vs R for {n_iter} iterations (beta={beta}, gamma={gamma})')

    # Adding a legend
    ax.legend()

# Displaying the plots
plt.show()

In [None]:
# Graph produced by one simulation run; element 0 of a result tuple is the
# mutated graph.
# NOTE(review): this selects the first scenario in `result_groups` - change
# the index to inspect a different (beta, gamma) run.
sim_graph = result_groups[0][0]

# Get the node attributes as dictionaries keyed by node id
node_dict = nx.get_node_attributes(sim_graph, 'status')
influence_dict = nx.get_node_attributes(sim_graph, 'influence')

# Convert to a DataFrame. Columns are materialised as lists and `influence`
# is looked up by node id, so rows stay aligned by key rather than by the
# iteration order of two separate dicts.
df = pd.DataFrame({
    'id': list(node_dict),
    'status': list(node_dict.values()),
    'influence': [influence_dict[node] for node in node_dict],
})