In [9]:
import csv, sys, copy, random
import networkx as nx
import pandas as pd
import numpy as np
import scipy.stats as st  # for confidence intervals

def independent_cascade(G, seeds, steps):
    """Run one independent-cascade diffusion on G starting from seeds.

    Parameters
    ----------
    G : graph object exposing ``is_multigraph()``, ``in`` membership for
        nodes, and whatever ``_diffuse_all`` / ``_diffuse_k_rounds`` need.
    seeds : iterable of nodes that are active at the start.  It is copied,
        so the caller's object is never modified.
    steps : number of diffusion rounds to run.  A value <= 0 means "keep
        going until a round activates no new node".

    Returns
    -------
    list of lists: element 0 holds the seeds, each following element holds
    the nodes activated in the corresponding round.

    Raises
    ------
    Exception   if G is a multigraph (including subclasses).
    ValueError  if a seed, converted with ``int()``, is not a node of G.
    """
    # is_multigraph() also rejects subclasses of the multigraph types, which
    # an exact type() comparison would let through.
    if G.is_multigraph():
        raise Exception(
            "independent_cascade() is not defined for graphs with multiedges.")

    # Copy into a list up front: protects the caller's object and guarantees
    # the working set supports extend() even if seeds is a tuple or set.
    A = list(seeds)

    # make sure the seeds are in the graph
    for s in A:
        if int(s) not in G:
            raise ValueError("seed %s is not in graph" % (s,))

    # perform diffusion
    if steps <= 0:
        # perform diffusion until no more nodes can be activated
        return _diffuse_all(G, A)
    # perform diffusion for at most "steps" rounds
    return _diffuse_k_rounds(G, A, steps)

def _diffuse_all(G, A):
    """Diffuse from the active list A until a round activates nothing new.

    Returns a list of layers: layer 0 is a copy of A as passed in, and every
    later layer is the list of nodes activated in that round (the final
    layer is the empty round that ended the loop).
    """
    seen_edges = set()
    layers = [list(A)]  # snapshot, so later growth of A does not leak in
    while True:
        size_before = len(A)
        A, newly_active, round_edges = _diffuse_one_round(G, A, seen_edges)
        layers.append(newly_active)
        seen_edges = seen_edges | round_edges
        if len(A) == size_before:
            # no growth in this round: the cascade has died out
            return layers

def _diffuse_k_rounds(G, A, steps):
    """Diffuse from the active list A for at most `steps` rounds.

    Stops early when every node of G is active or when a round activates
    nothing new.  Returns a list of layers: layer 0 is a copy of A as passed
    in, each later layer is the list of nodes activated in that round.
    """
    seen_edges = set()
    layers = [list(A)]
    rounds_left = steps
    while rounds_left > 0 and len(A) < len(G):
        size_before = len(A)
        A, newly_active, round_edges = _diffuse_one_round(G, A, seen_edges)
        layers.append(newly_active)
        seen_edges = seen_edges | round_edges
        if len(A) == size_before:
            break
        rounds_left -= 1
    return layers

def _diffuse_one_round(G, A, tried_edges):
    """Run a single diffusion round from the currently active nodes.

    Every edge (s, nb) from an active node s to a not-yet-active successor
    nb that has not been attempted before gets exactly one activation
    attempt via ``_prop_success``.

    Parameters
    ----------
    G : graph exposing ``successors(node)``.
    A : list of active nodes; it is extended in place with the nodes
        activated in this round.
    tried_edges : set of (src, dest) edges attempted in earlier rounds;
        it is only read here.

    Returns
    -------
    (A, activated_nodes_of_this_round, cur_tried_edges) where the second
    item is a list and the third a set of the edges attempted in this round.
    """
    # A is a list, so "nb in A" would rescan it for every edge.  A is not
    # modified until after the loops, so one set snapshot is equivalent.
    active = set(A)
    activated_nodes_of_this_round = set()
    cur_tried_edges = set()
    for s in A:
        for nb in G.successors(s):
            edge = (s, nb)
            if nb in active or edge in tried_edges or edge in cur_tried_edges:
                continue
            if _prop_success(G, s, nb):
                activated_nodes_of_this_round.add(nb)
            cur_tried_edges.add(edge)
    activated_nodes_of_this_round = list(activated_nodes_of_this_round)
    A.extend(activated_nodes_of_this_round)
    return A, activated_nodes_of_this_round, cur_tried_edges

def _prop_success(G, src, dest):
    """Return True if the activation attempt along edge (src, dest) succeeds.

    ``G[src][dest]['Rating']`` is used as the success probability.
    random.random() draws from [0.0, 1.0), so a strict "<" succeeds with
    probability exactly Rating: a Rating of 0 never succeeds and any
    Rating >= 1 always succeeds.

    NOTE(review): callers must therefore store 'Rating' on a 0-1 scale for
    the attempt to be genuinely random -- confirm against the code that
    fills in the edge attribute.
    """
    return random.random() < G[src][dest]['Rating']

# Load the edge list (columns Source, Target, Rating) into a directed graph.
data = pd.read_csv('soc-sign-bitcoinotc_3col.csv')
G = nx.from_pandas_edgelist(
    data,
    'Source',
    'Target',
    ['Rating'],
    create_using=nx.DiGraph,
)
G_nodes = G.number_of_nodes()
G_edges = G.number_of_edges()
node_list = G.nodes()

# Work on a directed copy so the loaded graph G keeps its original ratings.
DG = copy.deepcopy(G) if G.is_directed() else G.to_directed()

# Shift every rating up by 10 on the working copy.
for src, dst in DG.edges():
    DG[src][dst]['Rating'] += 10

# Rescale each node's outgoing 'Rating' values so that they express that
# edge's share of the node's total outgoing rating, as a percentage rounded
# to one decimal place.  Nodes whose outgoing ratings sum to 0 get 0 on
# every outgoing edge.
#
# NOTE(review): the stored values are on a 0-100 scale, while
# _prop_success() compares 'Rating' against random.random(), which lies in
# [0, 1).  Any edge holding >= 1 (i.e. >= 1 %) therefore always propagates.
# Confirm whether a 0-1 scale was intended before relying on the results.
for n in DG.nodes():
    # Materialise once: the same successor list drives both passes.
    successors = list(DG.successors(n))

    # Despite its name this is the sum over n's *outgoing* edges.
    total_in_weight = sum(DG[n][p]['Rating'] for p in successors)

    for p in successors:
        if total_in_weight == 0:
            DG[n][p]['Rating'] = 0
        else:
            DG[n][p]['Rating'] = round(
                (DG[n][p]['Rating'] / total_in_weight) * 100, 1)

# Simulation configuration and candidate seed sets.
# seed_set_size="50"
# Number of independent cascade runs averaged for each step count.
no_of_iterations = 100
# Placeholder only: the simulation loop below rebinds step_no via range(1, 6).
step_no = 0

# Hard-coded seed node ids, one list per seed-selection method.
# NOTE(review): the suffixes (DC, BC, CC, EC, PC, HC, kcore, icc) presumably
# name the ranking method that produced each list -- confirm with the author.
# Only seeds_kcore is used by the simulation loop in this cell.
seeds_DC = [35,2642,1810,2125,2028,905,4172,7,1,4197,13,1018,2296,1953,2388,4291,1334,546,1386,3988,2067,2045,1396,1352,1899,3735,2942,202,3897,3129,1565,1317,2625,3451,304,3649,3828,41,2266,1566,1832,4559,135,1731,353,832,2600,1383,1217,1585]
seeds_BC = [35,2642,1810,905,1,4172,2125,7,2028,1953,13,2388,1018,2067,546,1334,3988,1352,2296,4197,1396,4291,3735,3129,1386,202,41,2045,304,1899,832,353,1217,1162,1317,2266,3828,1615,1565,2942,3018,2635,2625,775,1566,2647,592,2378,3451,1383]
seeds_CC = [905,35,1,2642,13,2388,1810,1334,353,4172,2028,1832,4291,2067,7,1018,2125,57,1565,304,1386,546,592,41,2045,25,1396,4197,1899,550,729,1555,1953,1316,3744,1352,1383,1731,1629,2942,3828,6,3735,135,2296,2110,3465,1675,3897,2625]
seeds_EC = [2642,905,1810,35,2028,4172,1,4291,1334,4197,1018,2125,2388,13,1565,1832,2942,2067,2045,1386,3897,57,1899,1731,2625,546,353,2296,1396,3451,7,304,3649,25,3828,1317,135,1316,1352,3735,1585,41,2063,2198,2835,1953,3744,3598,2110,2600]
seeds_PC = [35,2642,1810,2028,7,905,1953,1,4172,4197,13,1018,2125,2296,202,3735,546,4291,1334,1899,832,3897,1386,2625,1396,2388,2942,3988,2067,1566,41,1217,2045,1771,1317,1162,1352,3451,304,3828,1383,3018,25,3649,1744,1832,4635,60,1565,2600]
seeds_HC = [3744,905,2,1,1201,2680,4,10,425,1018,1217,1383,1396,1386,2028,2725,3760,4172,4291,13,21,39,57,202,257,304,353,540,1316,1334,1565,1810,1953,2296,2388,2642,2647,2670,2682,2942,3500,3756,3759,3897,4163,4531,4694,6,7,23]
seeds_kcore = [35,2642,1810,2125,2028,905,4172,7,1,4197,13,1018,2296,1953,2388,4291,1334,546,1386,3988,2067,2045,1396,1352,1899,3735,2942,202,3897,1565,1317,2625,3451,304,3649,3828,41,2266,1566,1832,4559,135,1731,353,2600,1383,1585,3598,4694,2897]
seeds_icc = [2642,905,1810,35,2028,4172,1,2125,4291,1334,4197,13,2388,1018,1565,2045,2067,2296,2942,1396,1386,546,7,1899,3897,3988,1832,1731,1352,2625,1317,3649,3451,304,353,3828,57,3735,135,41,1585,2835,1316,1953,3598,2600,1566,202,361,4559]


# For each round budget from 1 to 5, repeat the cascade no_of_iterations
# times from seeds_kcore and report the mean spread, its sample standard
# deviation and a 95% t-based confidence interval for the mean.
for step_no in range(1, 6):
    spread_counts = []
    for _ in range(no_of_iterations):
        layers = independent_cascade(DG, seeds_kcore, step_no)
        # Spread of one run = seeds plus everything activated in any round.
        spread_counts.append(sum(map(len, layers)))

    spread_array = np.asarray(spread_counts)
    sample_size = len(spread_array)

    mean_spread = spread_array.mean()
    std_spread = spread_array.std(ddof=1)  # ddof=1 -> sample standard deviation

    conf_level = 0.95
    std_error = st.sem(spread_array)  # standard error of the mean
    conf_int = st.t.interval(
        conf_level,
        df=sample_size - 1,
        loc=mean_spread,
        scale=std_error,
    )

    print(f"Step = {step_no}")
    print(f"  Average Spread       = {mean_spread:.2f}")
    print(f"  Standard Deviation   = {std_spread:.2f}")
    print(f"  95% Confidence Int.  = ({conf_int[0]:.2f}, {conf_int[1]:.2f})")
    print('Step =', step_no, round(mean_spread, 0))

Step = 1
  Average Spread       = 2048.51
  Standard Deviation   = 16.11
  95% Confidence Int.  = (2045.31, 2051.71)
Step = 1 2049.0
Step = 2
  Average Spread       = 4298.77
  Standard Deviation   = 37.20
  95% Confidence Int.  = (4291.39, 4306.15)
Step = 2 4299.0
Step = 3
  Average Spread       = 4829.97
  Standard Deviation   = 15.71
  95% Confidence Int.  = (4826.85, 4833.09)
Step = 3 4830.0
Step = 4
  Average Spread       = 4907.36
  Standard Deviation   = 15.60
  95% Confidence Int.  = (4904.26, 4910.46)
Step = 4 4907.0
Step = 5
  Average Spread       = 4916.57
  Standard Deviation   = 17.45
  95% Confidence Int.  = (4913.11, 4920.03)
Step = 5 4917.0
