# Libraries

In [None]:
# importing libraries
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import pickle
import random
import pandas as pd

# Mounting Drive

In [None]:
# Colab-only step: mount Google Drive at /content/drive so files stored there
# can be opened by path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Dataset Reading

In [None]:
# Load the vil2 edge list from the mounted Drive folder.
data_vil2 = pd.read_csv("/content/drive/MyDrive/Project/vil2.csv")
# Drop the "Unnamed: 0" column in place (presumably an index column written
# when the CSV was saved -- confirm); the graph builder reads only "level_0"
# and "level_1".
data_vil2.drop("Unnamed: 0",axis=1,inplace=True)
# Bare expression: shows the frame as the cell output.
data_vil2

Unnamed: 0,level_0,level_1
0,0,1
1,0,2
2,0,3
3,0,4
4,0,6
...,...,...
6115,875,876
6116,876,872
6117,876,873
6118,876,874


# Helper Functions

In [None]:
# -----------------------------------
# function to create graph
# -----------------------------------
def create_graph(dataset):
    """Build a directed graph with one edge per row of dataset.

    Each row contributes the edge dataset["level_0"] -> dataset["level_1"].
    Returns the populated nx.DiGraph.
    """
    graph = nx.DiGraph()
    # Pair the two columns row by row and add all the edges in one call.
    graph.add_edges_from(zip(dataset["level_0"], dataset["level_1"]))
    return graph


# -----------------------------------
# function to set status of all nodes of G to "inactive"
# -----------------------------------
def set_status_inactive(G):
    """Write status = "inactive" into the attribute dict of every node of G."""
    for node in G.nodes():
        G.nodes[node].update(status="inactive")

# -----------------------------------
# function to set status of list of nodes to active
# -----------------------------------
def set_status_active(G,l):
    """Write status = "active" into the attribute dict of each node listed in l."""
    attrs_by_node = G.nodes
    for node in l:
        attrs_by_node[node]['status'] = "active"


# -----------------------------------
# function to set threshold of the nodes
# -----------------------------------
def set_threshold(G, seed=1):
    """Assign each node of G a reproducible random 'threshold' in [0, 1].

    Thresholds are drawn in the iteration order of G.nodes(), one uniform
    draw per node.

    Args:
        G: graph whose node attribute dicts receive the 'threshold' key.
        seed: seed for the private generator. The default of 1 yields the
            same thresholds as seeding the module-level generator with 1.
    """
    # A private generator keeps the draws reproducible without resetting the
    # state of the process-wide `random` module for unrelated code.
    rng = random.Random(seed)
    for node in G.nodes():
        G.nodes[node]['threshold'] = rng.uniform(0, 1)


# -----------------------------------
# function to return a node's influence: 1 if the node is active, otherwise 0
# -----------------------------------
def active_only(Gc,v):
    """Return 1 when node v of Gc has status "active", otherwise 0."""
    return 1 if Gc.nodes[v]["status"] == "active" else 0

# -----------------------------------
# function to define aggregated influence on node u
# -----------------------------------
def peer_influence(Gc,u):
    """Return the fraction of u's predecessors in Gc that are active.

    Each predecessor contributes active_only(Gc, v), i.e. 1 or 0. A node with
    no predecessors gets an influence of 0.
    """
    votes = [active_only(Gc, parent) for parent in Gc.predecessors(u)]
    if not votes:
        return 0
    return sum(votes) / len(votes)


# -----------------------------------
# function to calculate number of active nodes
# -----------------------------------
def count_active(G):
    """Return how many nodes of G currently have status "active"."""
    return sum(1 for node in G.nodes() if G.nodes[node]["status"] == "active")


# -----------------------------------
# function to select set of p nodes of Gc as seed nodes based on some centrality
# -----------------------------------
def Centrality(Gc,p,centrality_measure):
    """Return the p nodes of Gc that score highest under a centrality measure.

    Args:
        Gc: networkx graph to score.
        p: number of seed nodes to return.
        centrality_measure: one of "degree", "closeness", "betweeness"
            (the spelling existing callers use; "betweenness" is accepted
            as well), "pagerank" or "katz".

    Returns:
        A list of at most p nodes in descending score order. Nodes with equal
        scores keep the order in which networkx reported them.

    Raises:
        ValueError: if centrality_measure is not one of the supported names.
    """
    # Each entry is a thunk so that only the requested measure is computed.
    measures = {
        "degree": lambda: nx.degree_centrality(Gc),
        "closeness": lambda: nx.closeness_centrality(Gc),
        "betweeness": lambda: nx.betweenness_centrality(Gc),
        "betweenness": lambda: nx.betweenness_centrality(Gc),
        "pagerank": lambda: nx.pagerank(Gc),
        # Second positional argument of katz_centrality is alpha.
        "katz": lambda: nx.katz_centrality(Gc, 0.07),
    }
    try:
        compute = measures[centrality_measure]
    except KeyError:
        # Previously an unknown name fell through the if/elif chain and
        # surfaced as an UnboundLocalError further down.
        raise ValueError(
            f"unknown centrality_measure {centrality_measure!r}; "
            f"expected one of {sorted(measures)}"
        ) from None

    scores = compute()
    # sorted() is stable, so ties keep the dict's insertion order.
    return sorted(scores, key=scores.get, reverse=True)[:p]

# -----------------------------------
# general threshold model for cascade
# -----------------------------------
def _print_active_share(Gc):
    """Print the number of active nodes of Gc as a count and a percentage."""
    total = Gc.number_of_nodes()
    active = count_active(Gc)
    # An empty graph has no meaningful share; report 0% rather than divide by zero.
    share = active / total * 100 if total else 0.0
    print(f"Number of active nodes: {active}/{total}, {share:.2f}% nodes")


def gt_model(Gc,p=5,imax=7,centrality_measure="pagerank"):
    """Run a threshold cascade on Gc and print its progress.

    All nodes are reset to "inactive", the p top-ranked nodes under
    centrality_measure are activated as seeds, and then for i = 0..imax every
    inactive node whose peer_influence exceeds its 'threshold' attribute is
    collected. The collected nodes are activated together after the scan, so
    one iteration's activations do not affect the same iteration's scores.

    Every node must already carry a 'threshold' attribute (see set_threshold).

    Args:
        Gc: directed networkx graph; node 'status' attributes are overwritten.
        p: number of seed nodes.
        imax: index of the last iteration. Iteration imax only reports the
            state and checks for convergence; it activates nothing.
        centrality_measure: measure name forwarded to Centrality.

    Returns:
        None. The final state is left in the 'status' attributes of Gc.
    """
    # ----------------------------------------
    # Step 1: reset the graph and activate the seed set
    # ----------------------------------------
    set_status_inactive(Gc)
    seeds = Centrality(Gc,p,centrality_measure=centrality_measure)
    set_status_active(Gc,seeds)

    # ----------------------------------------
    # Step 2: iterate until nothing new activates or imax is reached
    # ----------------------------------------
    converged = False
    for i in range(imax + 1):

        print("-"*50)
        print("Value of i:",i)
        print("-"*50)

        print("At start of iteration:",i)
        _print_active_share(Gc)

        # Each node is visited once, so no duplicate check is needed here.
        pending = [
            u for u in Gc.nodes()
            if Gc.nodes[u]['status'] == "inactive"
            and peer_influence(Gc,u) > Gc.nodes[u]['threshold']
        ]

        # Only inactive nodes can be pending, so the active set is unchanged
        # exactly when nothing is pending. (Comparing the old and new active
        # lists is order-sensitive and could miss this.)
        if not pending:
            print("Converged at iteration:",i)
            converged = True
            break

        if i < imax:
            set_status_active(Gc,pending)
            _print_active_share(Gc)

    if not converged:
        # Nodes were still pending when the cap was hit; do not call it converged.
        print("Reached iteration limit without converging:",imax)

# Centrality Measures

# Degree Centrality

In [None]:
# Rebuild the graph from the edge list so this run starts from a fresh graph.
G2 = create_graph(data_vil2)
# Reset every status and assign the seeded per-node thresholds gt_model reads.
set_status_inactive(G2)
set_threshold(G2)
# Cascade seeded with the 10 top-ranked nodes by degree centrality, imax=10.
gt_model(G2,p=10,imax=10,centrality_measure="degree") 

--------------------------------------------------
Value of i: 0
--------------------------------------------------
At start of iteration: 0
Number of active nodes: 10/876, 1.14% nodes
Number of active nodes: 34/876, 3.88% nodes
--------------------------------------------------
Value of i: 1
--------------------------------------------------
At start of iteration: 1
Number of active nodes: 34/876, 3.88% nodes
Number of active nodes: 61/876, 6.96% nodes
--------------------------------------------------
Value of i: 2
--------------------------------------------------
At start of iteration: 2
Number of active nodes: 61/876, 6.96% nodes
Number of active nodes: 92/876, 10.50% nodes
--------------------------------------------------
Value of i: 3
--------------------------------------------------
At start of iteration: 3
Number of active nodes: 92/876, 10.50% nodes
Number of active nodes: 116/876, 13.24% nodes
--------------------------------------------------
Value of i: 4
---------------

# Closeness Centrality

In [None]:
# Rebuild the graph from the edge list so this run starts from a fresh graph.
G2 = create_graph(data_vil2)
# Reset every status and assign the seeded per-node thresholds gt_model reads.
set_status_inactive(G2)
set_threshold(G2)
# Cascade seeded with the 10 top-ranked nodes by closeness centrality, imax=10.
gt_model(G2,p=10,imax=10,centrality_measure="closeness") 

--------------------------------------------------
Value of i: 0
--------------------------------------------------
At start of iteration: 0
Number of active nodes: 10/876, 1.14% nodes
Number of active nodes: 29/876, 3.31% nodes
--------------------------------------------------
Value of i: 1
--------------------------------------------------
At start of iteration: 1
Number of active nodes: 29/876, 3.31% nodes
Number of active nodes: 53/876, 6.05% nodes
--------------------------------------------------
Value of i: 2
--------------------------------------------------
At start of iteration: 2
Number of active nodes: 53/876, 6.05% nodes
Number of active nodes: 77/876, 8.79% nodes
--------------------------------------------------
Value of i: 3
--------------------------------------------------
At start of iteration: 3
Number of active nodes: 77/876, 8.79% nodes
Number of active nodes: 94/876, 10.73% nodes
--------------------------------------------------
Value of i: 4
------------------

# Betweenness Centrality

In [None]:
# Rebuild the graph from the edge list so this run starts from a fresh graph.
G2 = create_graph(data_vil2)
# Reset every status and assign the seeded per-node thresholds gt_model reads.
set_status_inactive(G2)
set_threshold(G2)
# Cascade seeded with the 10 top-ranked nodes by betweenness centrality, imax=10.
# The measure name is spelled "betweeness" because that is the key Centrality checks.
gt_model(G2,p=10,imax=10,centrality_measure="betweeness") 

--------------------------------------------------
Value of i: 0
--------------------------------------------------
At start of iteration: 0
Number of active nodes: 10/876, 1.14% nodes
Number of active nodes: 30/876, 3.42% nodes
--------------------------------------------------
Value of i: 1
--------------------------------------------------
At start of iteration: 1
Number of active nodes: 30/876, 3.42% nodes
Number of active nodes: 54/876, 6.16% nodes
--------------------------------------------------
Value of i: 2
--------------------------------------------------
At start of iteration: 2
Number of active nodes: 54/876, 6.16% nodes
Number of active nodes: 83/876, 9.47% nodes
--------------------------------------------------
Value of i: 3
--------------------------------------------------
At start of iteration: 3
Number of active nodes: 83/876, 9.47% nodes
Number of active nodes: 104/876, 11.87% nodes
--------------------------------------------------
Value of i: 4
-----------------

# PageRank

In [None]:
# Rebuild the graph from the edge list so this run starts from a fresh graph.
G2 = create_graph(data_vil2)
# Reset every status and assign the seeded per-node thresholds gt_model reads.
set_status_inactive(G2)
set_threshold(G2)
# Cascade seeded with the 10 top-ranked nodes by PageRank, imax=10.
gt_model(G2,p=10,imax=10,centrality_measure="pagerank") 

--------------------------------------------------
Value of i: 0
--------------------------------------------------
At start of iteration: 0
Number of active nodes: 10/876, 1.14% nodes
Number of active nodes: 35/876, 4.00% nodes
--------------------------------------------------
Value of i: 1
--------------------------------------------------
At start of iteration: 1
Number of active nodes: 35/876, 4.00% nodes
Number of active nodes: 62/876, 7.08% nodes
--------------------------------------------------
Value of i: 2
--------------------------------------------------
At start of iteration: 2
Number of active nodes: 62/876, 7.08% nodes
Number of active nodes: 95/876, 10.84% nodes
--------------------------------------------------
Value of i: 3
--------------------------------------------------
At start of iteration: 3
Number of active nodes: 95/876, 10.84% nodes
Number of active nodes: 119/876, 13.58% nodes
--------------------------------------------------
Value of i: 4
---------------

# Katz

In [None]:
# Rebuild the graph from the edge list so this run starts from a fresh graph.
G2 = create_graph(data_vil2)
# Reset every status and assign the seeded per-node thresholds gt_model reads.
set_status_inactive(G2)
set_threshold(G2)
# Cascade seeded with the 10 top-ranked nodes by Katz centrality, imax=10.
gt_model(G2,p=10,imax=10,centrality_measure="katz") 

--------------------------------------------------
Value of i: 0
--------------------------------------------------
At start of iteration: 0
Number of active nodes: 10/876, 1.14% nodes
Number of active nodes: 32/876, 3.65% nodes
--------------------------------------------------
Value of i: 1
--------------------------------------------------
At start of iteration: 1
Number of active nodes: 32/876, 3.65% nodes
Number of active nodes: 48/876, 5.48% nodes
--------------------------------------------------
Value of i: 2
--------------------------------------------------
At start of iteration: 2
Number of active nodes: 48/876, 5.48% nodes
Number of active nodes: 62/876, 7.08% nodes
--------------------------------------------------
Value of i: 3
--------------------------------------------------
At start of iteration: 3
Number of active nodes: 62/876, 7.08% nodes
Number of active nodes: 71/876, 8.11% nodes
--------------------------------------------------
Value of i: 4
-------------------