In [None]:
!cp "/content/drive/MyDrive/Startup Project/Acquisition Network/acquisitionNet.graphml" .

# Importing Libraries

In [None]:
!pip install python-igraph

Collecting python-igraph
  Downloading python_igraph-0.11.5-py3-none-any.whl (9.1 kB)
Collecting igraph==0.11.5 (from python-igraph)
  Downloading igraph-0.11.5-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting texttable>=1.6.2 (from igraph==0.11.5->python-igraph)
  Downloading texttable-1.7.0-py2.py3-none-any.whl (10 kB)
Installing collected packages: texttable, igraph, python-igraph
Successfully installed igraph-0.11.5 python-igraph-0.11.5 texttable-1.7.0


In [None]:
import networkx as nx
import pandas as pd
import os.path as path
import numpy as np
import networkx.algorithms.community as nx_comm
from random import choice
import igraph as ig

In [None]:
G = nx.read_graphml("acquisitionNet.graphml")

# Calculating Structural Features

In [None]:
n = nx.number_of_nodes(G)
print("Number of Nodes:", n)

Number of Nodes: 119263


In [None]:
m = nx.number_of_edges(G)
print("Number of Edges:", m)

Number of Edges: 92788


In [None]:
density = nx.density(G)
print("Density:", density)

Density: 6.52355001068747e-06


In [None]:
transitivity = nx.transitivity(G)
print("Transitivity:", transitivity)

Transitivity: 6.023992702363126e-05


In [None]:
H = G.to_undirected()
max_lcc = max(nx.connected_components(H), key=len)
max_wcc = H.subgraph(max_lcc)

avg_clustering_coefficient = nx.average_clustering(max_wcc)
print("Average Clustering Coefficient:", avg_clustering_coefficient)

Average Clustering Coefficient: 0.0020806098635726173


In [None]:
total = 0
for i in range(10000):
  a = choice(list(max_wcc.nodes()))
  b = choice(list(max_wcc.nodes()))
  dist = nx.shortest_path_length(max_wcc, source=a, target=b, method='dijkstra')
  total += dist
average_shortest_path_length = total / 10000
print("Average Shortest Path Length:", average_shortest_path_length)

Average Shortest Path Length: 14.1938


In [22]:
g = ig.Graph.from_networkx(G)
communities = g.community_infomap()
c = len(communities)
print("Number of Communities:", c)

Number of Communities: 32246


In [23]:
lwcc = max(nx.weakly_connected_components(G), key=len)
lwcc_size = len(lwcc)
print("Largest Weakly Connected Component Size:", lwcc_size)

Largest Weakly Connected Component Size: 25285


In [24]:
lscc = max(nx.strongly_connected_components(G), key=len)
lscc_size = len(lscc)
print("Largest Strongly Connected Component Size:", lscc_size)

Largest Strongly Connected Component Size: 3


# Creating Baseline Model

In [None]:
p = m / (n * (n - 1))
baseline = nx.erdos_renyi_graph(n, p, seed=42, directed=True)

In [None]:
baseline_density = nx.density(baseline)
print("Baseline Model Density:", baseline_density)

Baseline Model Density: 6.479468169752209e-06


In [None]:
baseline_transitivity = nx.transitivity(baseline)
print("Baseline Model Transitivity:", baseline_transitivity)

Baseline Model Transitivity: 0


In [None]:
H = baseline.to_undirected()
max_lcc = max(nx.connected_components(H), key=len)
max_wcc = H.subgraph(max_lcc)

baseline_avg_clustering_coefficient = nx.average_clustering(max_wcc)
print("Baseline Model Average Clustering Coefficient:", baseline_avg_clustering_coefficient)

Baseline Model Average Clustering Coefficient: 0.0


In [None]:
total = 0
for i in range(10000):
  a = choice(list(max_wcc.nodes()))
  b = choice(list(max_wcc.nodes()))
  dist = nx.shortest_path_length(max_wcc, source=a, target=b, method='dijkstra')
  total += dist
baseline_average_shortest_path_length = total / 10000
print("Baseline Model Average Shortest Path Length:", baseline_average_shortest_path_length)

Baseline Model Average Shortest Path Length: 23.2348


In [19]:
baseline_g = ig.Graph.from_networkx(baseline)
communities = baseline_g.community_infomap()
baseline_c = len(communities)
print("Baseline Model Number of Communities:", baseline_c)

Baseline Model Number of Communities: 41767


In [20]:
lwcc = max(nx.weakly_connected_components(baseline), key=len)
baseline_lwcc_size = len(lwcc)
print("Baseline Model Largest Weakly Connected Component Size:", baseline_lwcc_size)

Baseline Model Largest Weakly Connected Component Size: 72941


In [21]:
lscc = max(nx.strongly_connected_components(baseline), key=len)
baseline_lscc_size = len(lscc)
print("Baseline Model Largest Strongly Connected Component Size:", baseline_lscc_size)

Baseline Model Largest Strongly Connected Component Size: 1
