In [1]:
!cp "/content/drive/MyDrive/Startup Project/Acquisition Network/commonAcquirer.graphml" .

# Importing Libraries

In [2]:
!pip install python-igraph



In [3]:
import networkx as nx
import pandas as pd
import os.path as path
import numpy as np
import networkx.algorithms.community as nx_comm
from random import choice
import igraph as ig

In [4]:
G = nx.read_graphml("commonAcquirer.graphml")

# Calculating Structural Features

In [5]:
n = nx.number_of_nodes(G)
print("Number of Nodes:", n)

Number of Nodes: 68297


In [6]:
m = nx.number_of_edges(G)
print("Number of Edges:", m)

Number of Edges: 639064


In [7]:
density = nx.density(G)
print("Density:", density)

Density: 0.00027401696593557624


In [8]:
transitivity = nx.transitivity(G)
print("Transitivity:", transitivity)

Transitivity: 0.992995173453566


In [9]:
lcc = G.subgraph(max(nx.connected_components(G), key=len))
avg_clustering_coefficient = nx.average_clustering(lcc)
print("Average Clustering Coefficient:", avg_clustering_coefficient)

Average Clustering Coefficient: 0.9589313755116959


In [10]:
total = 0
for i in range(10000):
  a = choice(list(lcc.nodes()))
  b = choice(list(lcc.nodes()))
  dist = nx.shortest_path_length(lcc, source=a, target=b, method='dijkstra')
  total += dist
average_shortest_path_length = total / 10000
print("Average Shortest Path Length:", average_shortest_path_length)

Average Shortest Path Length: 7.2171


In [11]:
g = ig.Graph.from_networkx(G)
communities = g.community_infomap()
c = len(communities)
print("Number of Communities:", c)

Number of Communities: 12786


In [12]:
lcc = max(nx.connected_components(G), key=len)
lcc_size = len(lcc)
print("Largest Connected Component Size:", lcc_size)

Largest Connected Component Size: 10427


# Creating Baseline Model

In [13]:
p = m / (n * (n - 1))
baseline = nx.erdos_renyi_graph(n, p, seed=42, directed=False)

In [14]:
baseline_density = nx.density(baseline)
print("Baseline Model Density:", baseline_density)

Baseline Model Density: 0.00013707194220028825


In [15]:
baseline_transitivity = nx.transitivity(baseline)
print("Baseline Model Transitivity:", baseline_transitivity)

Baseline Model Transitivity: 0.0001294552942699474


In [16]:
lcc = baseline.subgraph(max(nx.connected_components(baseline), key=len))
baseline_avg_clustering_coefficient = nx.average_clustering(lcc)
print("Baseline Model Average Clustering Coefficient:", baseline_avg_clustering_coefficient)

Baseline Model Average Clustering Coefficient: 0.0001255106210298623


In [17]:
total = 0
for i in range(10000):
  a = choice(list(lcc.nodes()))
  b = choice(list(lcc.nodes()))
  dist = nx.shortest_path_length(lcc, source=a, target=b, method='dijkstra')
  total += dist
baseline_average_shortest_path_length = total / 10000
print("Baseline Model Average Shortest Path Length:", baseline_average_shortest_path_length)

Baseline Model Average Shortest Path Length: 5.2251


In [18]:
baseline_g = ig.Graph.from_networkx(baseline)
communities = baseline_g.community_infomap()
baseline_c = len(communities)
print("Baseline Model Number of Communities:", baseline_c)

Baseline Model Number of Communities: 4615


In [19]:
lscc = max(nx.connected_components(baseline), key=len)
baseline_lcc_size = len(lscc)
print("Baseline Model Largest Connected Component Size:", baseline_lcc_size)

Baseline Model Largest Connected Component Size: 68296
