In [1]:
# Copy the GraphML file into the current working directory so that it can be
# opened by its bare filename ("commonAcquiree.graphml") when the graph is loaded.
# NOTE(review): the /content/drive/MyDrive prefix looks like a mounted Google
# Drive in Colab — confirm the drive is mounted before this cell runs.
!cp "/content/drive/MyDrive/Startup Project/Acquisition Network/commonAcquiree.graphml" .

# Importing Libraries

In [2]:
!pip install python-igraph

Collecting python-igraph
  Downloading python_igraph-0.11.5-py3-none-any.whl (9.1 kB)
Collecting igraph==0.11.5 (from python-igraph)
  Downloading igraph-0.11.5-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting texttable>=1.6.2 (from igraph==0.11.5->python-igraph)
  Downloading texttable-1.7.0-py2.py3-none-any.whl (10 kB)
Installing collected packages: texttable, igraph, python-igraph
Successfully installed igraph-0.11.5 python-igraph-0.11.5 texttable-1.7.0


In [3]:
import networkx as nx
import pandas as pd
import os.path as path
import numpy as np
import networkx.algorithms.community as nx_comm
from random import choice
import igraph as ig

In [4]:
# Load the acquisition network from the GraphML file copied into the working directory.
graphml_path = "commonAcquiree.graphml"
G = nx.read_graphml(graphml_path)

# Calculating Structural Features

In [5]:
# Node count of the loaded graph; reused later as the size of the random baseline.
n = G.number_of_nodes()
print("Number of Nodes:", n)

Number of Nodes: 4063


In [6]:
# Edge count of the loaded graph; reused later to derive the baseline edge probability.
m = G.number_of_edges()
print("Number of Edges:", m)

Number of Edges: 3115


In [None]:
# Edge density of the full graph G, as computed by networkx.
density = nx.density(G)
print("Density:", density)

Density: 0.00037748639625068153


In [None]:
# Global transitivity of the full graph G (not restricted to the largest component).
transitivity = nx.transitivity(G)
print("Transitivity:", transitivity)

Transitivity: 0.08067614291202459


In [None]:
# Restrict to the largest connected component (by node count) and average the
# per-node clustering coefficients over that component only.
components_of_G = nx.connected_components(G)
giant_nodes = max(components_of_G, key=len)
lcc = G.subgraph(giant_nodes)
avg_clustering_coefficient = nx.average_clustering(lcc)
print("Average Clustering Coefficient:", avg_clustering_coefficient)

Average Clustering Coefficient: 0.10756864052463501


In [None]:
# Monte Carlo estimate of the average shortest-path length inside `lcc`
# (the largest-connected-component subgraph): draw random node pairs and
# average their hop distance. Pairs are drawn with replacement, so a node can
# be paired with itself (distance 0), exactly as in the original sampling.
num_samples = 10000
sample_nodes = list(lcc.nodes())  # built once; it does not change between draws
rng = np.random.default_rng(25)   # fixed seed so the printed estimate is reproducible
total = 0
for a_idx, b_idx in rng.integers(len(sample_nodes), size=(num_samples, 2)):
  a = sample_nodes[a_idx]
  b = sample_nodes[b_idx]
  # No weight is passed, so every edge counts as one hop.
  dist = nx.shortest_path_length(lcc, source=a, target=b, method='dijkstra')
  total += dist
average_shortest_path_length = total / num_samples
print("Average Shortest Path Length:", average_shortest_path_length)

Average Shortest Path Length: 6.9218


In [None]:
# Size (node count) of the largest connected component of G.
# The node set gets its own name instead of rebinding `lcc`: `lcc` is the
# component *subgraph* that the clustering and path-length cells call
# `.nodes()` on, and overwriting it with a plain set breaks re-running them.
largest_component_nodes = max(nx.connected_components(G), key=len)
lcc_size = len(largest_component_nodes)
print("Largest Connected Component Size:", lcc_size)

Largest Connected Component Size: 1098


# Creating Baseline Model

In [7]:
# Erdős–Rényi baseline with the same node count as G and an edge probability
# equal to G's edge count divided by the number of possible undirected pairs.
possible_pairs = n * (n - 1) // 2
p = m / possible_pairs
baseline = nx.erdos_renyi_graph(n, p, seed=25, directed=False)

In [8]:
# Realised edge density of the random baseline graph (edges are sampled, so it
# need not equal the probability p exactly).
baseline_density = nx.density(baseline)
print("Baseline Model Density:", baseline_density)

Baseline Model Density: 0.00038899882246057387


In [9]:
# Global transitivity of the full baseline graph, for comparison with G's value.
baseline_transitivity = nx.transitivity(baseline)
print("Baseline Model Transitivity:", baseline_transitivity)

Baseline Model Transitivity: 0.0005896226415094339


In [10]:
# Rebind `lcc` to the baseline's largest connected component (by node count)
# and average the per-node clustering coefficients over that component only.
baseline_components = nx.connected_components(baseline)
baseline_giant_nodes = max(baseline_components, key=len)
lcc = baseline.subgraph(baseline_giant_nodes)
baseline_avg_clustering_coefficient = nx.average_clustering(lcc)
print("Baseline Model Average Clustering Coefficient:", baseline_avg_clustering_coefficient)

Baseline Model Average Clustering Coefficient: 0.0003216261417728033


In [11]:
# Monte Carlo estimate of the average shortest-path length inside `lcc`
# (at this point the baseline's largest-connected-component subgraph): draw
# random node pairs and average their hop distance. Pairs are drawn with
# replacement, so a node can be paired with itself (distance 0), exactly as in
# the original sampling.
num_samples = 10000
sample_nodes = list(lcc.nodes())  # built once; it does not change between draws
rng = np.random.default_rng(25)   # fixed seed so the printed estimate is reproducible
total = 0
for a_idx, b_idx in rng.integers(len(sample_nodes), size=(num_samples, 2)):
  a = sample_nodes[a_idx]
  b = sample_nodes[b_idx]
  # No weight is passed, so every edge counts as one hop.
  dist = nx.shortest_path_length(lcc, source=a, target=b, method='dijkstra')
  total += dist
baseline_average_shortest_path_length = total / num_samples
print("Baseline Model Average Shortest Path Length:", baseline_average_shortest_path_length)

Baseline Model Average Shortest Path Length: 15.451


In [12]:
# Node count of the baseline graph's largest connected component.
# `lscc` holds that component's node set (the baseline graph is undirected).
baseline_component_sets = nx.connected_components(baseline)
lscc = max(baseline_component_sets, key=len)
baseline_lcc_size = len(lscc)
print("Baseline Model Largest Connected Component Size:", baseline_lcc_size)

Baseline Model Largest Connected Component Size: 2591
