# Different Measures Computation

## Graph declaration

In [1]:
import networkx as nx

# retrieve the graph from the text file
my_graph = nx.read_edgelist("../data/dataset.txt", create_using=nx.Graph(), nodetype=int)

## Graph Properties

### Number of Nodes & Edges

In [2]:
# calculate the number of nodes
number_of_nodes = my_graph.number_of_nodes()
print("Number of nodes: ", number_of_nodes)
# calculate the number of links
number_of_links = my_graph.number_of_edges()
print("Number of edges: ", number_of_links)

Number of nodes:  81306
Number of edges:  1342310


## Plotting the graph using Gephi

In [3]:
# Using Gephi to plot the graph
nx.write_gexf(my_graph, "../data/dataset.gexf")

## Giant Component

In [4]:
import networkx as nx


def get_giant_component_info(graph):
    giant_component = max(nx.connected_components(graph), key=len)
    giant_component_graph = graph.subgraph(giant_component)
    return giant_component, giant_component_graph

### Giant Component Info

In [5]:
giant_component, giant_component_graph = get_giant_component_info(my_graph)
print("Giant Component - Number of nodes: ", giant_component_graph.number_of_nodes())
print("Giant Component - Number of edges: ", giant_component_graph.number_of_edges())

Giant Component - Number of nodes:  81306
Giant Component - Number of edges:  1342310


## Transformation of the graph into a smaller one
from this section we will transform the graph into a smaller one by taking a sample of the graph.

In [6]:
# We can make our graph smaller by taking the sample from the graph
import random

# Convert the set of nodes in the graph to a list
graph_nodes_list = list(my_graph.nodes())

new_graph_size = 20000  # Change this to the number of nodes you want in your sample

# Choose a random sample of nodes from the graph
new_graph_nodes = random.sample(graph_nodes_list, new_graph_size)

# Create a subgraph from the sampled nodes
new_graph = my_graph.subgraph(new_graph_nodes)
# show the number of nodes
print("New Graph - Number of nodes: ", new_graph.number_of_nodes())
print("New Graph - Number of edges: ", new_graph.number_of_edges())

New Graph - Number of nodes:  20000
New Graph - Number of edges:  80535


In [7]:
nx.write_gexf(new_graph, "../data/new_graph.gexf")

In [8]:
# Find the giant component
new_giant_component, new_giant_component_graph = get_giant_component_info(new_graph)
print("Giant Component - Number of nodes: ", new_giant_component_graph.number_of_nodes())
print("Giant Component - Number of edges: ", new_giant_component_graph.number_of_edges())

Giant Component - Number of nodes:  16330
Giant Component - Number of edges:  79586


In [9]:
# Using Gephi to plot the graph
nx.write_gexf(new_giant_component_graph, "../data/new_giant_component_graph.gexf")

In [10]:
# calculate the average degree
average_degree = sum(
    dict(new_giant_component_graph.degree()).values()) / new_giant_component_graph.number_of_nodes()
print("Average Degree: ", average_degree)

Average Degree:  9.747213717085119


In [11]:
import random


def approximate_diameter(G, approximate_size=100):
    # Select a random sample of nodes
    nodes = list(G.nodes())
    sample_chosen_nodes = random.sample(nodes, min(approximate_size, len(nodes)))

    # Calculate the shortest paths from the sampled nodes
    shortest_paths = nx.multi_source_dijkstra_path_length(G, sample_chosen_nodes)

    # The approximate diameter is the maximum shortest path length
    return max(shortest_paths.values())


# Use the function to calculate the approximate diameter
approx_diameter = approximate_diameter(giant_component_graph)
print("Approximate Diameter: ", approx_diameter)

Approximate Diameter:  4


## Centrality Measures

### Closeness

In [12]:
# calculate the average closeness for a random sample of giant component
sample_size = 100
closeness_values = []
for i in range(sample_size):
    closeness_values.append(
        nx.closeness_centrality(giant_component_graph, u=random.choice(list(giant_component_graph.nodes()))))
print("Average Closeness Centrality: ", sum(closeness_values) / sample_size)

Average Closeness Centrality:  0.2593564187094184


### Betweenness

In [13]:
# calculate the average betweenness for a random sample of giant component
sample_size = min(100, len(giant_component_graph.nodes()))
betweenness_values = []
for i in range(sample_size):
    node = random.choice(list(giant_component_graph.nodes()))
    betweenness_values.append(nx.betweenness_centrality(giant_component_graph, k=1)[node])
print("Average Betweenness Centrality: ", sum(betweenness_values) / sample_size)

Average Betweenness Centrality:  1.847376660445129e-05


## Clustering Coefficient

In [14]:
clustering_coefficient = nx.average_clustering(new_giant_component_graph)
print("Clustering Coefficient: ", clustering_coefficient)

Clustering Coefficient:  0.44847796701795783


## Transitivity

In [15]:
transitivity = nx.transitivity(new_giant_component_graph)
print("Transitivity: ", transitivity)

Transitivity:  0.16827397141027697
