In [15]:
import community
import statistics
import networkx as nx
from rdflib import Graph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph

In [2]:
# Relative path of the RDF dump that the next cell parses into an rdflib graph.
file_path = "datasets/1/curso-sf-dump.rdf"

In [3]:
# Parse the RDF file into an in-memory rdflib graph.
# parse() is the last expression of the cell, so its return value is echoed as the cell output.
graph = Graph()
graph.parse(file_path)

<Graph identifier=Nd46ff71f6aff4812b4852817865e1281 (<class 'rdflib.graph.Graph'>)>

In [4]:
# Convert the rdflib graph into a networkx graph; len() of a networkx graph is its node count.
nx_graph = rdflib_to_networkx_graph(graph)
print(f"networkx Graph loaded successfully with length {len(nx_graph)}")

networkx Graph loaded successfully with length 26780


## Overall graph structure metrics

### Degree distribution of the network

This metric measures the distribution of degrees across all nodes in the graph. A skewed degree distribution may indicate the presence of hub or isolated nodes, which can affect the overall structure of the graph.

In [5]:
# Calculate the degree distribution.
# nx.degree_histogram returns a list in which position k holds the number of nodes with degree k.
degree_dist = nx.degree_histogram(nx_graph)

In [6]:
# Show the full histogram: one entry per degree value, starting at degree 0.
degree_dist

[0,
 13876,
 5410,
 182,
 148,
 81,
 76,
 60,
 35,
 24,
 12,
 23,
 22,
 15,
 440,
 1248,
 3696,
 959,
 188,
 6,
 6,
 5,
 7,
 5,
 7,
 4,
 6,
 2,
 2,
 1,
 1,
 5,
 0,
 5,
 0,
 2,
 5,
 2,
 2,
 0,
 0,
 2,
 0,
 3,
 2,
 1,
 3,
 2,
 3,
 3,
 5,
 4,
 3,
 2,
 4,
 4,
 3,
 2,
 1,
 3,
 2,
 3,
 5,
 0,
 0,
 1,
 1,
 4,
 0,
 4,
 2,
 1,
 2,
 3,
 0,
 0,
 0,
 2,
 1,
 3,
 2,
 1,
 4,
 2,
 0,
 3,
 5,
 1,
 0,
 0,
 3,
 2,
 2,
 1,
 0,
 0,
 1,
 2,
 3,
 0,
 4,
 0,
 0,
 2,
 2,
 1,
 1,
 1,
 0,
 0,
 3,
 3,
 3,
 1,
 0,
 0,
 1,
 0,
 2,
 1,
 2,
 2,
 0,
 2,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 2,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 1,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 2,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,


### Clustering coefficient

This metric measures the extent to which a node's neighbors are connected to each other. A higher clustering coefficient indicates that the node's neighbors are more densely connected. The value reported below is the average of this per-node coefficient over all nodes in the graph.

In [9]:
# Average, over all nodes, of the local clustering coefficient.
clustering_coefficient = nx.average_clustering(nx_graph)
print(f"Clustering coefficient: {clustering_coefficient}")

Clustering coefficient: 0.0


### Calculate communities for the graph

The Louvain method is a widely used community detection algorithm that aims to partition a graph into groups of nodes that are highly interconnected within the group and sparsely connected between groups.

I want to calculate the number of communities in the graph, as well as the size of the smallest community, the average community size, and the size of the largest community.

In [13]:
# Louvain is a randomized algorithm: fixing the seed makes the partition, and therefore
# the community sizes derived from it, reproducible across kernel restarts.
partition = community.best_partition(nx_graph, random_state=42)

# `partition` maps each node to its community id. Tally the members of every community
# in a single pass instead of rescanning all nodes once per community.
members_per_community = {}
for community_id in partition.values():
    members_per_community[community_id] = members_per_community.get(community_id, 0) + 1

# One entry per detected community; the number of communities is len(community_size).
community_size = list(members_per_community.values())

In [18]:
# Report the smallest, mean (two decimals) and largest community size, one per line.
print(
    f"MIN: {min(community_size)}",
    f"AVG: {statistics.mean(community_size):.2f}",
    f"MAX: {max(community_size)}",
    sep="\n",
)

MIN: 23
AVG: 704.74
MAX: 2554


### Graph density

This metric measures the proportion of edges in the graph compared to the total number of possible edges. 

A higher graph density indicates a more densely connected graph, which can influence the overall structure of the graph.

The graph density is defined as the ratio of the number of edges in the graph to the total number of possible edges. For an undirected graph with $n$ nodes and $m$ edges, the total number of possible edges is $n(n-1)/2$, so the density is $d = \frac{2m}{n(n-1)}$.

A higher density indicates that the graph has more edges relative to its size and is more densely connected.

In [None]:
# Fraction of all possible edges that are actually present in the graph.
density = nx.density(nx_graph)
print(f"Graph density: {density}")

### Assortativity coefficient

The assortativity coefficient measures the extent to which nodes with similar degrees are connected to each other. 

A positive assortativity coefficient indicates that high-degree nodes tend to be connected to other high-degree nodes, while a negative assortativity coefficient indicates that high-degree nodes tend to be connected to low-degree nodes. 

A value of 0 indicates no assortativity.

In [12]:
# Degree assortativity: correlation between the degrees of the two endpoints of each edge.
assortativity = nx.degree_assortativity_coefficient(nx_graph)
print("Assortativity coefficient: " + str(assortativity))

Assortativity coefficient: -0.18346691579346272


## Metrics related to the nodes

### Top 10 central nodes

In [None]:
# Degree centrality score for every node, as a node -> score mapping.
degree_centrality = nx.degree_centrality(nx_graph)

# Rank the (node, score) pairs by score, highest first, and keep the ten best nodes.
# The sort is stable, so tied nodes keep the order they have in the mapping.
most_central = [
    node
    for node, _score in sorted(
        degree_centrality.items(), key=lambda pair: pair[1], reverse=True
    )[:10]
]

In [None]:
# Show the ten node identifiers with the highest degree centrality, highest first.
most_central