In [None]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

from pyrheadb.RheaDB import RheaDB
from pyrheadb.ReactionNetwork import ReactionNetwork

# Build reaction network

In [None]:
# Instantiate the RheaDB and ReactionNetwork objects used throughout the notebook.
rdb = RheaDB()
rxnnet = ReactionNetwork()

# Feed the long-format reaction table held by RheaDB into the network builder.
# NOTE(review): the attribute name suggests SMILES + ChEBI columns - confirm in pyrheadb.
reaction_table = rdb.rhea_reaction_long_format_smiles_chebi
rxnnet.build_network_from_long_format_table(reaction_table)

# Analysis
## 1. Analyse basic properties of the network

NB: 

[Average degree](https://networkx.org/documentation/stable/reference/classes/generated/networkx.Graph.degree.html) - The node degree is the number of edges adjacent to the node; the average degree is the mean of the node degrees over all nodes in the graph.

[Graph density](https://networkx.org/documentation/stable/reference/generated/networkx.classes.function.density.html) - The density is 0 for a graph without edges and 1 for a complete graph. The density of multigraphs can be higher than 1.

In [None]:
# Compute the basic network properties and print them one per line
# in the form "<property> : <value>".
graph_properties = rxnnet.analyze_basic_properties()
for property_pair in graph_properties.items():
    print(*property_pair, sep=" : ")

## 2. How many disconnected subgraphs the network has

Identify and analyze connected components within the network.
This is particularly useful for understanding isolated subnetworks within your reaction network.

In [None]:
# Compute the connected-component properties of the network and print
# each one as "<property> : <value>".
component_properties = rxnnet.analyze_components()
for prop_name in component_properties:
    # print() would str() each field and join with single spaces; do the same explicitly.
    print(" ".join((str(prop_name), ":", str(component_properties[prop_name]))))

## 3. Plot component sizes (without largest component)
The largest component is not plotted because its size is of a different order of magnitude, and the rest of the components would be dwarfed.

In [None]:
# Number of nodes in every connected component of the participants graph,
# ordered from the largest component to the smallest.
components_size = sorted(
    map(len, nx.connected_components(rxnnet.participants_graph)),
    reverse=True,
)

# Distinct component sizes, largest first; sizes[0] is the largest component.
sizes = sorted(set(components_size), reverse=True)

# count_subgraphs[i] is the number of components that have exactly sizes[i] nodes.
count_subgraphs = [components_size.count(size) for size in sizes]

In [None]:
# IPython magic: select the inline matplotlib backend so figures are rendered
# in the notebook output below the cell that draws them.
%matplotlib inline

In [None]:
# Bar chart of how many components exist for each component size.
# Index 0 (the largest size) is skipped on both axes so the remaining
# components stay visible.
fig, ax = plt.subplots()
ax.bar(sizes[1:], count_subgraphs[1:])
ax.set_xlabel('Compounds per subgraph')
ax.set_ylabel('Number of subgraphs')
plt.show()

## 4. Analyse centrality (~7 min, MacBook Pro, 2.6 GHz 6-Core Intel Core i7)

Centrality Measures. Computes centrality measures for nodes in the participants graph.

Compute various centrality measures to find the most influential nodes (compounds) in your network.

Measures like degree centrality, betweenness centrality, and closeness centrality can be useful.

In [None]:
# Long-running step (the section heading estimates ~7 min). It is kept in its
# own cell so the summary cell that follows can be re-run without recomputing.
# NOTE(review): the result is consumed as {measure name: {node: score}} below -
# confirm against pyrheadb's ReactionNetwork.analyze_centrality.
centrality_properties = rxnnet.analyze_centrality()

In [None]:
# For every centrality measure, print the mean, median and maximum of its
# per-node scores.
for measure, node_scores in centrality_properties.items():
    # Materialise the scores once and reuse them for all three statistics.
    scores = list(node_scores.values())
    for label, summarise in (('Mean', np.mean), ('Median', np.median), ('Max', max)):
        print(label, measure, ":", summarise(scores))

## 5. Analyse clustering

Clustering Coefficient. Analyzes the clustering coefficient of the participants graph.

This analysis provides insights into the clustering behavior of nodes, indicating how nodes tend to cluster together.

In [None]:
# Run the clustering analysis and report its two summary entries.
# NOTE(review): 'Max 20 Clustering' presumably holds the 20 highest
# clustering coefficients - confirm against pyrheadb.
clustering_properties = rxnnet.analyze_clustering()
for summary_key in ('Average Clustering', 'Max 20 Clustering'):
    print(summary_key + ':', clustering_properties[summary_key])

Plot the clustering coefficients to see that the majority of nodes are not highly clustered.

In [None]:
# Histogram of the clustering coefficients.
# The dict view returned by .values() is materialised as a list first:
# hist() documents its input as an array or a sequence of arrays, and a
# dict view is neither, so passing it directly relies on undocumented
# coercion inside matplotlib.
clustering_coefficients = list(clustering_properties['Clustering Coefficients'].values())

fig, ax = plt.subplots()
ax.hist(clustering_coefficients)
ax.set_xlabel('Clustering Coefficient')
ax.set_ylabel('Number of compounds')
plt.show()

## 6. Analyse shortest path (~ 1 min, MacBook Pro, 2.6 GHz 6-Core Intel Core i7)

Examine shortest paths within the network to understand the connectivity and distance between different compounds.

In [None]:
# Run the shortest-path analysis. The result is consumed as a mapping whose
# values are themselves mappings with comparable values.
# NOTE(review): presumably {source: {target: path length}} - confirm against pyrheadb.
all_shortest_paths = rxnnet.analyze_shortest_paths()

# For each outer entry keep only its largest inner value.
path_lengths = []
for lengths_from_source in all_shortest_paths.values():
    path_lengths.append(max(lengths_from_source.values()))

# The overall maximum is reported as the longest compound-to-compound path.
print('Max compound to compound path in the network:', max(path_lengths))