In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats

# Functions

In [2]:
def create_graph(file_path, delimiter='\t'):
    '''
    Reads a file with an edgelist and creates a NetworkX graph from it.

    Every non-blank line must hold exactly two node labels separated by
    `delimiter`. Blank and whitespace-only lines are skipped, so an empty
    line (for example at the end of the file) does not abort loading.

    Parameters:
        file_path (str): Path to the file containing the edge list.
        delimiter (str): Separator between the two node labels on a line.
            Defaults to a tab.

    Returns:
        G (networkx.Graph): Graph created from the edgelist.

    Raises:
        ValueError: If a non-blank line does not split into exactly two fields.
    '''
    G = nx.Graph()  # Create an empty graph object
    with open(file_path, 'r') as file:  # Open the file containing the edgelist
        for line_number, line in enumerate(file, start=1):
            stripped = line.strip()
            if not stripped:
                # Nothing to parse; unpacking an empty line would fail below.
                continue
            fields = stripped.split(delimiter)  # Split each line into its node labels
            if len(fields) != 2:
                # Report where the problem is instead of a bare unpacking error.
                raise ValueError(
                    f"{file_path}, line {line_number}: expected 2 fields "
                    f"separated by {delimiter!r}, got {len(fields)}"
                )
            node1, node2 = fields
            G.add_edge(node1, node2)  # Add the edge to the graph
    return G

## Centrality measurements

In [3]:
def degree_centrality(graph, top_n=5):
    '''
    Prints the nodes with the highest degree centrality for a given graph.

    Parameters:
        graph (networkx.Graph): Graph for which the nodes with the highest degree centrality are determined.
        top_n (int): Number of top-ranked nodes to print. Defaults to 5.

    Returns:
        None. The ranking is printed as a list of (node, score) tuples,
        highest score first.

    Raises:
        ValueError: If top_n is negative.
    '''
    if top_n < 0:
        # A negative slice bound would silently drop nodes from the end instead.
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Calculate degree centrality. The local is not named after the function
    # so the function's own name is not shadowed inside its body.
    scores = nx.degree_centrality(graph)

    # Rank nodes by score, highest first; ties keep the order nx returned them in.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    print(f"Top {top_n} most central diseases by degree centrality:", ranked[:top_n])

In [4]:
def betweenness_centrality(graph, top_n=5):
    '''
    Prints the nodes with the highest betweenness centrality for a given graph.

    Parameters:
        graph (networkx.Graph): Graph for which the nodes with the highest betweenness centrality are determined.
        top_n (int): Number of top-ranked nodes to print. Defaults to 5.

    Returns:
        None. The ranking is printed as a list of (node, score) tuples,
        highest score first.

    Raises:
        ValueError: If top_n is negative.
    '''
    if top_n < 0:
        # A negative slice bound would silently drop nodes from the end instead.
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Calculate betweenness centrality. The local is not named after the
    # function so the function's own name is not shadowed inside its body.
    scores = nx.betweenness_centrality(graph)

    # Rank nodes by score, highest first; ties keep the order nx returned them in.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    print(f"Top {top_n} most central diseases by betweenness centrality:", ranked[:top_n])

In [5]:
def closeness_centrality(graph, top_n=5):
    '''
    Prints the nodes with the highest closeness centrality for a given graph.

    Parameters:
        graph (networkx.Graph): Graph for which the nodes with the highest closeness centrality are determined.
        top_n (int): Number of top-ranked nodes to print. Defaults to 5.

    Returns:
        None. The ranking is printed as a list of (node, score) tuples,
        highest score first.

    Raises:
        ValueError: If top_n is negative.
    '''
    if top_n < 0:
        # A negative slice bound would silently drop nodes from the end instead.
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Calculate closeness centrality. The local is not named after the
    # function so the function's own name is not shadowed inside its body.
    scores = nx.closeness_centrality(graph)

    # Rank nodes by score, highest first; ties keep the order nx returned them in.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    print(f"Top {top_n} most central diseases by closeness centrality:", ranked[:top_n])

In [6]:
def eigenvector_centrality(graph, top_n=5):
    '''
    Prints the nodes with the highest eigenvector centrality for a given graph.

    Parameters:
        graph (networkx.Graph): Graph for which the nodes with the highest eigenvector centrality are determined.
        top_n (int): Number of top-ranked nodes to print. Defaults to 5.

    Returns:
        None. The ranking is printed as a list of (node, score) tuples,
        highest score first.

    Raises:
        ValueError: If top_n is negative.
        Any exception raised by nx.eigenvector_centrality is propagated
        unchanged (NOTE(review): NetworkX documents a convergence failure
        for this routine - confirm whether callers need to handle it).
    '''
    if top_n < 0:
        # A negative slice bound would silently drop nodes from the end instead.
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Calculate eigenvector centrality. The local is not named after the
    # function so the function's own name is not shadowed inside its body.
    scores = nx.eigenvector_centrality(graph)

    # Rank nodes by score, highest first; ties keep the order nx returned them in.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    print(f"Top {top_n} most central diseases by eigenvector centrality:", ranked[:top_n])

In [7]:
def all(G):
    '''
    Prints the top-ranked nodes of a graph under every centrality measure.

    The degree, betweenness, closeness and eigenvector reports are run in
    that order; each one prints its own ranking.

    NOTE(review): this name shadows Python's built-in all() for every cell
    that runs after this definition. It is kept because existing cells call
    all(G); consider renaming it together with its callers.

    Parameters:
        G (networkx.Graph): Graph for which the central nodes are determined.
    '''
    reports = (
        degree_centrality,
        betweenness_centrality,
        closeness_centrality,
        eigenvector_centrality,
    )
    for report in reports:
        report(G)

# Analysis

In [8]:
# Network layers to analyse; each name is also the stem of its edge-list file.
networks = ['disease_on_genes', 'disease_on_symptoms', 'disease_on_drugs']

# For every layer: print its name, build the graph from its edge list,
# print the top nodes under each centrality measure, then a blank separator line.
for layer in networks:
    print(layer)
    # Relative path, so it resolves against the notebook's working directory.
    spars_path = f'../00_Data/Sparsified_networks/{layer}.tsv'
    G = create_graph(spars_path)
    # This is the notebook's own all(), not the built-in: it prints the four centrality reports.
    all(G)
    print()

disease_on_genes
Top 5 most central diseases by degree centrality: [('Seizure', 0.1976362442547603), ('Epilepsy', 0.18877216021011162), ('Schizophrenia', 0.14215364412344056), ('Nystagmus', 0.12672357189757058), ('Cryptorchidism', 0.11556139198949442)]
Top 5 most central diseases by betweenness centrality: [('Seizure', 0.047767817841415525), ('Epilepsy', 0.04597507495898787), ('Schizophrenia', 0.040577255094925366), ('Nystagmus', 0.03985375589749492), ('Cryptorchidism', 0.025404255800147116)]
Top 5 most central diseases by closeness centrality: [('Epilepsy', 0.44512455762339814), ('Seizure', 0.4438893115444563), ('Schizophrenia', 0.4400106670746504), ('Neoplasm_of_stomach', 0.4394418103622074), ('Nystagmus', 0.4361292578221876)]
Top 5 most central diseases by eigenvector centrality: [('Seizure', 0.12851892416216196), ('Epilepsy', 0.11954773607701524), ('Schizophrenia', 0.10690364093222653), ('Depression', 0.09463092084088719), ('Hypertensive_disorder', 0.09243261968751389)]

disease_on