In [1]:
%matplotlib inline

import glob
from pathlib import Path
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network

In [4]:
# Utilities (save graph as file, plot graph, print graph analytics)
def save_graph(graph, file_name):
    """Export a NetworkX graph as a pyvis HTML visualisation.

    Args:
        graph: NetworkX graph whose nodes and edges are copied into pyvis.
        file_name: Output name without extension; ``.html`` is appended.
    """
    # Canvas of 800px height and 1200px width.
    html_view = Network(height='800px', width='1200px')
    # Copy the nodes and edges of the NetworkX graph into the pyvis network.
    html_view.from_nx(graph)
    output_file = f'{file_name}.html'
    html_view.show(output_file)


def plot_graph(graph, title, k=10, seed=None):
    """Draw a graph with a spring layout and display the figure.

    Args:
        graph: NetworkX graph to draw.
        title: Text used as the plot title.
        k: Optimal distance between nodes, forwarded to
            ``nx.spring_layout``. The default of 10 spreads the nodes out
            so the example plots are easier to read.
        seed: Random seed for the layout. ``None`` (the default) keeps the
            previous behaviour, where every call produces a different
            layout; pass an int to get the same figure on every run.
    """
    pos = nx.spring_layout(graph, k=k, seed=seed)
    nx.draw(graph, pos, with_labels=True)
    plt.title(title)
    plt.show()


def print_graph_analytics(graph, title, include_expensive=True):
    """Print structural statistics of a graph, one labelled line per metric.

    Node count, edge count and density are always printed. Average
    clustering and the Wiener index are printed afterwards unless
    ``include_expensive`` is False.

    Args:
        graph: NetworkX graph to analyse.
        title: Label prefixed to every printed line.
        include_expensive: When True (the default, matching the previous
            behaviour) also print average clustering and the Wiener index.
            The Wiener index sums shortest-path distances over all node
            pairs, so on graphs with millions of edges these two metrics
            can run for a very long time; pass False to skip them.
    """
    print(f"{title} - nodes:                {nx.number_of_nodes(graph)}")
    print(f"{title} - edges:                {nx.number_of_edges(graph)}")
    print(f"{title} - density:              {nx.density(graph)}")
    if not include_expensive:
        return
    print(f"{title} - avg-clustering:       {nx.algorithms.average_clustering(graph)}")
    # NetworkX reports an infinite Wiener index when the graph is not connected.
    print(f"{title} - wiener-index:         {nx.algorithms.wiener_index(graph)}")
    # Small-world coefficients are left disabled, as in the original cell
    # (presumably because they are too slow -- confirm before re-enabling).
    #print(f"{title} - small-world sigma:    {nx.sigma(graph)}")
    #print(f"{title} - small-world omega:    {nx.omega(graph)}")

# The next step is to read all single-gene networks and plot them
> Note: Run this in the online Jupyter notebook if it's not working

## Alzheimer Disease

In [None]:
# Build, summarise and plot one undirected graph per Alzheimer gene CSV.
path = "data/ad/genes/*.csv"
# glob returns matches in arbitrary order; sorting keeps the printed
# statistics and figures in the same order on every run and machine.
for fname in sorted(glob.glob(path)):
    df = pd.read_csv(fname)
    Graphtype = nx.Graph()
    # The file name without directory and extension labels the output.
    file_name = Path(fname).stem
    G = nx.from_pandas_edgelist(df, source="Source", target="Target",
                                create_using=Graphtype)
    print_graph_analytics(G, file_name)
    plot_graph(G, file_name)

## Schizophrenia

In [None]:
# Build, summarise and plot one undirected graph per schizophrenia gene CSV.
path = "data/schiz/genes/*.csv"
# glob returns matches in arbitrary order; sorting keeps the printed
# statistics and figures in the same order on every run and machine.
for fname in sorted(glob.glob(path)):
    df = pd.read_csv(fname)
    Graphtype = nx.Graph()
    # The file name without directory and extension labels the output.
    file_name = Path(fname).stem
    G = nx.from_pandas_edgelist(df, source="Source", target="Target",
                                create_using=Graphtype)
    print_graph_analytics(G, file_name)
    plot_graph(G, file_name)

# Read CSV Files (AD Network Full and Schizophrenia Network Full)

In [7]:
# Load the full, labelled edge lists of both disease networks
# (comma-separated files).
df_ad = pd.read_csv(
    'data/ad/ad_network_full_with_labels.csv',
    sep=",",
)
df_schiz = pd.read_csv(
    'data/schiz/schiz_network_full_with_labels.csv',
    sep=",",
)

In [3]:
# The combined protein file is whitespace-delimited. The separator is a regex,
# so it is written as a raw string: "\s" in a normal string literal is an
# invalid escape sequence and triggers a warning on current Python versions.
df_all = pd.read_csv('data/all_protein_combined.csv', sep=r"\s+")

In [5]:
# Undirected graph over the combined protein edge list; edge_attr=True keeps
# every remaining column as an edge attribute.
Graphtype = nx.Graph()
G = nx.from_pandas_edgelist(
    df_all,
    source="Source",
    target="Target",
    edge_attr=True,
    create_using=Graphtype,
)
# NOTE(review): the disabled export below still uses the AD file name.
# save_graph(G, file_name="ad_network_graph")
# NOTE(review): the recorded output of this cell stops after the density line
# with a KeyboardInterrupt, so the remaining metrics did not finish here.
print_graph_analytics(G, "full_homo_sapiens_network")

full_homo_sapiens_network - nodes:                19385
full_homo_sapiens_network - edges:                5969249
full_homo_sapiens_network - density:              0.03177170229714356


KeyboardInterrupt: 

# Create NetworkX Graph from pandas Edge List (AD Network)
> Note: This will take some time

In [None]:
# Undirected graph over the full Alzheimer edge list; edge_attr=True keeps
# every remaining column as an edge attribute.
Graphtype = nx.Graph()
G = nx.from_pandas_edgelist(
    df_ad,
    source="Source",
    target="Target",
    edge_attr=True,
    create_using=Graphtype,
)
# Optional HTML export, disabled:
# save_graph(G, file_name="ad_network_graph")
print_graph_analytics(G, "ad_network_graph")
# Optional static plot, disabled:
# plot_graph(G, "alzheimer disease")

ad_network_graph - nodes:                6089
ad_network_graph - edges:                11310
ad_network_graph - density:              0.0006101996901415685


# Create NetworkX Graph from pandas Edge List (Schizophrenia Network)
> Note: This will take some time

In [14]:
# Undirected graph over the full schizophrenia edge list; edge_attr=True keeps
# every remaining column as an edge attribute.
Graphtype = nx.Graph()
G = nx.from_pandas_edgelist(
    df_schiz,
    source="Source",
    target="Target",
    edge_attr=True,
    create_using=Graphtype,
)
# Optional HTML export, disabled:
# save_graph(G, file_name="schiz_network_graph")
print_graph_analytics(G, "schiz_network_graph")
# Optional static plot, disabled:
# plot_graph(G, "schizophrenia network full")