**Computational Health Laboratory Project, A.Y. 2021/2022**

**Authors:** Niko Dalla Noce, Alessandro Ristori, Andrea Zuppolini

**Project:** Starting from one or more genes, extract from interaction databases the genes they interact with. Using the expanded gene set, perform pathway analysis and obtain all disease pathways in which the genes appear. Merge the pathways to obtain a larger graph. Perform further network analysis to extract central biomarkers and communities beyond pathways. Compute a distance between the initial gene set and the various pathways (diseases).

# **CHL Project, Plots**

## **Colab setup**
Takes care of the project setup on Colab.

In [1]:
if 'google.colab' in str(get_ipython()):
    import subprocess
    out_clone = subprocess.run(["git", "clone", "https://github.com/nikodallanoce/ComputationalHealthLaboratory"], text=True, capture_output=True)
    print("{0}{1}".format(out_clone.stdout, out_clone.stderr))
    %cd ComputationalHealthLaboratory

## **Draw the protein, disease and community graphs**
Plot the graphs to show the results of our work.

In [2]:
import pandas as pd
import seaborn as sn
import numpy as np
import networkx as nx
from pyvis.network import Network

We assume that you have already run **0_Pathway_Enrichment**, **1_Network_Analysis** and **2_Community_Analysis** and, therefore, all the datasets needed here are available. If so, then load everything.

In [3]:
# Load every artefact produced by the previous notebooks.
import pickle

df_diseases = pd.read_csv("datasets/diseases_pathways.csv", sep=",", index_col=0)
# nx.read_gpickle was removed in NetworkX 3.0; the migration guide recommends
# using pickle directly, which works on older NetworkX versions as well.
# NOTE(review): unpickling can execute arbitrary code, only load a graph file
# that was produced by your own run of the previous notebooks.
with open("datasets/protein_graph.gpickle", "rb") as graph_file:
    protein_graph = pickle.load(graph_file)
df_communities = pd.read_csv("datasets/communities.csv")
df_biomarkers = pd.read_csv("datasets/biomarkers.csv", index_col=0)
# One set per row of the communities file.
# NOTE(review): every cell of the row ends up in the set, including any padding
# value of shorter rows -- confirm against how 2_Community_Analysis writes the file.
communities = [set(community) for i, community in df_communities.iterrows()]
# Map each disease pathway index to its name and to the list of its genes
# (the 'Genes' column stores them as a single ";"-separated string).
diseases = dict()
for i, disease in df_diseases.iterrows():
    disease_genes = disease['Genes'].split(";")
    term = disease['Term']
    diseases[i] = {"name": term, "genes": disease_genes}

Methods used to build the pyvis network.

In [4]:
def __build_network_disease__(protein_graph: nx.Graph, genes_disease: list = None, protein: str = None, plot_other_edges: bool = True) -> Network:
    """
    Build a pyvis network of the graph in which the disease's genes and the chosen protein stand out
    :param protein_graph: graph to draw, every node and (optionally) every edge is added to the network
    :param genes_disease: genes of the disease pathway, drawn as orange diamonds, nothing is highlighted if None
    :param protein: name of the protein to draw bigger and in slate blue, if any
    :param plot_other_edges: if True, the edges that are not highlighted are drawn in grey, otherwise they are skipped
    :return: the pyvis network, not yet configured nor shown
    """
    net = Network(width=1080, height=720)
    # Membership is tested for every node and every edge: build the set once
    # instead of scanning the list each time. None behaves like "no disease gene".
    disease_set = set(genes_disease) if genes_disease is not None else set()
    protein_in_disease = protein in disease_set

    # pyvis nodes are identified by their position, the gene name is only the label
    node_index = dict()
    for i, node in enumerate(protein_graph.nodes()):
        shape = "dot"
        size = 8
        color = "grey"
        node_index[node] = i
        if node in disease_set:
            color = "orange"
            size = 16
            shape = "diamond"

        # The protein overrides color and size, but keeps the diamond shape
        # when it also belongs to the disease
        if node == protein:
            color = "slateblue"
            size = 32

        net.add_node(i, label=node, size=size, color=color, shape=shape)

    for edge_from, edge_to in protein_graph.edges():
        from_in_disease = edge_from in disease_set
        to_in_disease = edge_to in disease_set
        links_protein_to_disease = (edge_from == protein and to_in_disease) or (from_in_disease and edge_to == protein)
        if links_protein_to_disease and not protein_in_disease:
            # Interaction between an external protein and a disease gene
            net.add_edge(node_index[edge_from], node_index[edge_to], color="blue", value=1)
        elif from_in_disease and to_in_disease:
            # Interaction between two genes of the disease
            net.add_edge(node_index[edge_from], node_index[edge_to], color="orangered", value=1)
        elif plot_other_edges:
            net.add_edge(node_index[edge_from], node_index[edge_to], color="grey")

    return net


def __build_network_protein__(protein_graph: nx.Graph, protein: str = None, plot_other_edges: bool = True) -> Network:
    """
    Build a pyvis network of the graph in which the chosen protein and its edges stand out
    :param protein_graph: graph to draw
    :param protein: name of the protein to draw bigger and in slate blue, if any
    :param plot_other_edges: if True, the edges not linked to the protein are drawn in grey, otherwise they are skipped
    :return: the pyvis network, not yet configured nor shown
    """
    net = Network(width=1080, height=720)

    # pyvis nodes are identified by their position, the gene name is only the label
    node_index = {}
    for position, name in enumerate(protein_graph.nodes()):
        node_index[name] = position
        if name == protein:
            net.add_node(position, label=name, size=16, color="slateblue")
            continue

        net.add_node(position, label=name, size=8)

    for edge_from, edge_to in protein_graph.edges():
        touches_protein = edge_from == protein or edge_to == protein
        if not touches_protein and not plot_other_edges:
            continue

        # Edges of the protein are blue and weighted, every other edge is plain grey
        edge_style = {"color": "blue", "value": 1} if touches_protein else {"color": "grey"}
        net.add_edge(node_index[edge_from], node_index[edge_to], **edge_style)

    return net

Plot the protein graph; you can also highlight a disease’s genes in the same graph. If a node is drawn as an orange diamond, then it belongs to the disease.

In [5]:
def plot_protein_network(protein_graph: nx.Graph, genes_disease: list = None, biomarkers: list = None, protein: str = None, plot_other_edges: bool = True) -> None:
    """
    Draw the protein graph, or only its biomarkers, and write the result to protein_graph.html
    :param protein_graph: the protein-to-protein network
    :param genes_disease: genes of a disease pathway to highlight, if any
    :param biomarkers: if passed, only the subgraph induced by these nodes is drawn
    :param protein: name of the protein to highlight, if any
    :param plot_other_edges: whether the edges that are not highlighted are drawn
    """
    # Restrict the drawing to the biomarkers when they are given, otherwise work on a copy of the whole graph
    plot_graph = protein_graph.copy() if biomarkers is None else protein_graph.subgraph(biomarkers)

    net = __build_network_disease__(
        protein_graph=plot_graph,
        genes_disease=genes_disease,
        protein=protein,
        plot_other_edges=plot_other_edges,
    )
    net.toggle_drag_nodes(False)
    net.show_buttons(['physics'])
    net.force_atlas_2based(spring_strength=0.02)
    net.show("protein_graph.html")

In [6]:
# Draw only the biomarkers of the protein graph and highlight the SON protein
plot_protein_network(
    protein_graph=protein_graph,
    biomarkers=df_biomarkers.index,
    protein="SON",
)

Plot the disease graph, i.e. the subgraph of the protein network induced by the disease’s genes. If a protein is passed, its node is drawn larger in slate blue and the edges linked to it are colored in blue.

In [7]:
def plot_disease_network(protein_graph: nx.Graph, genes_disease: list, protein: str = None) -> None:
    """
    Draw the subgraph induced by the disease's genes and write the result to disease_graph.html
    :param protein_graph: the protein-to-protein network
    :param genes_disease: genes of the disease pathway, only these nodes are drawn
    :param protein: name of the protein to highlight together with its edges, if any
    """
    disease_graph = protein_graph.subgraph(genes_disease)
    net = __build_network_protein__(protein_graph=disease_graph, protein=protein)
    net.toggle_drag_nodes(False)
    net.show_buttons(['physics'])
    net.force_atlas_2based(spring_strength=0.02)
    net.show("disease_graph.html")

In [8]:
# Draw the genes of the disease pathway with index 5 and highlight the SON protein
plot_disease_network(
    protein_graph=protein_graph,
    genes_disease=diseases[5]["genes"],
    protein="SON",
)

Plot a community graph and color the chosen protein, if any is passed, and the edges linked to it.

In [9]:
def look_for_gene_community(protein: str, communities: list) -> int:
    """
    Look for the first community that contains the protein
    :param protein: name of the protein to search
    :param communities: list of communities, each one a collection of gene names
    :return: position of the first community containing the protein, -1 if no community contains it
    """
    matches = (position for position, members in enumerate(communities) if protein in members)
    # The generator is lazy, so the search stops at the first match
    return next(matches, -1)


def plot_community_protein(protein_graph: nx.Graph, communities: list, protein: str = None, plot_other_edges: bool = True) -> None:
    """
    Draw the community of the protein, or a random one, and write the result to community_protein_graph.html
    :param protein_graph: the protein-to-protein network
    :param communities: list of communities, each one a collection of gene names
    :param protein: name of the protein whose community is drawn, a random community is drawn if None
    :param plot_other_edges: whether the edges not linked to the protein are drawn
    :raises ValueError: if the protein is passed but does not belong to any community
    """
    if protein is not None:
        community_index = look_for_gene_community(protein, communities)
        if community_index == -1:
            # Indexing with the -1 sentinel would silently draw the last community
            raise ValueError("Protein {0} not found in any community".format(protein))

        community = communities[community_index]
    else:
        # Pick the community itself, not just its random position in the list
        community = communities[np.random.randint(0, len(communities))]

    sub_graph = protein_graph.subgraph(community)
    net = __build_network_protein__(sub_graph, protein, plot_other_edges)
    net.toggle_drag_nodes(False)
    net.show_buttons(['physics'])
    net.force_atlas_2based(spring_strength=0.02)
    net.show("community_protein_graph.html")

In [10]:
# Draw the community that contains the SON protein
plot_community_protein(
    protein_graph=protein_graph,
    communities=communities,
    protein="SON",
)

Plot a community and color the disease’s genes passed to the method.

In [11]:
def plot_community_disease(protein_graph: nx.Graph, genes_disease: list, community: set, protein: str = None, plot_other_edges: bool = True) -> None:
    """
    Draw a community, highlight the disease's genes inside it and write the result to community_disease_graph.html
    :param protein_graph: the protein-to-protein network
    :param genes_disease: genes of the disease pathway to highlight
    :param community: nodes of the community to draw
    :param protein: name of the protein to highlight, if any
    :param plot_other_edges: whether the edges that are not highlighted are drawn
    """
    community_graph = protein_graph.subgraph(community)
    net = __build_network_disease__(
        protein_graph=community_graph,
        genes_disease=genes_disease,
        protein=protein,
        plot_other_edges=plot_other_edges,
    )
    net.toggle_drag_nodes(False)
    net.show_buttons(['physics'])
    net.force_atlas_2based(spring_strength=0.02)
    net.show("community_disease_graph.html")

In [None]:
# Draw the community of the SON protein and highlight the genes of the disease pathway 228
plot_community_disease(
    protein_graph=protein_graph,
    genes_disease=diseases[228]["genes"],
    community=communities[look_for_gene_community("SON", communities)],
    protein="SON",
    plot_other_edges=True,
)

# Indices of the disease pathways that can be plotted
# With SON -> Intellectual disability: 5, Undergrowth: 102, Strabismus: 16
# Without SON -> Small head: 1, Epilepsy: 228, Hyperreflexia: 118

Plot all the communities.

In [None]:
def plot_communities(protein_graph: nx.Graph, communities: list, protein: str = None) -> None:
    """
    Draw every community in the same network and write the result to communities.html, the edges inside a
    community share the same color and the chosen protein, if any, is drawn bigger
    :param protein_graph: the protein-to-protein network
    :param communities: list of communities, each one a collection of gene names
    :param protein: name of the protein to draw bigger, if any
    """
    import warnings

    # A missing protein only means that no node is enlarged, so warn and keep plotting instead of
    # aborting; no protein at all (the default) is a legitimate request and needs no warning
    if protein is not None and look_for_gene_community(protein, communities) == -1:
        warnings.warn("Protein {0} not found in any community, no node will be highlighted".format(protein))

    palette = sn.color_palette("tab10", len(communities)).as_hex()
    net = Network(width=1080, height=720)
    node_index = dict()
    index = 0
    for community, color in zip(communities, palette):
        sub_graph = protein_graph.subgraph(community)
        for node in sub_graph.nodes():
            # pyvis nodes are identified by a running counter, the gene name is only the label
            node_index[node] = index
            net.add_node(index, label=node, size=32 if node == protein else 8)
            index += 1

        # Only the edges between members of the same community are drawn
        for edge_from, edge_to in sub_graph.edges():
            net.add_edge(node_index[edge_from], node_index[edge_to], color=color)

    net.show_buttons(['physics'])
    net.force_atlas_2based(spring_strength=0.02)
    net.show("communities.html")

In [None]:
# Draw all the communities and enlarge the SON protein
plot_communities(
    protein_graph=protein_graph,
    communities=communities,
    protein="SON",
)

Retrieve all the genes, inside a community and belonging to a disease pathway, that interact with the starting gene.

In [None]:
def disease_genes_community_interacting_with(community_graph: nx.Graph, genes_disease: list = None, protein: str = None) -> list:
    """
    Retrieve all the genes that belong to a disease pathway and interact with the passed protein
    :param community_graph: subgraph of the protein-to-protein network, represents the community in which the protein was found, we advise to use the look_for_gene_community() method to find such community
    :param genes_disease: genes that belong to a disease pathway, an empty list is returned if None
    :param protein: name of the protein, an empty list is returned if None
    :return: list of genes, which lie inside the community, that interact with the protein and belong to the disease pathway, one entry per matching edge, in edge order
    """
    if genes_disease is None or protein is None:
        return []

    # Built once to avoid scanning the list for every edge
    disease_set = set(genes_disease)
    genes_interacting = list()
    for edge_from, edge_to in community_graph.edges():
        # Keep only the edges of the protein and take the node at the other end
        if edge_from == protein:
            neighbour = edge_to
        elif edge_to == protein:
            neighbour = edge_from
        else:
            continue

        # The neighbour itself must belong to the disease: checking "either endpoint" would accept
        # every neighbour as soon as the protein is one of the disease's genes
        if neighbour in disease_set:
            genes_interacting.append(neighbour)

    return genes_interacting

In [None]:
# Community in which the SON protein was found
community = communities[look_for_gene_community(protein="SON", communities=communities)]
# Genes of the disease pathway 228, inside that community, that interact with SON
gene_interactions_disease = disease_genes_community_interacting_with(
    community_graph=protein_graph.subgraph(community),
    genes_disease=diseases[228]["genes"],
    protein="SON",
)
gene_interactions_disease