### Obtain different biological networks
#### Original PPI networks from STRING

In [1]:

import get_PPI_STRING as string

# NCBI taxonomy ID handed to the STRING helper functions (9606 = Homo sapiens).
species = 9606
# Identifier string forwarded to the STRING helper functions.
# NOTE(review): presumably the "caller identity" sent to the STRING API — confirm in get_PPI_STRING.
identity = 'lhlorenzo'

In [2]:
# Build the STRING PPI network for the 'AD' gene set.
# Steps: load the genes from the GDAs file, map them to STRING identifiers,
# then request the network between those identifiers.
disease = 'AD'
gda_file = f'data/{disease}_GDAs.tsv'

genes_of_interest = string.load_GDAs(gda_file)
string_ids = string.get_string_ids(genes_of_interest, species, identity)
string.get_PPI_network(string_ids, species, identity, disease)

Unique genes from DisGeNET: 101
Genes in STRING: 91
Interactions between these genes: 230
Proteins interacting at least with another one: 59


In [3]:
# Build the STRING PPI network for the 'ND' gene set.
# Steps: load the genes from the GDAs file, map them to STRING identifiers,
# then request the network between those identifiers.
disease = 'ND'
gda_file = f'data/{disease}_GDAs.tsv'

genes_of_interest = string.load_GDAs(gda_file)
string_ids = string.get_string_ids(genes_of_interest, species, identity)
string.get_PPI_network(string_ids, species, identity, disease)

Unique genes from DisGeNET: 231
Genes in STRING: 218
Interactions between these genes: 778
Proteins interacting at least with another one: 160


### Networks from other data sources
Using AD gene set

#### BioGRID (PPI)

In [5]:
import requests
import json
import pandas as pd

In [7]:
# Query the BioGRID REST service for interactions among the AD genes and
# write the resulting gene-symbol pairs as a tab-separated edge list.
import os

request_url = "https://webservice.thebiogrid.org" + "/interactions"

# The access key is a credential: read it from the environment instead of
# committing it to the notebook.
access_key = os.environ.get("BIOGRID_ACCESS_KEY")
if not access_key:
    raise RuntimeError(
        "Set the BIOGRID_ACCESS_KEY environment variable to your BioGRID REST access key."
    )

# One gene per line; blank lines (e.g. produced by a trailing newline) are
# dropped so that no empty term ends up in the query.
with open("data/AD_GDAs.txt", "r") as infile:
    geneList = [line.strip() for line in infile if line.strip()]

# These parameters can be modified to match any search criteria following
# the rules outlined in the Wiki: https://wiki.thebiogrid.org/doku.php/biogridrest
params = {
    "accesskey": access_key,
    "format": "json",  # Return results as JSON (parsed with r.json() below)
    "geneList": "|".join(geneList),  # Must be | separated
    "searchNames": "true",  # Search against official names
    'interSpeciesExcluded': 'true', # interactions w/ different species are excluded
    'throughputTag': 'high',
    'includeHeader': 'true',
    "includeInteractors": "false",  # set to false to get interactions between genes
    "taxId": 9606  # Limit to Homo sapiens
}

r = requests.get(request_url, params=params, timeout=60)
# Fail loudly on HTTP errors (bad key, bad request) rather than on an opaque
# JSON decoding error further down.
r.raise_for_status()
interactions = r.json()

# Index the records by interaction identifier and copy the identifier into
# each record so it is available as a regular column.
data = {
    interaction_id: {**interaction, "INTERACTION_ID": interaction_id}
    for interaction_id, interaction in interactions.items()
}

# Load the data into a pandas dataframe
dataset = pd.DataFrame.from_dict(data, orient="index")

# Re-order the columns and select only the columns we want to see
columns = [
    "INTERACTION_ID",
    "ENTREZ_GENE_A",
    "ENTREZ_GENE_B",
    "OFFICIAL_SYMBOL_A",
    "OFFICIAL_SYMBOL_B",
    "EXPERIMENTAL_SYSTEM",
    "PUBMED_ID",
    "PUBMED_AUTHOR",
    "THROUGHPUT",
    "QUALIFICATIONS",
]
if dataset.empty:
    # No interactions returned: keep an empty frame with the expected columns
    # instead of raising KeyError on the column selection.
    dataset = pd.DataFrame(columns=columns)
else:
    dataset = dataset[columns]

# The edge list holds only the two interactor symbols, without header or index.
edgelist = dataset[['OFFICIAL_SYMBOL_A', 'OFFICIAL_SYMBOL_B']]
edgelist.to_csv('data/other_networks/AD_BioGrid_PPI.edgelist', sep='\t', index=False, header=False)

#### HuRI (PPI)
HuRI interactions were downloaded from HuRI webpage and then parsed to obtain the corresponding edgelist.

In [8]:
# Parse the HuRI export ('#' starts a comment in that file) and keep only the
# two interactor gene-name columns as a tab-separated edge list.
huri_csv = 'data/other_networks/HuRI_download_interactions_dec_16_2021_11_54_03.csv'
gene_name_columns = ['Interactor A Gene Name', 'Interactor B Gene Name']

interactions = pd.read_csv(huri_csv, comment='#')
edgelist = interactions.loc[:, gene_name_columns]
edgelist.to_csv(
    'data/other_networks/AD_HuRI_PPI.edgelist',
    sep='\t',
    index=None,
    header=False,
)

#### PPT-Ohmnet (PPI, brain-specific)

In [9]:
import networkx as nx
import mygene
import pandas as pd
import matplotlib.pyplot as plt

In [10]:
# Paths of the combined (all tissues) edge list and of the brain-only subset
# written below.
infile = 'data/other_networks/PPT-Ohmnet_tissues-combined.edgelist'
brain_file = 'data/other_networks/PPT-Ohmnet_tissues-brain.edgelist'

# Nodes are parsed as ints; the third column is stored as the 'tissue' edge attribute.
tissue_attr = (('tissue', str),)
G = nx.read_edgelist(infile, nodetype=int, data=tissue_attr)

# Keep only the rows labelled 'brain' and save them as their own edge list ...
tissues_edgelist = pd.read_csv(infile, sep='\t')
is_brain = tissues_edgelist['tissue'] == 'brain'
brain_specific = tissues_edgelist.loc[is_brain]
brain_specific.to_csv(brain_file, sep='\t', index=False)

# ... then read that file back as the brain-specific graph.
G_brain = nx.read_edgelist(brain_file, nodetype=int, data=tissue_attr)

Genes in PPT-Ohmnet are identified by Entrez IDs, so the AD gene symbols must be converted to Entrez IDs before the network can be subset.

In [11]:
# List of genes to search for: one per line, skipping blank lines
# (e.g. the empty entry produced by a trailing newline).
with open("data/AD_GDAs.txt", "r") as infile:
    genes = [line.strip() for line in infile if line.strip()]

# Map the gene symbols to Entrez gene IDs with MyGene.info
mg = mygene.MyGeneInfo()
out = mg.querymany(genes, scopes='symbol', fields='entrezgene', species='human')

# Queries without a match come back without an 'entrezgene' field; skip them
# instead of failing with KeyError.
entrezgenes = [int(hit['entrezgene']) for hit in out if 'entrezgene' in hit]

# subgraph() returns a read-only view; copy it into a regular Graph so it can
# be edited in the following cells.
A_brain_frozen = G_brain.subgraph(entrezgenes)
A_brain = nx.Graph(A_brain_frozen)

querying 1-102...done.
Finished.


In [12]:
# Delete nodes from components with less than 5 nodes.
# The nodes are collected first and removed in a single call, so the graph is
# never modified while its components are being enumerated.
nodes_to_remove = [
    node
    for component in nx.connected_components(A_brain)
    if len(component) < 5
    for node in component
]
A_brain.remove_nodes_from(nodes_to_remove)

# Remove self-loops
A_brain.remove_edges_from(list(nx.selfloop_edges(A_brain)))

nx.write_edgelist(A_brain, 'data/other_networks/AD_SNAP_brain.edgelist')

#### GIANT (PPI, brain-specific)

In [13]:
# Load the GIANT brain edge list: nodes are parsed as ints and the third
# column is stored as the integer edge attribute 'code'.
# Note: this rebinds G_brain, which previously held the PPT-Ohmnet brain graph.
giant_file = 'data/other_networks/brain_C1.dat'
G_brain = nx.read_edgelist(giant_file, nodetype=int, data=(('code', int),))

Genes in GIANT are identified by Entrez IDs, so the AD gene symbols must be converted to Entrez IDs before the network can be subset.

In [14]:
# List of genes to search for: one per line, skipping blank lines
# (e.g. the empty entry produced by a trailing newline).
with open("data/AD_GDAs.txt", "r") as infile:
    genes = [line.strip() for line in infile if line.strip()]

# Map the gene symbols to Entrez gene IDs with MyGene.info
mg = mygene.MyGeneInfo()
out = mg.querymany(genes, scopes='symbol', fields='entrezgene', species='human')

# Queries without a match come back without an 'entrezgene' field; skip them
# instead of failing with KeyError.
entrezgenes = [int(hit['entrezgene']) for hit in out if 'entrezgene' in hit]

# subgraph() returns a read-only view; copy it into a regular Graph so it can
# be edited in the following cells.
A_brain_frozen = G_brain.subgraph(entrezgenes)
A_brain = nx.Graph(A_brain_frozen)
len(A_brain.nodes)

querying 1-102...done.
Finished.


45

In [15]:
# Delete nodes from components with less than 5 nodes.
# The nodes are collected first and removed in a single call, so the graph is
# never modified while its components are being enumerated.
nodes_to_remove = [
    node
    for component in nx.connected_components(A_brain)
    if len(component) < 5
    for node in component
]
A_brain.remove_nodes_from(nodes_to_remove)

# Remove self-loops
A_brain.remove_edges_from(list(nx.selfloop_edges(A_brain)))

nx.write_edgelist(A_brain, 'data/other_networks/AD_GIANT_brain.edgelist')