# Milieu

Milieu is a disease protein discovery algorithm based on the hypothesis that proteins associated with the same disease share mutual interactors in the protein-protein interaction network.   

In [138]:
%load_ext autoreload
%autoreload 2

import os

import visJS2jupyter.visJS_module
import networkx as nx

from milieu.data.network import PPINetwork
from milieu.data.associations import load_diseases
from milieu.milieu import MilieuDataset, Milieu
from milieu.figures.network_vis import show_network

# The relative `data/...` paths used throughout this notebook are resolved against
# the current working directory, so switch to the repository root first.
# Set the MILIEU_ROOT environment variable to point at your own checkout
# (e.g. "/dfs/scratch0/sabri/milieu"); when it is unset, the original
# hard-coded location is used, exactly as before.
os.chdir(os.environ.get("MILIEU_ROOT", "/Users/sabrieyuboglu/Documents/sabri/research/milieu"))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load the PPI Network

We use the protein-protein interaction network compiled by Menche *et al.* [1]. The network consists of 342,353 interactions between 21,557 proteins.
You can find this network in `data/networks` as `bio-pathways-network.txt`. See Methods for a more detailed description of the network. 
You can also find two other protein-protein interaction networks, `string-network.txt` and `bio-grid-network.txt`. See Supplementary Note 3 for a detailed description.

In [55]:
# Load the bio-pathways protein-protein interaction network from its file.
ppi_network_path = "data/networks/bio-pathways-network.txt"
network = PPINetwork(ppi_network_path)

## Build the *Milieu* Model

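We configure the model with the `params` dictionary below, which sets the device options, batch size, number of data-loading workers, number of epochs, optimizer, and evaluation metrics.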

In [3]:
# Configuration dictionary handed to the Milieu constructor.
params = dict(
    # device options
    cuda=False,
    device=2,

    # batching and number of epochs
    batch_size=200,
    num_workers=4,
    num_epochs=3,

    # optimizer class name and the keyword arguments it is given
    optim_class="Adam",
    optim_args=dict(lr=1, weight_decay=0),

    # metrics, each described by a display name, a function name and its arguments
    metric_configs=[
        dict(name="recall_at_25", fn="batch_recall_at", args=dict(k=25)),
    ],
)

In [4]:
# Instantiate the Milieu model from the loaded PPI network and the `params` dict.
milieu = Milieu(network, params)

Milieu
Setting parameters...
Building model...
Building optimizer...
Done.


## Train the Model
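*Milieu* is trained on a large set of known disease-protein associations. We use the associations in `data/associations/disgenet-associations.csv`, assigning the first 90% of the diseases to the training set and the remaining 10% to the validation set.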

In [5]:
# Load the disease associations and keep the values of the returned mapping as a list.
associations_path = "data/associations/disgenet-associations.csv"
diseases = list(load_diseases(associations_path, exclude_splits=["none"]).values())

# Positional 90/10 split: the leading 90% form the training split,
# the remainder forms the validation split.
split_index = int(len(diseases) * 0.9)
train_diseases, valid_diseases = diseases[:split_index], diseases[split_index:]

# Wrap each split in a dataset built on the same PPI network.
train_dataset = MilieuDataset(network, diseases=train_diseases)
valid_dataset = MilieuDataset(network, diseases=valid_diseases)

## Predict Novel Associations

In [6]:
# Query proteins for the discovery example; passed to `discover` as `genbank_ids`.
cholecystitis_proteins = [
    'ENG', 'ALDOA', 'GDF2', 'GPI',
    'HK1', 'SMAD4', 'ARSA', 'ABCB4',
    'PKLR', 'BPGM', 'TPI1', 'ACVRL1',
]

In [148]:
# Rank candidate proteins for the query set. `discover` yields (protein, score)
# pairs; they are kept under their own name so the scores stay available and
# `predicted_proteins` is only ever bound to one kind of value.
scored_predictions = milieu.discover(genbank_ids=cholecystitis_proteins, top_k=10)

# Keep just the protein names, as a tuple (the same type the previous
# `list(zip(*...))[0]` expression produced). Unlike that expression, this also
# works when no predictions come back: it gives an empty tuple instead of
# raising IndexError.
predicted_proteins = tuple(prediction[0] for prediction in scored_predictions)

In [149]:
# Draw the query proteins and the predicted proteins on the PPI network.
# NOTE(review): `style` is not assigned in any cell visible in this notebook, so this
# call looks like it would raise NameError on a fresh kernel — confirm where `style`
# is meant to come from and define it before this cell.
show_network(network, cholecystitis_proteins, predicted_proteins, style=style, show_seed_mi=False)

111


Cytoscape(data={'elements': {'nodes': [{'data': {'role': 'seed', 'id': 1443, 'entrez': 2022, 'genbank': 'ENG'}…

In [32]:
from cyjupyter import Cytoscape


In [33]:

# Minimal Cytoscape.js-style network: four nodes ('node 1' .. 'node 4') joined by
# five edges ('edge1' .. 'edge5'), each edge naming its source and target node.
minimal_cyjs_network = {
    'elements': {
        'nodes': [
            {'data': {'id': f'node {number}'}} for number in range(1, 5)
        ],
        'edges': [
            {'data': {'id': f'edge{number}',
                      'source': f'node {src}',
                      'target': f'node {dst}'}}
            for number, (src, dst) in enumerate(
                [(1, 2), (1, 3), (2, 3), (2, 4), (3, 4)], start=1
            )
        ],
    }
}

In [34]:

# Fetch one public network from NDEx and parse the response body as JSON.

# Python client for the NDEx database
import ndex2.client as nc

# Unique ID for a network entry in NDEx
# (this notebook-level name is a plain string, not the standard-library `uuid` module)
uuid ='f28356ce-362d-11e5-8ac5-06603eb7f303'

# NDEx public server URL
# NOTE(review): this is a plain-http URL — confirm whether https should be used instead.
ndex_url = 'http://public.ndexbio.org/'

# Create an instance of NDEx client
ndex=nc.Ndex2(ndex_url)

# Download the network in CX format
response=ndex.get_network_as_cx_stream(uuid)
# Only the status code is printed; nothing here stops the cell on a non-200 response.
print('Response code from NDEx: ', response.status_code)

# Store the data in a Python object
# NOTE(review): `cx` is not read by any later cell visible in this notebook.
cx = response.json()

Response code from NDEx:  200


In [35]:
# Display the hand-built four-node example network as a Cytoscape widget
# (the widget is the cell's last expression, so it is rendered as the output).
Cytoscape(data=minimal_cyjs_network)


Cytoscape(data={'elements': {'nodes': [{'data': {'id': 'node 1'}}, {'data': {'id': 'node 2'}}, {'data': {'id':…

In [14]:
# Second query set for `discover`; passed as `genbank_ids`.
arnold_proteins = [
    "FGFR1", "ERF", "MKS1",
    "POR", "FGFR3", "FGFR2",
    "NOTCH2", "PTCH1", "ZIC1",
]

In [15]:
# Rank candidate proteins for the second query set and display the
# (protein, score) pairs as the cell output.
# NOTE(review): the saved output lists 22 pairs although top_k=25 — confirm
# whether `discover` is expected to return fewer than top_k results.
milieu.discover(genbank_ids=arnold_proteins, top_k=25)

[('PHEX', 0.9999964),
 ('COL3A1', 0.99689615),
 ('ZIC3', 0.9864237),
 ('FGFRL1', 0.9861942),
 ('COL6A1', 0.97802424),
 ('NRP1', 0.9757894),
 ('DUSP3', 0.9686838),
 ('FGFBP1', 0.96764284),
 ('PTCH2', 0.9665226),
 ('ZIC2', 0.96509117),
 ('HSPG2', 0.9603956),
 ('DESI1', 0.950775),
 ('FGF9', 0.94697315),
 ('DLL3', 0.9399293),
 ('CSHL1', 0.9371621),
 ('COL2A1', 0.9358387),
 ('COL1A1', 0.9207403),
 ('STK36', 0.8977488),
 ('PAX2', 0.87458116),
 ('CYB5A', 0.87166786),
 ('FGFR4', 0.85650355),
 ('STIL', 0.85361814)]

In [46]:
import networkx as nx
import visJS2jupyter.visJS_module

In [47]:
# Small random example graph for the visJS2jupyter demo:
# connected Watts-Strogatz graph with n=30 nodes, k=5 and rewiring probability p=0.2.
# The generator is random, so a fixed seed is passed to make Restart & Run All
# produce the same graph every time.
G = nx.connected_watts_strogatz_graph(30, 5, 0.2, seed=0)
nodes = list(G.nodes()) # must cast to list to maintain compatibility between nx 1.11 and 2.0
edges = list(G.edges()) # will return an "EdgeView" object in nx 2.0

In [50]:
# Initial node positions come from networkx's spring_layout; each coordinate is
# scaled by 300 before being stored on the node record.
pos = nx.spring_layout(G)

# One record per node: its id plus the scaled x/y position.
nodes_dict = [
    {"id": node, "x": pos[node][0]*300, "y": pos[node][1]*300}
    for node in nodes
]

# Edge endpoints are expressed as indices into `nodes`, so map node -> index.
node_map = {node: index for index, node in enumerate(nodes)}

# One record per edge, with both endpoints translated through `node_map`.
edges_dict = [
    {"source": node_map[source], "target": node_map[target], "title": 'test'}
    for source, target in edges
]

visJS2jupyter.visJS_module.visjs_network(nodes_dict, edges_dict)

1. Menche, J. et al. Uncovering disease-disease relationships through the incomplete interactome. Science 347, 1257601 (2015).