In [1]:
import requests
import gzip
import pandas as pd
import networkx as nx
import numpy as np

%matplotlib inline
import seaborn as sns

import sys

# Put the local ddot checkout at the front of the import path so that the
# `import ddot` that follows resolves to it. Any earlier occurrence of the
# same entry is dropped first, so re-running this cell does not pile up
# duplicate entries in sys.path.
ddot_dir = '/cellar/users/mikeyu/DeepTranslate/ddot'
sys.path = [ddot_dir] + [entry for entry in sys.path if entry != ddot_dir]

import ddot
from ddot import Ontology, get_gene_name_converter, parse_gaf, parse_obo, nx_to_NdexGraph, read_term_descriptions, ndex_to_sim_matrix
from ddot import expand_seed, melt_square, make_index, align_hierarchies, update_nx_with_alignment, parse_ndex_uuid, NdexGraph_to_nx, bubble_layout_nx, ddot_pipeline




# Setup parameters

In [2]:
# Tunable parameters handed to ddot_pipeline as its first two positional
# arguments; the pipeline log shows the same two values on the CLIXO
# command line.
alpha, beta = 0.1, 0.5

In [3]:
# Seed gene symbols passed to ddot_pipeline (presumably Fanconi anemia
# pathway genes, going by the pipeline's `name` argument - confirm).
seed = [
    'FANCA', 'FANCB', 'FANCC', 'BRCA2', 'FANCD2',
    'FANCE', 'FANCF', 'FANCG', 'FANCI', 'BRIP1',
    'FANCL', 'FANCM', 'PALB2', 'RAD51C', 'SLX4',
    'ERCC4', 'RAD51', 'BRCA1', 'UBE2T', 'XRCC2',
]

# Download GO from NDEx

In [4]:
# Build the human Gene Ontology object from the NDEx network referenced by
# ddot.config.GO_HUMAN_URL, then show its summary.
go_human = Ontology.from_ndex(ddot.config.GO_HUMAN_URL)
# Parenthesised single-argument print behaves the same on Python 2 and
# also runs on Python 3 (the bare `print x` statement does not).
print(go_human.summary())

In [4]:
# Reload the ontology from a local pickle. The file was presumably written
# once with the (disabled) line below - confirm:
# go_human.to_pickle('/cellar/users/mikeyu/go.pkl')
# NOTE: this rebinds `go_human`, replacing the object built from NDEx in the
# previous cell.
# NOTE(review): if read_pickle unpickles the file, only load trusted files.
go_pickle_path = '/cellar/users/mikeyu/go.pkl'
go_human = Ontology.read_pickle(go_pickle_path)

# Load gene similarity network (NDEx download disabled; read from a local `.npz` file instead)

In [5]:
# sim, sim_names = ndex_to_sim_matrix(
#     ndex_uuid='d2dfa5cc-56de-11e7-a2e2-0660b7976219',
#     similarity='similarity',
#     input_fmt='cx_matrix',
#     output_fmt='matrix',
#     subset=None)

In [6]:
%time tmp = np.load('/cellar/users/mikeyu/DeepTranslate/hnexo/RFv2r3_square.npz')
rf, rf_genes = tmp['rf'], tmp['genes']
np.fill_diagonal(rf, 0)
rf[np.isnan(rf)] = 0
sim, sim_names = rf, rf_genes

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 720 ms


# Download gene-drug targets

In [7]:
# Fetch the network referenced by ddot.config.PHAROS_URL. With
# output_fmt='sparse' the first return value is used below as a DataFrame
# with 'Node1' / 'Node2' columns (presumably one row per gene-drug edge -
# confirm against ndex_to_sim_matrix).
pharos_uuid = parse_ndex_uuid(ddot.config.PHAROS_URL)
pharos, pharos_nodes = ndex_to_sim_matrix(
    pharos_uuid,
    similarity=None,
    input_fmt='cx',
    output_fmt='sparse')
pharos = pharos.rename(columns={'Node1': 'gene', 'Node2': 'drug'})

# One row per gene: its drugs joined into a single comma-separated string,
# plus a constant has_drug=True flag column.
gene_drug = (
    pharos.groupby('gene')
    .agg({'drug': lambda drugs: ','.join(drugs)})
    .assign(has_drug=True)
)




# Run Ontology pipeline

In [8]:
# Run the ddot pipeline on the similarity matrix, seed genes and GO
# reference prepared above. It returns three values, unpacked here as the
# ontology, a URL and an NDEx graph object (names taken from this cell; see
# ddot_pipeline for the exact types). `gene_drug` is attached via node_attr.
# The expand_kwargs / align_kwargs dicts are passed through unchanged;
# 'threads': 40 is machine-specific - lower it on smaller hosts.
ont, url, ont_ndexgraph = ddot_pipeline(
    alpha,
    beta,
    sim,
    sim_names,
    seed,
    go_human,
    node_attr=gene_drug,
    name='Fanconi Anemia Gene Ontology (FanGO)',
    expand_kwargs={'seed_perc': 0, 'agg': 'mean', 'figure': False},
    align_kwargs={'iterations': 100, 'threads': 40},
    verbose=True
)
# Parenthesised single-argument print behaves the same on Python 2 and
# also runs on Python 3 (the bare `print x` statement does not).
print(url)

min_sim: 5.20081410586
Expanded gene set: 246
temp output: /tmp/tmpJ118Mb
temp graph: /tmp/tmp7T7KTZ
temp output log: /tmp/tmpL6SEKs
/tmp/tmp7T7KTZ	0.1	0.5	-10000000 2017-08-15 19:53:28.585888
CLIXO command: /cellar/users/mikeyu/DeepTranslate/ddot/ddot/mhk7-clixo_0.3-cec3674/clixo /tmp/tmp7T7KTZ 0.1 0.5 | awk '{if ( $1 ~ /^#/ ) {print "\#", strftime("%Y-%m-%d %H:%M:%S"), $0 ; fflush() } else {print $0}}' | tee /tmp/tmpL6SEKs
Extracting by grep -v # 2017-08-15 19:53:30.768520
Elapsed time (sec): 2.21404886246 2017-08-15 19:53:30.927493
Ontology: 246 genes, 38 terms, 290 gene-term relations, 38 term-term relations
node_attributes: 
edge_attributes: CLIXO_score
collapse command: /cellar/users/mikeyu/alignOntology/collapseRedundantNodes /tmp/tmpypIRwj
collapse command: /cellar/users/mikeyu/alignOntology/collapseRedundantNodes /tmp/tmp6U2Eb2
Alignment command: /cellar/users/mikeyu/DeepTranslate/ddot/ddot/alignOntology/calculateFDRs /tmp/tmpS5Nm7I /tmp/tmp2opGPc 0.05 criss_cross /tmp/tmpZyVW