In [1]:
import requests
import gzip
import pandas as pd
import networkx as nx
import numpy as np

%matplotlib inline
import seaborn as sns

import sys
sys.path = ['/cellar/users/mikeyu/DeepTranslate/ddot'] + sys.path

import ddot
from ddot import Ontology, get_gene_name_converter, parse_gaf, parse_obo, nx_to_NdexGraph, read_term_descriptions, ndex_to_sim_matrix
from ddot import expand_seed, melt_square, make_index, align_hierarchies, update_nx_with_alignment, parse_ndex_uuid, NdexGraph_to_nx, bubble_layout_nx, ddot_pipeline

# Setup parameters

In [2]:
# First two positional arguments of ddot_pipeline below; the logged CLIXO
# command line shows them being forwarded as `0.1 0.5`.
alpha, beta = 0.1, 0.5

In [3]:
# Seed gene symbols handed to ddot_pipeline for the 'Fanconi Anemia' run.
# Order is kept exactly as originally listed.
seed = [
    'FANCA', 'FANCB', 'FANCC', 'BRCA2', 'FANCD2',
    'FANCE', 'FANCF', 'FANCG', 'FANCI', 'BRIP1',
    'FANCL', 'FANCM', 'PALB2', 'RAD51C', 'SLX4',
    'ERCC4', 'RAD51', 'BRCA1', 'UBE2T', 'XRCC2',
]

# Download GO from NDEx

In [4]:
# Disabled one-off step, kept for reference: build the human GO ontology from
# NDEx and write it to the same path that the active cell below reads back with
# Ontology.from_table.
# NOTE(review): ndex_server, ndex_user and ndex_pass are not defined in any
# visible cell -- supply them (not hard-coded) before re-enabling this.
# go_human = Ontology.from_ndex(ddot.config.GO_HUMAN_URL, ndex_server, ndex_user, ndex_pass)
# go_human.summary()

# go_human.to_3col_table('/cellar/users/mikeyu/go.txt')

In [4]:
# Load the ontology from a local table file rather than from NDEx. The path is
# the one the commented-out `to_3col_table` export above would write to.
# NOTE(review): absolute, user-specific path -- the notebook only runs where
# this file exists.
go_human = Ontology.from_table('/cellar/users/mikeyu/go.txt')

In [5]:
# Read term descriptions from a local file (presumably a GO ID -> name mapping,
# judging by the file name -- TODO confirm) and attach them to go_human as node
# attributes. update_node_attr is given a frame, hence the .to_frame() call.
term_descriptions = read_term_descriptions('/cellar/users/mikeyu/DeepTranslate/go_human_7jun2017/goID_2_name.tab')
go_human.update_node_attr(term_descriptions.to_frame())

# Download gene similarity network from NDEx

In [6]:
# Disabled: fetch the gene-similarity matrix from NDEx by network UUID.
# The next cell assigns `sim` / `sim_names` from a local .npz file instead.
# sim, sim_names = ndex_to_sim_matrix(
#     ndex_uuid='d2dfa5cc-56de-11e7-a2e2-0660b7976219',
#     similarity='similarity',
#     input_fmt='cx_matrix',
#     output_fmt='matrix',
#     subset=None)

In [7]:
%time tmp = np.load('/cellar/users/mikeyu/DeepTranslate/hnexo/RFv2r3_square.npz')
rf, rf_genes = tmp['rf'], tmp['genes']
np.fill_diagonal(rf, 0)
rf[np.isnan(rf)] = 0
sim, sim_names = rf, rf_genes

CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 1.75 ms


In [8]:
# Pull the Pharos network from NDEx: `pharos` holds the edges (Node1, Node2)
# and `pharos_nodes` the per-node attributes, including 'Type'.
pharos, pharos_nodes = ndex_to_sim_matrix(
    parse_ndex_uuid(ddot.config.PHAROS_URL),
    similarity=None,
    input_fmt='cx',
    output_fmt='sparse',
)

# Every edge must run from a Gene (Node1) to a Drug (Node2).
assert (pharos_nodes.loc[pharos['Node1'], 'Type'] == 'Gene').all()
assert (pharos_nodes.loc[pharos['Node2'], 'Type'] == 'Drug').all()

# Give the edge columns meaningful names, then collapse to one row per gene
# with its drugs joined into a comma-separated string and a constant flag.
pharos = pharos.rename(columns={'Node1': 'gene', 'Node2': 'drug'})
gene_drug = (
    pharos
    .groupby('gene')
    .agg({'drug': lambda drugs: ','.join(drugs)})
    .assign(has_drug=True)
)

In [9]:
# How the threshold was derived (disabled, Python 2 syntax): take the strict
# upper triangle of `sim` (k=1 skips the diagonal) and read off a percentile.
# NOTE(review): the snippet as written computes the 98th percentile, while the
# active value below is labelled as the 99th -- confirm which one is intended.
# sim_flat = sim[np.triu_indices(sim.shape[0], k=1)]
# min_sim = np.percentile(sim_flat, 98)
# print min_sim

# 99th percentile (hard-coded; passed to ddot_pipeline as expand_kwargs['min_sim'])
min_sim = 5.8080535279783332

# 98th percentile
# min_sim = 5.559528536

In [10]:
# %%time

# Run the ddot pipeline on the seed genes. Per the log it prints, this expands
# the gene set, runs CLIXO with alpha/beta, and aligns the result (the log
# reports the number of matches). Positional order is kept exactly as before.
# Note: the third result is bound to `uuid`, which would shadow the stdlib
# module of that name if it were ever imported in this notebook.
ont, alignment, uuid = ddot_pipeline(
    alpha, beta, sim, sim_names, seed, go_human,
    name='Fanconi Anemia',
    expand_kwargs=dict(expand_size=500, min_sim=min_sim, figure=False),
    align_kwargs=dict(iterations=100, threads=101),
    verbose=True,
)

min_sim: 5.80805352798
Expanded gene set: 69
temp output: /tmp/tmpo6T5HE
temp graph: /tmp/tmppRATvK
temp output log: /tmp/tmp8oLzJP
/tmp/tmppRATvK	0.1	0.5	-10000000 2017-08-08 23:05:39.785566
CLIXO command: /cellar/users/mikeyu/DeepTranslate/ddot/ddot/mhk7-clixo_0.3-cec3674/clixo /tmp/tmppRATvK 0.1 0.5 | awk '{if ( $1 ~ /^#/ ) {print "\#", strftime("%Y-%m-%d %H:%M:%S"), $0 ; fflush() } else {print $0}}' | tee /tmp/tmp8oLzJP
Extracting by grep -v # 2017-08-08 23:05:39.951674
Elapsed time (sec): 0.165986061096 2017-08-08 23:05:39.974577
Ontology: 69 genes, 15 terms, 78 gene-term relations, 14 term-term relations
collapse command: /cellar/users/mikeyu/alignOntology/collapseRedundantNodes /tmp/tmpqgQoFK
collapse command: /cellar/users/mikeyu/alignOntology/collapseRedundantNodes /tmp/tmp7TjwRf
Alignment command: /cellar/users/mikeyu/alignOntology/calculateFDRs /tmp/tmprozzBg /tmp/tmpFQNT_b 0.05 criss_cross /tmp/tmpbFz9df 100 101 gene
Alignment: 5 matches


In [25]:
# Toy frame for experimenting with iteration over a MultiIndex: three rows
# keyed by two-level labels (levels named '1' and '2') and one column 'v'
# holding 0, 1, 2.
idx = pd.MultiIndex.from_arrays(
    [['a', 'a', 'c'], ['b', 'c', 'd']],
    names=['1', '2'],
)
df = pd.DataFrame(np.arange(3), index=idx, columns=['v'])

In [33]:
# Print each (index label, value) pair of df's first column, one per line.
# Replaces `list(df.iteritems())[0][1].iteritems()`: iteritems was removed in
# pandas 2.0 and that form materialised every column just to take the first.
# Pairing df.index with the column values yields the same (label, value)
# sequence. The format-based print emits the same "label value" text under
# both Python 2 and Python 3 (the bare `print x, y` statement is Python 2 only).
for x, y in zip(df.index, df.iloc[:, 0]):
    print('{} {}'.format(x, y))

('a', 'b') 0
('a', 'c') 1
('c', 'd') 2
