In [55]:
import deregnet.core
import deregnet.graphs

Before finding subgraphs, we have to decide on an underlying graph to find subgraphs in. Here, we search for subgraphs in the in-build KEGG graph (for humans). See 'graphs.ipynb' for further detail.

In [56]:
kegg = deregnet.graphs.KEGG()

In [57]:
len(kegg.vs)

5412

In [58]:
len(kegg.es)

56674

In [59]:
node = list(kegg.vs)[0]
print(node.attributes())

{'entrez': '55904', 'name': '55904', 'symbol': 'KMT2E', 'uniprot_ids': ['Q8IZD2'], 'ensembl': 'ENSG00000005483'}


In [60]:
edge = list(kegg.es)[0]
print(edge.attributes())

{'interactions': ['compound']}


Next, for finding subgraphs we'll have to use the 'SubgraphFinder' from deregnet.core:

In [10]:
help(deregnet.core.SubgraphFinder)

Help on class SubgraphFinder in module deregnet.core:

class SubgraphFinder(builtins.object)
 |  This is the main class in deregnet. You can use it to run the subgraph detection
 |  algorithms which are the essence of DeRegNet.
 |  
 |  Methods defined here:
 |  
 |  __del__(self)
 |  
 |  __init__(self, graph, id_attr='name', deregnet_binpath=None, tmp_file_path=None, delete_temporary_files=True)
 |      Args:
 |      
 |          graph (ig.Graph): The regulatory digraph in which you want to find the subgraphs.
 |                            See also the deregnet.graph module.
 |          id_attr (str): name of the attribute which should be taken as id attribute (and
 |                         hence be used to match nodes to scores, etc.).
 |                         Default: 'name'
 |          deregnet_binpath (str): If you want to use this class with other binaries than the
 |                                  default ones you can set the path here. Usually you should 
 |              

Initialize the SubgraphFinder with the big kegg graph:

In [29]:
finder = deregnet.core.SubgraphFinder(kegg)

In [13]:
help(finder.run_average_deregnet)

Help on method run_average_deregnet in module deregnet.core:

run_average_deregnet(scores={}, default_score=None, receptors=None, terminals=None, excluded_nodes=None, included_nodes=None, flip_orientation=False, min_size=15, max_size=50, num_suboptimal=0, max_overlap=0, abs_values=False, model_sense='max', algorithm='GeneralizedCharnesCooper', time_limit=None, gap_cut=None, debug=False) method of deregnet.core.SubgraphFinder instance
    This method runs the average score version of DeRegNet.
    
    Args:
    
        min_size (int): Minimal size of the subgraph(s)
                        Default: 15
        max_size (int): Maximal size of the subgraph(s)
                        Default: 50
        receptors (list): List of node ids which define the receptors
                          Default: []
        terminals (list): List of node ids which define the terminals
                          Default: []
        algorithm (str): Algorithm with which to solve the resulting
             

The other minimum piece of information we need are suitable 'scores' to score the nodes in the graph with. DeRegNet will find optimal subgraph with respect to these scores:

In [30]:
import tcga.analysis.wxs_somatic_masked.wxs_somatic_masked as mutations # tcga mutations
from biomap import BioMap # identifier mapping

Get somatic mutation data for LIHC patients:

In [18]:
data = mutations.TcgaWxsMaskedMutationData('lihc')

/opt/anaconda/3.5/lib/python3.5/site-packages/tcga/lihc/wxs_somatic_masked


The mutation data:

In [20]:
X, genes, patients = data.binary_mutation_matrix_consensus()

In [21]:
X

<14112x361 sparse matrix of type '<class 'numpy.int8'>'
	with 37422 stored elements in COOrdinate format>

In [22]:
genes[:5]

['ENSG00000130383',
 'ENSG00000184014',
 'ENSG00000154493',
 'ENSG00000187918',
 'ENSG00000145022']

In [35]:
genes_entrez = BioMap.get_mapper('hgnc').map(genes, 'ensembl', 'entrez')

In [37]:
genes_entrez[:5]

['2527', '23258', '118611', '390064', '6988']

In [23]:
patients[:5]

['A25T', 'A73A', 'A5UD', 'AB4B', 'A4NB']

In [25]:
X = X.todense()

Now we can define a mutation score for a patient (here patient A25T):

In [39]:
A25T_mutation_score = {gene: X[genes_entrez.index(gene),patients.index('A25T')] for gene in genes_entrez}

In [40]:
A25T_mutation_score

{'404785': 0,
 '9354': 0,
 '10514': 0,
 '2768': 0,
 '6919': 0,
 '285848': 0,
 '5469': 0,
 '54777': 0,
 '89780': 0,
 '57829': 0,
 '8473': 0,
 '165918': 0,
 '7274': 0,
 '80206': 0,
 '51447': 0,
 '2823': 0,
 '4771': 0,
 '6932': 0,
 '4025': 0,
 '6580': 0,
 '3671': 0,
 '6558': 0,
 '64174': 0,
 '80177': 0,
 '1798': 0,
 '338755': 0,
 '221935': 0,
 '134492': 0,
 '51324': 0,
 '6871': 0,
 '26094': 0,
 '55629': 0,
 '26253': 0,
 '9407': 0,
 '56922': 0,
 '7010': 0,
 '390084': 0,
 '27067': 0,
 '85479': 0,
 '23418': 0,
 '7200': 0,
 '57562': 0,
 '3502': 0,
 '51008': 0,
 '54436': 0,
 '23102': 0,
 '3964': 0,
 '57620': 0,
 '6514': 0,
 '3146': 0,
 '57508': 0,
 '480': 0,
 '90007': 0,
 '84962': 0,
 '11064': 0,
 '79623': 0,
 '6545': 0,
 '1153': 0,
 '6137': 0,
 '140689': 0,
 '6093': 0,
 '79295': 0,
 '51050': 0,
 '150350': 0,
 '120586': 0,
 '51347': 0,
 '222901': 0,
 '1263': 0,
 '6360': 0,
 '121512': 0,
 '83538': 0,
 '3593': 0,
 '4312': 0,
 '84366': 0,
 '118442': 0,
 '2516': 0,
 '7570': 0,
 '80714': 0,
 '90025

In [43]:
A25Tsubgraphs = finder.run_average_deregnet(scores=A25T_mutation_score, time_limit=1200) 

/home/sebastian/.deregnet/tmp/2017-11-06-07-09-15/run_2017-11-06-07-31-10
['scores.tsv']
/opt/anaconda/3.5/lib/python3.5/site-packages/deregnet/../../bin/avgdrgnt --time-limit 1200 --model-sense max --max-size 50 --suboptimal 0 --algorithm gcc --score /home/sebastian/.deregnet/tmp/2017-11-06-07-09-15/run_2017-11-06-07-31-10/scores.tsv --min-size 15 --output-dir /home/sebastian/.deregnet/tmp/2017-11-06-07-09-15/run_2017-11-06-07-31-10 --graph /home/sebastian/.deregnet/tmp/2017-11-06-07-09-15/graph.lgf --max-overlap-percentage 0


In [47]:
for v in A25Tsubgraphs.optimal.vs:
    print(v.attributes())

{'deregnet_score': 0, 'name': '27445', 'symbol': 'PCLO', 'uniprot_ids': ['Q9Y6V0'], 'entrez': '27445', 'ensembl': 'ENSG00000186472'}
{'deregnet_score': 0, 'name': '48', 'symbol': 'ACO1', 'uniprot_ids': ['P21399'], 'entrez': '48', 'ensembl': 'ENSG00000122729'}
{'deregnet_score': 0, 'name': '107', 'symbol': 'ADCY1', 'uniprot_ids': ['Q08828'], 'entrez': '107', 'ensembl': 'ENSG00000164742'}
{'deregnet_score': 1, 'name': '22999', 'symbol': 'RIMS1', 'uniprot_ids': ['Q86UR5'], 'entrez': '22999', 'ensembl': 'ENSG00000079841'}
{'deregnet_score': 0, 'name': '5519', 'symbol': 'PPP2R1B', 'uniprot_ids': ['P30154'], 'entrez': '5519', 'ensembl': 'ENSG00000137713'}
{'deregnet_score': 1, 'name': '25865', 'symbol': 'PRKD2', 'uniprot_ids': ['Q9BZL6'], 'entrez': '25865', 'ensembl': 'ENSG00000105287'}
{'deregnet_score': 0, 'name': '11069', 'symbol': 'RAPGEF4', 'uniprot_ids': ['Q8WZA2'], 'entrez': '11069', 'ensembl': 'ENSG00000091428'}
{'deregnet_score': 0, 'name': '57818', 'symbol': 'G6PC2', 'uniprot_ids':

In [48]:
A25Tsubgraphs.to_graphml()

In [49]:
ls

find_subgraphs.ipynb  graphs.ipynb  optimal.graphml
gmon.out              grbfrc.log    visualization.ipynb


In [50]:
A25Tsubgraphs.to_graphml('/home/sebastian/prjcts/SubgraphVisualization/data/tutorial/')

In [51]:
A73A_mutation_score = {gene: X[genes_entrez.index(gene),patients.index('A73A')] for gene in genes_entrez}

In [52]:
A73Asubgraphs = finder.run_average_deregnet(scores=A73A_mutation_score, time_limit=1200) 

/home/sebastian/.deregnet/tmp/2017-11-06-07-09-15/run_2017-11-06-08-00-51
['scores.tsv']
/opt/anaconda/3.5/lib/python3.5/site-packages/deregnet/../../bin/avgdrgnt --time-limit 1200 --model-sense max --max-size 50 --suboptimal 0 --algorithm gcc --score /home/sebastian/.deregnet/tmp/2017-11-06-07-09-15/run_2017-11-06-08-00-51/scores.tsv --min-size 15 --output-dir /home/sebastian/.deregnet/tmp/2017-11-06-07-09-15/run_2017-11-06-08-00-51 --graph /home/sebastian/.deregnet/tmp/2017-11-06-07-09-15/graph.lgf --max-overlap-percentage 0


In [54]:
A73Asubgraphs.to_graphml('/home/sebastian/prjcts/SubgraphVisualization/data/tutorial')

In [61]:
help(kegg.expand_nodes)

Help on method expand_nodes in module deregnet.graphs:

expand_nodes(node_attr, keep) method of deregnet.graphs.KEGG instance

