In [1]:
from deregnet.core import SubgraphFinder

In [2]:
help(SubgraphFinder)

Help on class SubgraphFinder in module deregnet.core:

class SubgraphFinder(builtins.object)
 |  SubgraphFinder(graph, id_attr='name', deregnet_binpath=None, tmp_file_path=None, delete_temporary_files=True, log_file=None)
 |  
 |  This is the main class in deregnet. You can use it to run the subgraph detection
 |  algorithms which are the essence of DeRegNet.
 |  
 |  Methods defined here:
 |  
 |  __del__(self)
 |  
 |  __init__(self, graph, id_attr='name', deregnet_binpath=None, tmp_file_path=None, delete_temporary_files=True, log_file=None)
 |      Args:
 |      
 |          graph (ig.Graph): The regulatory digraph in which you want to find the subgraphs.
 |                            See also the deregnet.graph module.
 |          id_attr (str): name of the attribute which should be taken as id attribute (and
 |                         hence be used to match nodes to scores, etc.).
 |                         Default: 'name'
 |          deregnet_binpath (str): If you want to use thi

## Define the regulatory network

In [3]:
import igraph as ig

In [4]:
kegg_graph = ig.Graph.Read_GraphML('kegg_hsa.graphml')

In [5]:
len(kegg_graph.vs)

5512

In [6]:
len(kegg_graph.es)

71363

In [7]:
kegg_graph.vs[0]

igraph.Vertex(<igraph.Graph object at 0x7fdf98107a98>, 0, {'name': '4799', 'entrez': '4799', 'ensembl': 'ENSG00000086102', 'symbol': 'NFX1', 'id': 'n0'})

## Define the node scores

In [8]:
import pandas as pd

In [9]:
scores = pd.read_csv('score.csv')

In [10]:
scores.head()

Unnamed: 0,id,score
0,ENSG00000197465,0.0
1,ENSG00000137955,0.0
2,ENSG00000135341,0.0
3,ENSG00000187997,0.0
4,ENSG00000162736,0.0


In [14]:
scores.set_index('id', inplace=True)

In [17]:
scores = scores.to_dict(orient='dict')['score']

In [18]:
len(scores)

14112

## Find a deregulated subgraph

In [19]:
subgraph_finder = SubgraphFinder(kegg_graph)

In [20]:
subgraphs =  subgraph_finder.run_average_deregnet(scores)

In [37]:
cat deregnet.log

Academic license - for non-commercial use only


Calculating Umax ...
 
Gurobi Optimizer version 9.0.2 build v9.0.2rc0 (linux64)
Optimize a model with 11027 rows, 11024 columns and 96462 nonzeros
Model fingerprint: 0xdc2ea599
Variable types: 0 continuous, 11024 integer (11024 binary)
Coefficient statistics:
  Matrix range     [1e+00, 5e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 5e+01]
Found heuristic solution: objective 15.0000000
Presolve removed 2193 rows and 1095 columns
Presolve time: 0.26s
Presolved: 8834 rows, 9929 columns, 91770 nonzeros
Variable types: 0 continuous, 9929 integer (9926 binary)

Root relaxation: cutoff, 2 iterations, 0.01 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0     cutoff    0        15.00000   15.00000  0.00%     -    0s

Explored 0 nodes (2 simplex iterations) in 0.30 second

In [22]:
help(subgraphs)

Help on SubgraphFinderResult in module deregnet.core object:

class SubgraphFinderResult(builtins.object)
 |  SubgraphFinderResult(optimal, suboptimal, mode)
 |  
 |  Instances of this class will be returned by calls to SubgraphFinder's
 |  subgraph detection algorithm run methods.
 |  
 |  Methods defined here:
 |  
 |  __init__(self, optimal, suboptimal, mode)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  to_graphml(self, path='.', compress=False)
 |  
 |  to_graphmlz(self, i, filename)
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  abs_scores
 |  
 |  avg_scores
 |  
 |  mode
 |  
 |  num_nodes
 |  
 |  num_nodes_optimal
 |  
 |  num_nodes_suboptimal
 |  
 |  optimal_abs_score
 |  
 |  optimal_avg_score
 |  
 |  optimal_sco

In [31]:
optimal_subgraph = subgraphs.subgraphs[0]

In [33]:
len(optimal_subgraph.vs)

50

In [32]:
for v in optimal_subgraph.vs:
    print(v['ensembl'])

ENSG00000065675
ENSG00000097007
ENSG00000160999
ENSG00000148053
ENSG00000124181
ENSG00000143933
ENSG00000099942
ENSG00000110395
ENSG00000169750
ENSG00000160691
ENSG00000139687
ENSG00000154380
ENSG00000128340
ENSG00000007264
ENSG00000129946
ENSG00000119630
ENSG00000146648
ENSG00000197943
ENSG00000113721
ENSG00000148082
ENSG00000126561
ENSG00000115415
ENSG00000111252
ENSG00000129007
ENSG00000163932
ENSG00000136997
ENSG00000185634
ENSG00000134853
ENSG00000173511
ENSG00000177885
ENSG00000167193
ENSG00000169885
ENSG00000109339
ENSG00000169855
ENSG00000185008
ENSG00000104998
ENSG00000178363
ENSG00000198668
ENSG00000114423
ENSG00000112715
ENSG00000169047
ENSG00000173757
ENSG00000178188
ENSG00000178372
ENSG00000150630
ENSG00000160014
ENSG00000136238
ENSG00000172575
ENSG00000120156
ENSG00000108342


In [34]:
subgraphs.to_graphml()

In [35]:
cat optimal.graphml

<?xml version="1.0" encoding="UTF-8"?>
<graphml xmlns="http://graphml.graphdrawing.org/xmlns"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns
         http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">
<!-- Created by igraph -->
  <key id="v_name" for="node" attr.name="name" attr.type="string"/>
  <key id="v_entrez" for="node" attr.name="entrez" attr.type="string"/>
  <key id="v_ensembl" for="node" attr.name="ensembl" attr.type="string"/>
  <key id="v_symbol" for="node" attr.name="symbol" attr.type="string"/>
  <key id="v_id" for="node" attr.name="id" attr.type="string"/>
  <key id="v_deregnet_score" for="node" attr.name="deregnet_score" attr.type="boolean"/>
  <key id="e_interaction" for="edge" attr.name="interaction" attr.type="string"/>
  <graph id="G" edgedefault="directed">
    <node id="n0">
      <data key="v_name">5588</data>
      <data key="v_entrez">5588</data>
      <data key="v_ensembl">