# Motif search pipeline

In [1]:
import os
import sys
import json
import numpy as np
import networkx as nx
sys.path.insert(0, "./scripts")
from itertools import combinations, product
import joblib
from tqdm import tqdm
import functions as f

Current parametrisation (as read from `./config.json`)

In [2]:
# Read the run parameters from ./config.json into `cfg`; the helper also prints
# the key/value pairs (see the output below).
# NOTE(review): the printed NETWORK_TO_SEARCH_IN is whatever value the file held
# from the last run — presumably because update_cfg writes back to the file; confirm.
cfg = f.get_actual_parametrization("./config.json")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 500
TEST_NETWORK_LINK_PROB: 0.1
N_CORES_TO_USE: -1
NETWORK_TO_SEARCH_IN: gs0.1


### Motifs library

The conventional notation for all possible triads is shown below.

<img src="./pics/triads.png" width=2000 height=20/>

We are mainly interested in counting the connected ones (i.e. excluding 003, 012, 102),

especially __021C (cascade)__, __021D (fan-out)__, __021U (fan-in)__, and __030T (feed-forward loop)__.

The presence of any self-loops is ignored in the current version of the algorithm.

# 1. Random network for testing

In [3]:
# Select the synthetic "test" network as the search target; the helper prints the
# resulting settings (see the output below).
# NOTE(review): update_cfg presumably also persists the new value to ./config.json — confirm in scripts/functions.py.
cfg = f.update_cfg("./config.json", "NETWORK_TO_SEARCH_IN", "test")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 500
TEST_NETWORK_LINK_PROB: 0.1
N_CORES_TO_USE: -1
NETWORK_TO_SEARCH_IN: test


In [4]:
# Build the interaction matrix for the network selected in `cfg`.
# NOTE(review): for "test" this is presumably a random graph driven by
# TEST_NETWORK_SIZE / TEST_NETWORK_LINK_PROB / RANDOM_SEED — confirm.
# (The helper really is spelled `get_interacion_matrix` in the functions module.)
interaction_matrix = f.get_interacion_matrix(cfg)

In [5]:
%%time
# Run the motif search on the test network; returns the motifs found and a
# per-triad-type count. `%%time` must stay on the first line of the cell.
# NOTE(review): batch_size is 10000 here but 20000 for the other networks —
# presumably the size of each work chunk; confirm its meaning in motif_search.
motifs, counter = f.motif_search(cfg, interaction_matrix, batch_size=10000)

CPU times: user 11.6 s, sys: 314 ms, total: 11.9 s
Wall time: 24.4 s


In [6]:
# Show the motif counts, keyed by triad code, for the random test network.
counter

{'021C': 812563, '021D': 407130, '021U': 405182, '030C': 30253, '030T': 90008}

# 2. Yeast Tnet

In [7]:
# Select the yeast network as the search target; the helper prints the resulting settings.
# NOTE(review): update_cfg presumably also persists the new value to ./config.json — confirm.
cfg = f.update_cfg("./config.json", "NETWORK_TO_SEARCH_IN", "yeast")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 500
TEST_NETWORK_LINK_PROB: 0.1
N_CORES_TO_USE: -1
NETWORK_TO_SEARCH_IN: yeast


In [8]:
# Build the interaction matrix for the yeast network selected in `cfg`
# (overwrites the matrix from the previous section).
interaction_matrix = f.get_interacion_matrix(cfg)

In [9]:
%%time
# Run the motif search on the yeast network; returns the motifs found and a
# per-triad-type count. `%%time` must stay on the first line of the cell.
motifs, counter = f.motif_search(cfg, interaction_matrix, batch_size=20000)

CPU times: user 7.24 s, sys: 252 ms, total: 7.49 s
Wall time: 12.8 s


In [10]:
# Show the motif counts, keyed by triad code, for the yeast network.
counter

{'021C': 37631, '021D': 1059856, '021U': 26042, '030C': 8, '030T': 3370}

# 3. E. coli

In [11]:
# Select the E. coli network as the search target; the helper prints the resulting settings.
# NOTE(review): update_cfg presumably also persists the new value to ./config.json — confirm.
cfg = f.update_cfg("./config.json", "NETWORK_TO_SEARCH_IN", "ecoli")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 500
TEST_NETWORK_LINK_PROB: 0.1
N_CORES_TO_USE: -1
NETWORK_TO_SEARCH_IN: ecoli


In [12]:
# Build the interaction matrix for the E. coli network selected in `cfg`
# (overwrites the matrix from the previous section).
interaction_matrix = f.get_interacion_matrix(cfg)

In [13]:
%%time
# Run the motif search on the E. coli network; returns the motifs found and a
# per-triad-type count. `%%time` must stay on the first line of the cell.
motifs, counter = f.motif_search(cfg, interaction_matrix, batch_size=20000)

CPU times: user 2.01 s, sys: 116 ms, total: 2.12 s
Wall time: 4.09 s


In [14]:
# Show the motif counts, keyed by triad code, for the E. coli network.
counter

{'021C': 3938, '021D': 329287, '021U': 4365, '030C': 0, '030T': 1392}


# 4. GeneSPIDER

### SNR 0.01

In [15]:
# Select the "gs0.01" network (SNR 0.01) as the search target; the helper prints the resulting settings.
# NOTE(review): update_cfg presumably also persists the new value to ./config.json — confirm.
cfg = f.update_cfg("./config.json", "NETWORK_TO_SEARCH_IN", "gs0.01")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 500
TEST_NETWORK_LINK_PROB: 0.1
N_CORES_TO_USE: -1
NETWORK_TO_SEARCH_IN: gs0.01


In [16]:
# Build the interaction matrix for the "gs0.01" network selected in `cfg`
# (overwrites the matrix from the previous section).
interaction_matrix = f.get_interacion_matrix(cfg)

In [17]:
%%time
# Run the motif search on the "gs0.01" network; returns the motifs found and a
# per-triad-type count. `%%time` must stay on the first line of the cell.
motifs, counter = f.motif_search(cfg, interaction_matrix, batch_size=20000)

CPU times: user 131 ms, sys: 52.9 ms, total: 183 ms
Wall time: 1.34 s


In [18]:
# Show the motif counts, keyed by triad code, for the SNR 0.01 network.
counter

{'021C': 8741, '021D': 4709, '021U': 3816, '030C': 14, '030T': 51}

### SNR 0.1

In [19]:
# Select the "gs0.1" network (SNR 0.1) as the search target; the helper prints the resulting settings.
# NOTE(review): update_cfg presumably also persists the new value to ./config.json — confirm.
cfg = f.update_cfg("./config.json", "NETWORK_TO_SEARCH_IN", "gs0.1")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 500
TEST_NETWORK_LINK_PROB: 0.1
N_CORES_TO_USE: -1
NETWORK_TO_SEARCH_IN: gs0.1


In [20]:
# Build the interaction matrix for the "gs0.1" network selected in `cfg`
# (overwrites the matrix from the previous section).
interaction_matrix = f.get_interacion_matrix(cfg)

In [21]:
%%time
# Run the motif search on the "gs0.1" network; returns the motifs found and a
# per-triad-type count. `%%time` must stay on the first line of the cell.
motifs, counter = f.motif_search(cfg, interaction_matrix, batch_size=20000)

CPU times: user 126 ms, sys: 45.9 ms, total: 172 ms
Wall time: 1.33 s


In [22]:
# Show the motif counts, keyed by triad code, for the SNR 0.1 network.
counter

{'021C': 9459, '021D': 5261, '021U': 4079, '030C': 17, '030T': 73}

### SNR 1

In [23]:
# Select the "gs1" network (SNR 1) as the search target; the helper prints the resulting settings.
# NOTE(review): update_cfg presumably also persists the new value to ./config.json — confirm.
cfg = f.update_cfg("./config.json", "NETWORK_TO_SEARCH_IN", "gs1")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 500
TEST_NETWORK_LINK_PROB: 0.1
N_CORES_TO_USE: -1
NETWORK_TO_SEARCH_IN: gs1


In [24]:
# Build the interaction matrix for the "gs1" network selected in `cfg`
# (overwrites the matrix from the previous section).
interaction_matrix = f.get_interacion_matrix(cfg)

In [25]:
%%time
# Run the motif search on the "gs1" network; returns the motifs found and a
# per-triad-type count. `%%time` must stay on the first line of the cell.
motifs, counter = f.motif_search(cfg, interaction_matrix, batch_size=20000)

CPU times: user 114 ms, sys: 43.5 ms, total: 158 ms
Wall time: 1.32 s


In [26]:
# Show the motif counts, keyed by triad code, for the SNR 1 network.
counter

{'021C': 8626, '021D': 4702, '021U': 3666, '030C': 17, '030T': 58}

In [27]:
!pip freeze | \
grep\
-e 'numpy=='\
-e 'pandas=='\
-e 'numba=='\
-e 'joblib=='\
-e 'json=='\
-e 'tqdm=='\
> requirements.txt