# Motif search pipeline

In [2]:
import os
import sys
import json
import numpy as np
sys.path.insert(0, "./scripts")
from itertools import combinations, product
import joblib
from tqdm import tqdm
import functions as f

Current parametrisation

In [3]:
# Read the current run settings through the project helper, pointed at ./config.json.
# The recorded output lists every key and value that was loaded.
cfg = f.get_actual_parametrization("./config.json")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: fanout
SELFLOOPS_INCLUDED: 0
NETWORK_TO_SEARCH_IN: yeast
MATRIX_NAME: interaction_matrix


# 1. Random network for testing

In [3]:
# Configure the test run: "test" network, feedforward motif, self-loops included.
# verbose=False is passed on the first two calls; the recorded output shows the
# resulting settings listed once.
cfg = f.update_cfg("./config.json", "NETWORK_TO_SEARCH_IN", "test", verbose=False)
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "feedforward", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: feedforward
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: test


In [5]:
# All possible gene triads: every unordered triple of gene indices.
# The matrix is loaded in this cell so that it runs on a fresh kernel (the name
# was previously misspelt and never assigned). Assumes f.get_interacion_matrix(cfg)
# returns the test network while NETWORK_TO_SEARCH_IN is "test" -- TODO confirm
# in functions.py.
interaction_matrix = f.get_interacion_matrix(cfg)
combs = list(combinations(range(interaction_matrix.shape[0]), 3))
print(f"Number of genes triads: {len(combs)}")

Number of genes triads: 161700


Search space splitting for parallel processing

In [6]:
# Split the triads in combs into parts for the parallel search; the recorded run
# reports 10 parts (presumably PARALLEL_THREADS_NUMBER -- confirm in functions.py).
f.split_search_space(cfg, combs)

Search space have been splitted into 10


Launching search pipeline in parallel

In [7]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of feedforward-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

400 feedforward with selfloops motifs have been found
Total time spent: 0:00:03.393634


# 2. Yeast Tnet

In [4]:
# Point the run at the yeast network; verbose is left at its default and the
# recorded output lists the resulting settings.
cfg = f.update_cfg("./config.json", "NETWORK_TO_SEARCH_IN", "yeast")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: fanout
SELFLOOPS_INCLUDED: 0
NETWORK_TO_SEARCH_IN: yeast
MATRIX_NAME: interaction_matrix


In [5]:
# Load the yeast node groups saved next to the network.
# NOTE(review): joblib.load unpickles the file, so only load meta.pkl from a
# trusted source.
meta = joblib.load(os.path.join(".", "networks", "yeast", "meta.pkl"))
# The file is expected to hold exactly five collections, in this order.
tf_nodes, tf_only_nodes, tg_nodes, tg_only_nodes, tf_x_tg_nodes = meta

Motif-specific search space design

1. Feed-forward

In [6]:
# Select the feedforward motif without self-loops on the plain interaction_matrix.
# verbose=False is passed on the first two calls; the recorded output shows the
# resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "feedforward", verbose=False)
cfg = f.update_cfg("./config.json", "MATRIX_NAME", "interaction_matrix", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 0)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: feedforward
SELFLOOPS_INCLUDED: 0
NETWORK_TO_SEARCH_IN: yeast
MATRIX_NAME: interaction_matrix


In [7]:
# Print the equivalent 3x3 matrix forms of the configured motif; the recorded
# output lists 6 forms for feedforward.
f.print_equivalents(cfg)

Equivalent forms for feedforward    (6 total):
[[0 0 0]
 [1 0 0]
 [1 1 0]]

[[0 0 0]
 [1 0 1]
 [1 0 0]]

[[0 1 0]
 [0 0 0]
 [1 1 0]]

[[0 0 1]
 [1 0 1]
 [0 0 0]]

[[0 1 1]
 [0 0 0]
 [0 1 0]]

[[0 1 1]
 [0 0 1]
 [0 0 0]]



In [8]:
%%time
combs = list(combinations(tf_nodes, 3))
combs += [(i, j, k) for (i, j), k in product(list(combinations(tf_nodes, 2)), tg_only_nodes)]
print(f"Number of genes triads (for feed-forward): {len(combs)}")

Number of genes triads (for feed-forward): 53094574
CPU times: user 5.79 s, sys: 1.01 s, total: 6.8 s
Wall time: 6.8 s


In [9]:
# Split the feed-forward triads in combs into parts for the parallel search; the
# recorded run reports 10 parts (presumably PARALLEL_THREADS_NUMBER -- confirm).
f.split_search_space(cfg, combs)

Search space have been splitted into 10


In [10]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of feedforward motifs found and the total time spent.
f.make_parallel_search(cfg)

3374 feedforward motifs have been found
Total time spent: 0:05:21.460526


2. Feedforward with selfloops

In [8]:
# Select the feedforward motif with self-loops and switch to interaction_matrix_sl.
# verbose=False is passed on the first two calls; the recorded output shows the
# resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "feedforward", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1, verbose=False)
cfg = f.update_cfg("./config.json", "MATRIX_NAME", "interaction_matrix_sl")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: feedforward
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: yeast
MATRIX_NAME: interaction_matrix_sl


In [9]:
# Print the equivalent 3x3 matrix forms of the configured motif; the recorded
# output lists 6 forms for feedforward with selfloops.
f.print_equivalents(cfg)

Equivalent forms for feedforward with selfloops    (6 total):
[[1 0 0]
 [1 1 0]
 [1 1 1]]

[[1 0 0]
 [1 1 1]
 [1 0 1]]

[[1 1 0]
 [0 1 0]
 [1 1 1]]

[[1 0 1]
 [1 1 1]
 [0 0 1]]

[[1 1 1]
 [0 1 0]
 [0 1 1]]

[[1 1 1]
 [0 1 1]
 [0 0 1]]



In [15]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of feedforward-with-selfloops motifs found and the total time spent.
# NOTE(review): no split is made in this section, so the search presumably reuses
# the parts produced by an earlier f.split_search_space call -- confirm.
f.make_parallel_search(cfg)

3555 feedforward with selfloops motifs have been found
Total time spent: 0:05:36.353171


3. Fanin

In [6]:
# Select the fanin motif without self-loops on the plain interaction_matrix.
# verbose=False is passed on the first two calls; the recorded output shows the
# resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "fanin", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 0, verbose=False)
cfg = f.update_cfg("./config.json", "MATRIX_NAME", "interaction_matrix")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: fanin
SELFLOOPS_INCLUDED: 0
NETWORK_TO_SEARCH_IN: yeast
MATRIX_NAME: interaction_matrix


In [7]:
# Print the equivalent 3x3 matrix forms of the configured motif; the recorded
# output lists 3 forms for fanin.
f.print_equivalents(cfg)

Equivalent forms for fanin    (3 total):
[[0 1 1]
 [0 0 0]
 [0 0 0]]

[[0 0 0]
 [1 0 1]
 [0 0 0]]

[[0 0 0]
 [0 0 0]
 [1 1 0]]



In [10]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of fanin motifs found and the total time spent.
f.make_parallel_search(cfg)

26042 fanin motifs have been found
Total time spent: 0:05:45.118447


4. Fanin with selfloops

In [11]:
# Select the fanin motif with self-loops and switch to interaction_matrix_sl.
# verbose=False is passed on the first two calls; the recorded output shows the
# resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "fanin", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1, verbose=False)
cfg = f.update_cfg("./config.json", "MATRIX_NAME", "interaction_matrix_sl")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: fanin
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: yeast
MATRIX_NAME: interaction_matrix_sl


In [12]:
# Print the equivalent 3x3 matrix forms of the configured motif; the recorded
# output lists 3 forms for fanin with selfloops.
f.print_equivalents(cfg)

Equivalent forms for fanin with selfloops    (3 total):
[[1 1 1]
 [0 1 0]
 [0 0 1]]

[[1 0 0]
 [1 1 1]
 [0 0 1]]

[[1 0 0]
 [0 1 0]
 [1 1 1]]



In [14]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of fanin-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

26042 fanin with selfloops motifs have been found
Total time spent: 0:05:38.975707


5. Cascade

In [15]:
# Select the cascade motif without self-loops on the plain interaction_matrix.
# verbose=False is passed on the first two calls; the recorded output shows the
# resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "cascade", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 0, verbose=False)
cfg = f.update_cfg("./config.json", "MATRIX_NAME", "interaction_matrix")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: cascade
SELFLOOPS_INCLUDED: 0
NETWORK_TO_SEARCH_IN: yeast
MATRIX_NAME: interaction_matrix


In [16]:
# Print the equivalent 3x3 matrix forms of the configured motif; the recorded
# output lists 6 forms for cascade.
f.print_equivalents(cfg)

Equivalent forms for cascade    (6 total):
[[0 0 0]
 [1 0 0]
 [0 1 0]]

[[0 0 0]
 [0 0 1]
 [1 0 0]]

[[0 1 0]
 [0 0 0]
 [1 0 0]]

[[0 0 1]
 [1 0 0]
 [0 0 0]]

[[0 0 1]
 [0 0 0]
 [0 1 0]]

[[0 1 0]
 [0 0 1]
 [0 0 0]]



In [17]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of cascade motifs found and the total time spent.
f.make_parallel_search(cfg)

37631 cascade motifs have been found
Total time spent: 0:05:50.536849


6. Cascade with selfloops

In [18]:
# Select the cascade motif with self-loops and switch to interaction_matrix_sl.
# verbose=False is passed on the first two calls; the recorded output shows the
# resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "cascade", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1, verbose=False)
cfg = f.update_cfg("./config.json", "MATRIX_NAME", "interaction_matrix_sl")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: cascade
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: yeast
MATRIX_NAME: interaction_matrix_sl


In [19]:
# Print the equivalent 3x3 matrix forms of the configured motif; the recorded
# output lists 6 forms for cascade with selfloops.
f.print_equivalents(cfg)

Equivalent forms for cascade with selfloops    (6 total):
[[1 0 0]
 [1 1 0]
 [0 1 1]]

[[1 0 0]
 [0 1 1]
 [1 0 1]]

[[1 1 0]
 [0 1 0]
 [1 0 1]]

[[1 0 1]
 [1 1 0]
 [0 0 1]]

[[1 0 1]
 [0 1 0]
 [0 1 1]]

[[1 1 0]
 [0 1 1]
 [0 0 1]]



In [20]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of cascade-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

37631 cascade with selfloops motifs have been found
Total time spent: 0:05:52.107115



# 3. GeneSPIDER

### SNR 0.01

In [22]:
# Point the run at the gs0.01 network and the plain interaction_matrix.
# verbose=False is passed on the first call only; the recorded output shows the
# resulting settings listed once.
cfg = f.update_cfg("./config.json", "NETWORK_TO_SEARCH_IN", "gs0.01", verbose=False)
cfg = f.update_cfg("./config.json", "MATRIX_NAME", "interaction_matrix")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: cascade
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs0.01
MATRIX_NAME: interaction_matrix


In [28]:
# Fetch the interaction matrix selected by cfg; its first dimension is used
# below as the number of genes.
interaction_matrix = f.get_interacion_matrix(cfg)

In [30]:
%%time
# all possible gene triads
combs = list(combinations(range(interaction_matrix.shape[0]), 3))
print(f"Number of genes triads: {len(combs)}")

Number of genes triads: 85013600
CPU times: user 7.98 s, sys: 2.33 s, total: 10.3 s
Wall time: 10.3 s


Search space splitting for parallel processing

In [31]:
# Split the triads in combs into parts for the parallel search; the recorded
# progress bar shows 10 steps taking about 11 minutes in total.
f.split_search_space(cfg, combs)

100%|██████████| 10/10 [11:06<00:00, 66.68s/it]


1. Feedforward with selfloops

In [32]:
# Select the feedforward motif with self-loops. verbose=False is passed on the
# first call only; the recorded output shows the resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "feedforward", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: feedforward
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs0.01
MATRIX_NAME: interaction_matrix


In [33]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of feedforward-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

51 feedforward with selfloops motifs have been found
Total time spent: 0:09:04.512421


2. Fanout with selfloops

In [34]:
# Select the fanout motif with self-loops. verbose=False is passed on the first
# call only; the recorded output shows the resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "fanout", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: fanout
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs0.01
MATRIX_NAME: interaction_matrix


In [35]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of fanout-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

4709 fanout with selfloops motifs have been found
Total time spent: 0:09:08.531130


3. Fanin with selfloops

In [36]:
# Select the fanin motif with self-loops. verbose=False is passed on the first
# call only; the recorded output shows the resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "fanin", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: fanin
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs0.01
MATRIX_NAME: interaction_matrix


In [37]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of fanin-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

3816 fanin with selfloops motifs have been found
Total time spent: 0:09:30.114427


4. Cascade with selfloops

In [38]:
# Select the cascade motif with self-loops. verbose=False is passed on the first
# call only; the recorded output shows the resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "cascade", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: cascade
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs0.01
MATRIX_NAME: interaction_matrix


In [39]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of cascade-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

8741 cascade with selfloops motifs have been found
Total time spent: 0:09:39.402747


### SNR 0.1

In [40]:
# Point the run at the gs0.1 network and (re)select the plain interaction_matrix.
# Here it is the second call that passes verbose=False; the recorded output shows
# the settings listed once.
cfg = f.update_cfg("./config.json", "NETWORK_TO_SEARCH_IN", "gs0.1")
cfg = f.update_cfg("./config.json", "MATRIX_NAME", "interaction_matrix", verbose=False)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: cascade
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs0.1
MATRIX_NAME: interaction_matrix


In [65]:
# Fetch the interaction matrix selected by cfg; its first dimension is used
# below as the number of genes.
interaction_matrix = f.get_interacion_matrix(cfg)

In [66]:
%%time
# all possible gene triads
combs = list(combinations(range(interaction_matrix.shape[0]), 3))
print(f"Number of genes triads: {len(combs)}")

Number of genes triads: 85013600
CPU times: user 10.1 s, sys: 5.77 s, total: 15.8 s
Wall time: 16 s


Search space splitting for parallel processing

In [42]:
# Split the triads in combs into parts for the parallel search; the recorded
# progress bar shows 10 steps taking about 12 minutes in total.
f.split_search_space(cfg, combs)

100%|██████████| 10/10 [11:46<00:00, 70.62s/it]


1. Feedforward with selfloops

In [43]:
# Select the feedforward motif with self-loops. verbose=False is passed on the
# first call only; the recorded output shows the resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "feedforward", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: feedforward
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs0.1
MATRIX_NAME: interaction_matrix


In [44]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of feedforward-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

73 feedforward with selfloops motifs have been found
Total time spent: 0:08:47.377029


2. Fanout with selfloops

In [45]:
# Select the fanout motif with self-loops. verbose=False is passed on the first
# call only; the recorded output shows the resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "fanout", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: fanout
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs0.1
MATRIX_NAME: interaction_matrix


In [46]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of fanout-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

5261 fanout with selfloops motifs have been found
Total time spent: 0:09:02.822430


3. Fanin with selfloops

In [47]:
# Select the fanin motif with self-loops. verbose=False is passed on the first
# call only; the recorded output shows the resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "fanin", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: fanin
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs0.1
MATRIX_NAME: interaction_matrix


In [48]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of fanin-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

4079 fanin with selfloops motifs have been found
Total time spent: 0:09:18.503020


4. Cascade with selfloops

In [49]:
# Select the cascade motif with self-loops. verbose=False is passed on the first
# call only; the recorded output shows the resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "cascade", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: cascade
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs0.1
MATRIX_NAME: interaction_matrix


In [50]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of cascade-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

9459 cascade with selfloops motifs have been found
Total time spent: 0:09:55.123609


### SNR 1

In [51]:
# Point the run at the gs1 network and (re)select the plain interaction_matrix.
# Here it is the second call that passes verbose=False; the recorded output shows
# the settings listed once.
cfg = f.update_cfg("./config.json", "NETWORK_TO_SEARCH_IN", "gs1")
cfg = f.update_cfg("./config.json", "MATRIX_NAME", "interaction_matrix", verbose=False)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: cascade
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs1
MATRIX_NAME: interaction_matrix


In [62]:
# Fetch the interaction matrix selected by cfg; its first dimension is used
# below as the number of genes.
interaction_matrix = f.get_interacion_matrix(cfg)

In [64]:
%%time
# all possible gene triads
combs = list(combinations(range(interaction_matrix.shape[0]), 3))
print(f"Number of genes triads: {len(combs)}")

Number of genes triads: 85013600
CPU times: user 9.76 s, sys: 4.73 s, total: 14.5 s
Wall time: 14.6 s


Search space splitting for parallel processing

In [53]:
# Split the triads in combs into parts for the parallel search; the recorded
# progress bar shows 10 steps taking about 11 minutes in total.
f.split_search_space(cfg, combs)

100%|██████████| 10/10 [11:17<00:00, 67.75s/it]


1. Feedforward with selfloops

In [54]:
# Select the feedforward motif with self-loops. verbose=False is passed on the
# first call only; the recorded output shows the resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "feedforward", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: feedforward
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs1
MATRIX_NAME: interaction_matrix


In [55]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of feedforward-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

58 feedforward with selfloops motifs have been found
Total time spent: 0:10:19.129788


2. Fanout with selfloops

In [56]:
# Select the fanout motif with self-loops. verbose=False is passed on the first
# call only; the recorded output shows the resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "fanout", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: fanout
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs1
MATRIX_NAME: interaction_matrix


In [57]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of fanout-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

4702 fanout with selfloops motifs have been found
Total time spent: 0:11:43.317575


3. Fanin with selfloops

In [58]:
# Select the fanin motif with self-loops. verbose=False is passed on the first
# call only; the recorded output shows the resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "fanin", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: fanin
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs1
MATRIX_NAME: interaction_matrix


In [59]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of fanin-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

3666 fanin with selfloops motifs have been found
Total time spent: 0:11:27.493955


4. Cascade with selfloops

In [60]:
# Select the cascade motif with self-loops. verbose=False is passed on the first
# call only; the recorded output shows the resulting settings listed once.
cfg = f.update_cfg("./config.json", "MOTIF_TO_SEARCH_FOR", "cascade", verbose=False)
cfg = f.update_cfg("./config.json", "SELFLOOPS_INCLUDED", 1)

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: cascade
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: gs1
MATRIX_NAME: interaction_matrix


In [61]:
# Run the parallel search with the current cfg; the recorded output reports the
# number of cascade-with-selfloops motifs found and the total time spent.
f.make_parallel_search(cfg)

8626 cascade with selfloops motifs have been found
Total time spent: 0:12:16.383227


In [None]:
!pip freeze | \
grep\
-e 'numpy=='\
-e 'pandas=='\
-e 'numba=='\
-e 'joblib=='\
-e 'json=='\
-e 'tqdm=='\
> requirements.txt