# Network construction

Formulation of the network structure in terms of a binary interaction matrix.

In [9]:
import os
import sys
sys.path.insert(0, "./scripts")
import numpy as np
import pandas as pd
from itertools import combinations, product
import joblib
import functions as f

Current parametrisation (loaded from `./config.json`)

In [10]:
# Load the run configuration from the project's JSON config file.
# `cfg` is indexed by string keys further down (e.g. cfg["RANDOM_SEED"]).
# NOTE(review): the helper appears to also print the loaded parameters
# (see the cell output) -- confirm in scripts/functions.py.
cfg = f.get_actual_parametrization("./config.json")

RANDOM_SEED: 19
TEST_NETWORK_SIZE: 100
TEST_NETWORK_LINK_PROB: 0.3
PARALLEL_THREADS_NUMBER: 10
MOTIF_TO_SEARCH_FOR: feedforward
SELFLOOPS_INCLUDED: 1
NETWORK_TO_SEARCH_IN: yeast


## Random network for testing

In [5]:
# Output directory for the randomly generated test network.
TEST_NETWORK_PATH = "./networks/test"

In [6]:
# Seed the legacy global NumPy RNG so the test network is reproducible.
np.random.seed(cfg["RANDOM_SEED"])

network_size = cfg["TEST_NETWORK_SIZE"]
link_probability = cfg["TEST_NETWORK_LINK_PROB"]

# One Bernoulli trial per ordered node pair: every entry of the square matrix
# (diagonal included) is 1 with probability `link_probability`, else 0.
matrix_shape = (network_size, network_size)
interaction_matrix = np.random.binomial(n=1, p=link_probability, size=matrix_shape)

In [8]:
# Create the output directory, including any missing parent such as
# "./networks"; exist_ok makes re-running the cell a no-op.
os.makedirs(TEST_NETWORK_PATH, exist_ok=True)

# Persist the random test network; the trailing ";" hides joblib's return value.
joblib.dump(interaction_matrix, os.path.join(TEST_NETWORK_PATH, "interaction_matrix.pkl"));

## Yeast Tnet

In [15]:
# Output directory for the yeast transcriptional network artefacts.
YEAST_NETWORK_PATH = "./networks/yeast"

In [16]:
# Tab-separated edge list with one regulator ("Tf") -> target ("Tg") pair per row.
edges = pd.read_csv("./raw_data/tfcomb/tnet.txt", sep="\t")

# Every gene that occurs as a regulator or as a target, deduplicated and sorted.
all_endpoints = np.concatenate((edges.Tf.unique(), edges.Tg.unique()))
gene_names = sorted(np.unique(all_endpoints))

# Lookup table: gene name (index) -> integer node id (column "idx").
nodes = pd.DataFrame(data=range(len(gene_names)), index=gene_names, columns=["idx"])

print(f"Total genes number:\t{len(nodes)}")
print(f"Interactions number:\t{len(edges)}")
edges.head()

Total genes number:	4441
Interactions number:	12873


Unnamed: 0,Tf,Tg
0,YAL051W,YAL016W
1,YAL051W,YAL034WA
2,YAL051W,YAL035CA
3,YAL051W,YAL035W
4,YAL051W,YAL036C


In [17]:
# Attach integer node ids to both endpoints of every edge. The first join adds
# an "idx" column for the regulator; the second join collides on "idx", so the
# suffixes rename the pair to "idx_tf" (left) and "idx_tg" (right).
edges_ = edges.join(nodes, on="Tf").join(nodes, on="Tg", lsuffix="_tf", rsuffix="_tg")

tf_nodes = edges_["idx_tf"].unique()
print(f"Total TF:\t{len(tf_nodes)}")
tg_nodes = edges_["idx_tg"].unique()
print(f"Total TG:\t{len(tg_nodes)}")

# Build the sets once and reuse them for the three partitions below.
tf_set, tg_set = set(tf_nodes), set(tg_nodes)

tf_x_tg_nodes = np.array(sorted(tf_set & tg_set))  # nodes that both regulate and are regulated
print(f"TF and TG:\t{len(tf_x_tg_nodes)}")
tg_only_nodes = np.array(sorted(tg_set - tf_set))  # nodes that are only ever targets
print(f"TG only:\t{len(tg_only_nodes)}")
tf_only_nodes = np.array(sorted(tf_set - tg_set))  # nodes that are only ever regulators
print(f"TF only:\t{len(tf_only_nodes)}")

# The tuple order is part of the on-disk format; keep it stable for readers of meta.pkl.
meta = tf_nodes, tf_only_nodes, tg_nodes, tg_only_nodes, tf_x_tg_nodes

# Make sure the output directory exists before writing: on a fresh checkout
# nothing has created it yet and the dump would raise FileNotFoundError.
os.makedirs(YEAST_NETWORK_PATH, exist_ok=True)
joblib.dump(meta, os.path.join(YEAST_NETWORK_PATH, "meta.pkl"));

Total TF:	157
Total TG:	4410
TF and TG:	126
TG only:	4284
TF only:	31


In [18]:
def build_Tnet(edges, n):
    """Build a dense ``n x n`` binary interaction matrix from an index edge list.

    Parameters
    ----------
    edges : numpy.ndarray
        2-D array whose first two columns are expected to hold integer
        indices in ``[0, n)``; each row marks one cell ``(row[0], row[1])``.
    n : int
        Side length of the square output matrix.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, n)`` with 1 at every listed cell and 0
        elsewhere. Repeated edges simply set the same cell again.
    """
    row_idx = edges[:, 0]
    col_idx = edges[:, 1]
    adjacency = np.zeros(shape=(n, n))
    adjacency[row_idx, col_idx] = 1
    return adjacency

# Column order is (target, regulator), so build_Tnet sets
# interaction_matrix[tg, tf] = 1: rows index the regulated gene and columns
# index the transcription factor.
# NOTE(review): confirm this orientation is what the downstream motif search expects.
np_edges = edges_[["idx_tg", "idx_tf"]].values
interaction_matrix = build_Tnet(np_edges, len(nodes))

# Create the output directory, including any missing parent such as
# "./networks"; exist_ok makes re-running the cell a no-op.
os.makedirs(YEAST_NETWORK_PATH, exist_ok=True)

joblib.dump(interaction_matrix, os.path.join(YEAST_NETWORK_PATH, "interaction_matrix.pkl"));

Add another version of the network with self-loops

In [19]:
# Request a self-loop on every node: pass the full list of node indices.
all_node_indices = list(range(interaction_matrix.shape[0]))
interaction_matrix_sl = f.add_self_loops(interaction_matrix, all_node_indices)

# Stored next to the loop-free matrix under a distinct file name.
sl_matrix_file = os.path.join(YEAST_NETWORK_PATH, "interaction_matrix_sl.pkl")
joblib.dump(interaction_matrix_sl, sl_matrix_file);

## GeneSPIDER

### SNR 0.01

In [30]:
# Output directory for the network generated at SNR 0.01.
GS_NETWORK_PATH = "./networks/gs0.01"

# Headerless CSV of the network matrix. Binarise it: the bool cast turns every
# non-zero entry into True, and the int cast turns that into 0/1.
interaction_matrix = (
    pd.read_csv("./raw_data/GS_N800/N800_SNR_0.01.csv", header=None)
    .astype(bool)
    .astype(int)
    .values
)
print(interaction_matrix.shape)

# Create the output directory, including any missing parent such as
# "./networks"; exist_ok makes re-running the cell a no-op.
os.makedirs(GS_NETWORK_PATH, exist_ok=True)

joblib.dump(interaction_matrix, os.path.join(GS_NETWORK_PATH, "interaction_matrix.pkl"));

(800, 800)


### SNR 0.1

In [32]:
# Output directory for the network generated at SNR 0.1.
GS_NETWORK_PATH = "./networks/gs0.1"

# Headerless CSV of the network matrix. Binarise it: the bool cast turns every
# non-zero entry into True, and the int cast turns that into 0/1.
interaction_matrix = (
    pd.read_csv("./raw_data/GS_N800/N800_SNR_0.1.csv", header=None)
    .astype(bool)
    .astype(int)
    .values
)
print(interaction_matrix.shape)

# Create the output directory, including any missing parent such as
# "./networks"; exist_ok makes re-running the cell a no-op.
os.makedirs(GS_NETWORK_PATH, exist_ok=True)

joblib.dump(interaction_matrix, os.path.join(GS_NETWORK_PATH, "interaction_matrix.pkl"));

(800, 800)


In [36]:
# Output directory for the network generated at SNR 1.
GS_NETWORK_PATH = "./networks/gs1"

# Headerless CSV of the network matrix. Binarise it: the bool cast turns every
# non-zero entry into True, and the int cast turns that into 0/1.
interaction_matrix = (
    pd.read_csv("./raw_data/GS_N800/N800_SNR_1.csv", header=None)
    .astype(bool)
    .astype(int)
    .values
)
print(interaction_matrix.shape)

# Create the output directory, including any missing parent such as
# "./networks"; exist_ok makes re-running the cell a no-op.
os.makedirs(GS_NETWORK_PATH, exist_ok=True)

joblib.dump(interaction_matrix, os.path.join(GS_NETWORK_PATH, "interaction_matrix.pkl"));

(800, 800)


## E. coli network

TBA