In [1]:
import sys

In [2]:
BASE_DIR = ".."
DATA_DIR = f"{BASE_DIR}/data"
NBDATA_DIR = f"{BASE_DIR}/nbdata"
FIG_DIR = f"{BASE_DIR}/figs"

In [3]:
sys.path.append(BASE_DIR)

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

In [5]:
import anndata as ad
import scanpy as sc

## Load GENIE Networks

In [79]:
rna_ctr_network = pd.read_csv(f"{NBDATA_DIR}/iPSC_rna_ctr_genie_weights.csv")

In [80]:
rna_als_network = pd.read_csv(f"{NBDATA_DIR}/iPSC_rna_als_genie_weights.csv")

In [161]:
rna_als_network

Unnamed: 0,ENSG00000110514,ENSG00000268358,ENSG00000086015,ENSG00000272373,ENSG00000254148,ENSG00000211767,ENSG00000211766,ENSG00000211765,ENSG00000169740,ENSG00000215869,...,ENSG00000156931,ENSG00000143457,ENSG00000152558,ENSG00000143452,ENSG00000143450,ENSG00000268351,ENSG00000182872,ENSG00000182873,ENSG00000182870,ENSG00000182871
ENSG00000169740,0.000032,0.000006,0.000295,0.000028,0.000183,0.000741,0.000065,5.185249e-07,0.000000,0.000446,...,5.205145e-05,4.544518e-04,0.000010,0.000728,0.000205,0.000299,0.000294,0.000064,1.789730e-04,0.000097
ENSG00000168298,0.000013,0.000031,0.000020,0.000045,0.000027,0.000575,0.000000,3.778312e-05,0.000328,0.000007,...,5.511694e-07,3.150463e-04,0.000210,0.002791,0.000026,0.000011,0.000000,0.000005,1.679530e-04,0.000025
ENSG00000162599,0.000006,0.000215,0.000542,0.000037,0.000967,0.000982,0.000793,0.000000e+00,0.001201,0.000472,...,7.256045e-05,5.707031e-05,0.000022,0.000727,0.000621,0.000000,0.000012,0.000158,1.104833e-07,0.000069
ENSG00000029363,0.000012,0.000109,0.000002,0.001055,0.000050,0.002819,0.000339,0.000000e+00,0.002079,0.000002,...,4.083337e-05,1.041173e-04,0.000130,0.000727,0.000304,0.000009,0.005481,0.000166,3.757347e-06,0.000000
ENSG00000162337,0.000235,0.000130,0.000179,0.000716,0.000456,0.000000,0.000000,2.206875e-04,0.000002,0.000002,...,2.207821e-03,2.790294e-04,0.000604,0.000000,0.000000,0.000270,0.000000,0.000005,1.941027e-03,0.000900
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000143458,0.000002,0.000119,0.000853,0.000017,0.000746,0.000105,0.000046,0.000000e+00,0.000071,0.000000,...,5.489136e-05,3.637633e-08,0.000086,0.000244,0.000239,0.000078,0.000407,0.000575,7.758150e-05,0.000097
ENSG00000100219,0.002651,0.000127,0.000138,0.000149,0.000034,0.000000,0.000000,9.425163e-04,0.001020,0.000000,...,2.735787e-03,1.129219e-03,0.000088,0.000058,0.000302,0.001148,0.000041,0.000280,8.725854e-04,0.000064
ENSG00000197037,0.000553,0.000029,0.000375,0.000741,0.000733,0.002310,0.000000,0.000000e+00,0.000638,0.000000,...,2.060168e-04,6.438554e-04,0.000908,0.000005,0.000205,0.000115,0.000042,0.000009,1.505012e-04,0.000000
ENSG00000151623,0.001090,0.000048,0.000019,0.000000,0.000033,0.000149,0.000402,8.791408e-05,0.000000,0.000851,...,1.042824e-04,6.151708e-06,0.000309,0.000000,0.000020,0.000017,0.000141,0.000008,1.943372e-03,0.000028


In [125]:
edge_mag_thresh = 0.999

In [148]:
from sklearn.preprocessing import StandardScaler

def scale_and_threshold_network(df, quantile=0.95):
    scaled = StandardScaler().fit_transform(df.values)
    thresh = np.quantile(scaled, quantile)
    scaled[np.abs(scaled) < thresh] = 0
    scaled[np.abs(scaled) >= thresh] = 1
    newdf = pd.DataFrame(scaled,columns=df.columns,index=df.index)
    return newdf

In [149]:
scaled_rna_ctr_values = scale_and_threshold_network(rna_ctr_network, quantile=edge_mag_thresh)
scaled_rna_als_values = scale_and_threshold_network(rna_als_network, quantile=edge_mag_thresh)

In [150]:
rna_als_nx = nx.from_pandas_adjacency(scaled_rna_als_values)
rna_ctr_nx = nx.from_pandas_adjacency(scaled_rna_ctr_values)

In [152]:
nx.write_weighted_edgelist(rna_ctr_nx,f"{NBDATA_DIR}/iPSC_rna_ctr_genie_edgelist_0.999.tsv", delimiter="\t")
nx.write_weighted_edgelist(rna_als_nx,f"{NBDATA_DIR}/iPSC_rna_als_genie_edgelist_0.999.tsv", delimiter="\t")

## Map to Numeric for MFinder

In [168]:
def node_numeric_map(network, outfile):
    converted_nx = nx.convert_node_labels_to_integers(network, ordering="sorted")
    with open(f"{outfile}.txt", "w+") as f:
        for (i,j) in nx.edges(converted_nx):
            f.write(f"{i} {j} 1\n")
            # nx.write_weighted_edgelist(converted_nx, f"{outfile}.txt", delimiter=" ")
    with open(f"{outfile}_MAP.tsv", "w+") as f:
        for i, n in enumerate(sorted(network.nodes())):
            f.write(f"{n}\t{i}\n")

In [169]:
node_numeric_map(rna_ctr_nx,f"{NBDATA_DIR}/iPSC_rna_ctr_genie_integer_edgelist_0.999")
node_numeric_map(rna_als_nx,f"{NBDATA_DIR}/iPSC_rna_als_genie_integer_edgelist_0.999")

## Write TF ID File

In [142]:
tf_list = set(pd.read_csv(f"{NBDATA_DIR}/human_tflist.txt",header=None)[0])

In [158]:
def write_tf_mapper(network, outfile):
    with open(outfile,"w+") as f:
        for i, n in enumerate(sorted(network.nodes())):
            f.write(f"{i}\t{n}\t{int(n in tf_list)}\n")

In [159]:
write_tf_mapper(rna_ctr_nx, f"{NBDATA_DIR}/iPSC_rna_ctr_TF_id_list.tsv")
write_tf_mapper(rna_als_nx, f"{NBDATA_DIR}/iPSC_rna_als_TF_id_list.tsv")