In [1]:
import sys

In [2]:
# Directory layout, expressed relative to the repository root (BASE_DIR).
BASE_DIR = "../.."
DATA_DIR = BASE_DIR + "/data"
# Intermediate notebook artefacts: this notebook reads its inputs from, and
# writes its outputs to, NBDATA_DIR.
NBDATA_DIR = BASE_DIR + "/nbdata"
FIG_DIR = BASE_DIR + "/figs"

In [3]:
# Put BASE_DIR on the module search path. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry each time.
if BASE_DIR not in sys.path:
    sys.path.append(BASE_DIR)

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

In [5]:
import anndata as ad
import scanpy as sc

## Load GENIE Networks

In [6]:
# Load the "ctr" GENIE weight matrix (presumably the control condition).
# index_col=0 turns the first CSV column (the row gene IDs) into the index:
# the scaling step below operates on df.values and needs a purely numeric
# frame, and nx.from_pandas_adjacency needs the index to match the columns.
# NOTE(review): assumes this file has the same layout as the ALS matrix
# displayed below (gene IDs as row labels) — confirm.
rna_ctr_network = pd.read_csv(f"{NBDATA_DIR}/iMN_rna_ctr_genie_weights.csv", index_col=0)

In [7]:
# Load the ALS GENIE weight matrix.
# index_col=0 turns the first CSV column (the row gene IDs) into the index:
# the scaling step below operates on df.values and needs a purely numeric
# frame, and nx.from_pandas_adjacency needs the index to match the columns.
rna_als_network = pd.read_csv(f"{NBDATA_DIR}/iMN_rna_als_genie_weights.csv", index_col=0)

In [8]:
# Show the ALS weight matrix; rows and columns are both labelled by Ensembl
# gene IDs, and the notebook renders a truncated view of the frame.
rna_als_network

Unnamed: 0,ENSG00000110514,ENSG00000268358,ENSG00000086015,ENSG00000272373,ENSG00000254148,ENSG00000169740,ENSG00000261609,ENSG00000169744,ENSG00000215864,ENSG00000215866,...,ENSG00000235750,ENSG00000156931,ENSG00000143457,ENSG00000152558,ENSG00000143452,ENSG00000143450,ENSG00000182872,ENSG00000182873,ENSG00000182870,ENSG00000182871
ENSG00000169740,1.290431e-06,0.001301,1.235692e-03,0.000000,0.001240,0.000000,2.238965e-04,0.000000,4.877072e-05,0.000709,...,0.000000,0.000940,0.000000,1.787635e-04,0.000000e+00,0.000156,0.000648,0.000000e+00,0.000000e+00,2.794316e-05
ENSG00000168298,8.403352e-09,0.000628,3.692618e-05,0.000104,0.000192,0.000011,1.727130e-04,0.000896,0.000000e+00,0.000188,...,0.000000,0.000198,0.000149,8.887419e-04,1.182020e-04,0.000115,0.001050,6.285126e-04,3.406759e-07,2.911707e-05
ENSG00000162599,0.000000e+00,0.000000,2.335671e-07,0.000510,0.000000,0.000000,0.000000e+00,0.000563,6.314158e-05,0.001691,...,0.004067,0.000471,0.000968,2.732049e-06,0.000000e+00,0.001502,0.000000,3.733204e-05,4.878143e-04,3.103210e-04
ENSG00000029363,1.146667e-03,0.000181,1.232046e-03,0.000056,0.000222,0.000000,2.191623e-04,0.000000,2.148400e-06,0.000434,...,0.000000,0.000950,0.000406,1.692099e-06,0.000000e+00,0.000000,0.000294,2.059041e-05,-4.641394e-11,2.247557e-04
ENSG00000162337,0.000000e+00,0.000004,0.000000e+00,0.000028,0.000004,0.000000,8.016615e-07,0.000223,1.641644e-05,0.000069,...,0.000000,0.000054,0.000030,3.675390e-08,0.000000e+00,0.000000,0.000004,1.420305e-03,3.782981e-04,2.969446e-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000143458,6.963654e-04,0.000002,0.000000e+00,0.000000,0.000023,0.000348,0.000000e+00,0.000375,2.902564e-04,0.000323,...,0.000774,0.000192,0.001164,4.757971e-04,1.498457e-07,0.000234,0.000869,2.698311e-04,1.196932e-04,1.321158e-05
ENSG00000100219,2.219983e-03,0.000000,7.476254e-05,0.000319,0.000023,0.001059,3.080756e-06,0.000000,3.839032e-04,0.000242,...,0.000231,0.000701,0.000000,9.454322e-06,8.897180e-05,0.000023,0.001430,1.649616e-05,5.696657e-04,1.019432e-05
ENSG00000197037,1.049991e-03,0.000002,6.748280e-04,0.001821,0.000000,0.001194,0.000000e+00,0.000046,7.896225e-04,0.000000,...,0.000000,0.000002,0.000047,5.233832e-04,0.000000e+00,0.000000,0.000686,1.971073e-08,3.109038e-05,1.012649e-08
ENSG00000151623,3.490384e-04,0.000540,6.320109e-06,0.000018,0.000007,0.000671,1.656869e-04,0.000002,6.665280e-08,0.000000,...,0.000125,0.000000,0.000137,3.214660e-04,1.469307e-04,0.000062,0.000000,1.387820e-06,0.000000e+00,6.276749e-04


In [9]:
# Quantile handed to scale_and_threshold_network below: entries whose scaled
# magnitude falls under the corresponding cutoff are zeroed, the rest become 1.
edge_mag_thresh = 0.999

In [10]:
from sklearn.preprocessing import StandardScaler

def scale_and_threshold_network(df, quantile=0.95):
    """Binarise a weight matrix by scaling it and applying a quantile cutoff.

    The values of ``df`` are transformed with ``StandardScaler`` and the
    ``quantile``-th quantile over all scaled entries is taken as the cutoff.
    Entries whose absolute scaled value is below the cutoff become 0; entries
    whose absolute scaled value is at or above it become 1.

    NOTE(review): the cutoff is computed from the signed scaled values while
    the comparison uses absolute values — confirm this is intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Matrix to binarise; only ``df.values`` is scaled.
    quantile : float, default 0.95
        Quantile passed to ``np.quantile``.

    Returns
    -------
    pandas.DataFrame
        New frame of 0/1 values with the index and columns of ``df``.
    """
    zscores = StandardScaler().fit_transform(df.values)
    cutoff = np.quantile(zscores, quantile)
    magnitude = np.abs(zscores)
    # An entry that satisfies neither comparison (e.g. NaN) keeps its scaled
    # value, exactly as a pair of in-place masked assignments would leave it.
    binarised = np.where(
        magnitude < cutoff,
        0.0,
        np.where(magnitude >= cutoff, 1.0, zscores),
    )
    return pd.DataFrame(binarised, index=df.index, columns=df.columns)

In [11]:
# Binarise both weight matrices with the same quantile (control first, then ALS).
scaled_rna_ctr_values, scaled_rna_als_values = (
    scale_and_threshold_network(weights, quantile=edge_mag_thresh)
    for weights in (rna_ctr_network, rna_als_network)
)

In [12]:
# Interpret each 0/1 frame as an adjacency matrix and build a graph from it.
# NOTE(review): no create_using is passed, so networkx builds its default
# graph type — confirm an undirected graph is what the motif search expects.
rna_ctr_nx, rna_als_nx = map(
    nx.from_pandas_adjacency,
    (scaled_rna_ctr_values, scaled_rna_als_values),
)

In [13]:
# Save both thresholded graphs as tab-separated weighted edge lists. The
# quantile in the file name is taken from edge_mag_thresh (renders as "0.999"
# for the current setting) so the name always reflects the threshold used.
nx.write_weighted_edgelist(rna_ctr_nx, f"{NBDATA_DIR}/iMN_rna_ctr_genie_edgelist_{edge_mag_thresh}.tsv", delimiter="\t")
nx.write_weighted_edgelist(rna_als_nx, f"{NBDATA_DIR}/iMN_rna_als_genie_edgelist_{edge_mag_thresh}.tsv", delimiter="\t")

## Map to Numeric for MFinder

In [30]:
# Peek at one edge of the control graph to see what the node labels look like.
first_edge = next(iter(nx.edges(rna_ctr_nx)), None)
if first_edge is not None:
    print(*first_edge)

ENSG00000169740 ENSG00000113387


In [34]:
def node_numeric_map(network, outfile, first_label=0):
    """Write an integer-labelled edge list plus the matching label map.

    Nodes are numbered in sorted-label order, starting at ``first_label``.
    Two files are produced:

    * ``{outfile}.txt`` — one ``"<i> <j> 1"`` line per edge of the relabelled
      graph, with self-loops removed.
    * ``{outfile}_MAP.tsv`` — one ``"<original label>\\t<integer id>"`` line
      per node of ``network``.

    NOTE(review): ids start at 0 by default. If the downstream motif tool
    expects ids starting at 1, pass ``first_label=1`` and keep any companion
    id files aligned with it — confirm against the tool's input format.

    Parameters
    ----------
    network : networkx graph
        Graph whose node labels are sortable.
    outfile : str
        Path prefix for the two output files.
    first_label : int, default 0
        Integer assigned to the first node in sorted order.
    """
    converted_nx = nx.convert_node_labels_to_integers(
        network, first_label=first_label, ordering="sorted"
    )
    # Snapshot the self-loops before removing them so the graph is never
    # modified while the self-loop generator is still being consumed.
    converted_nx.remove_edges_from(list(nx.selfloop_edges(converted_nx)))

    with open(f"{outfile}.txt", "w") as f:
        # Every edge is written with a constant weight of 1.
        f.writelines(f"{i} {j} 1\n" for i, j in nx.edges(converted_nx))

    with open(f"{outfile}_MAP.tsv", "w") as f:
        # Same sorted ordering and offset as the relabelling above, so the ids
        # in the map line up with the ids in the edge list.
        f.writelines(
            f"{n}\t{i}\n"
            for i, n in enumerate(sorted(network.nodes()), start=first_label)
        )

In [35]:
# Write the integer edge list and label map for each condition. The quantile
# in the output prefix is taken from edge_mag_thresh (renders as "0.999" for
# the current setting) so the name always reflects the threshold used.
node_numeric_map(rna_ctr_nx, f"{NBDATA_DIR}/iMN_rna_ctr_genie_integer_edgelist_{edge_mag_thresh}")
node_numeric_map(rna_als_nx, f"{NBDATA_DIR}/iMN_rna_als_genie_integer_edgelist_{edge_mag_thresh}")

## Write TF ID File

In [16]:
# Read the header-less TF list and keep its first column as a set, so that
# membership checks against it are cheap.
tf_list = set(
    pd.read_csv(f"{NBDATA_DIR}/iMN_human_tflist.txt", header=None).iloc[:, 0]
)

In [17]:
def write_tf_mapper(network, outfile, tfs=None, first_label=0):
    """Write a per-node table flagging which nodes are in a TF collection.

    One line is written per node, in sorted-label order:
    ``"<integer id>\\t<node label>\\t<1 if the label is in tfs else 0>"``.

    Parameters
    ----------
    network : object with a ``nodes()`` method
        Graph whose node labels are sortable.
    outfile : str
        Path of the file to write (overwritten if it exists).
    tfs : container, optional
        Collection tested with ``in`` for each node label. When omitted, the
        notebook-level ``tf_list`` is used, as before.
    first_label : int, default 0
        Integer id given to the first node in sorted order.
    """
    if tfs is None:
        # Fall back to the notebook global so existing two-argument calls
        # keep working unchanged.
        tfs = tf_list
    with open(outfile, "w") as f:
        f.writelines(
            f"{i}\t{n}\t{int(n in tfs)}\n"
            for i, n in enumerate(sorted(network.nodes()), start=first_label)
        )

In [18]:
# One TF id table per condition: control first, then ALS.
write_tf_mapper(
    network=rna_ctr_nx,
    outfile=f"{NBDATA_DIR}/iMN_rna_ctr_TF_id_list.tsv",
)
write_tf_mapper(
    network=rna_als_nx,
    outfile=f"{NBDATA_DIR}/iMN_rna_als_TF_id_list.tsv",
)