# Gene selection for GRN inference

02/12/2022

Here we select the sets of genes used for the GRN inference of RBP-mRNA and RBP-lncRNA interactions.

We use the Smart-seq2, 10x and SCAN-seq2 (9CL) data for HepG2, and the CEL-seq, STORM-seq (1M reads), Smart-seq3, SCAN-seq2 (9CL) and UMI200 data for K562.

For each dataset we consider only genes present in the fasta file of the canonical isoforms used for the annotation of the eCLIP data. 

We select the following sets of genes:

- eCLIP RBPs + top 400 HVmRNAs
- eCLIP RBPs + top 400 HVlncRNAs
- highly variable TFs + top 400 HVmRNAs

In [None]:
# IPython magic: display matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import scanpy as sc
import anndata as ad
import numpy as np
import pandas as pd
import os

In [None]:
# Root folder of the analysis and the sub-folder holding the pre-processed .h5ad files.
input_folder = './'
proc_folder = ''.join((input_folder, 'processed_data/'))

## Load the fasta file with the canonical isoforms

In [None]:
from Bio import SeqIO

# gname / gid collect, for every FASTA record, field 4 and field 0 of the
# '|'-separated record id (used below as gene name and gene id respectively).
gname=[]
gid=[]
# Mode "r" replaces "rU": the "U" flag was removed in Python 3.11 and universal
# newlines are already the default in text mode. The context manager makes sure
# the file handle is closed once parsing is done.
with open("/Users/jonathan/Desktop/IIT/INTERACTomics/ENCODE_eCLIP_DATA/transcriptomes/hsapiens_gene_ensembl_107_canonical_new.fa", "r") as f_open:
    for rec in SeqIO.parse(f_open, "fasta"):
        fields = rec.id.split('|')
        gname.append(fields[4])
        gid.append(fields[0])

## Loading scRNA-seq pre-processed data

In [None]:
# HepG2 datasets (Smart-seq2 and 10x), read from the pre-processed .h5ad files in proc_folder.
HepG2_SMART=ad.read_h5ad(proc_folder+'processed_HepG2_Smartseq2.h5ad')
HepG2_10x=ad.read_h5ad(proc_folder+'processed_HepG2_10x.h5ad')

# K562 CEL-seq: the regular processed object plus a separate '_ARACNe' file.
K562_CEL=ad.read_h5ad(proc_folder+'processed_K562_CELseq.h5ad')
K562_CEL_ARACNe=ad.read_h5ad(proc_folder+'processed_K562_CELseq_ARACNe.h5ad')

# K562 STORM-seq.
K562_STORM=ad.read_h5ad(proc_folder+'processed_K562_STORMseq1M.h5ad')

# K562 Smart-seq3.
# NOTE(review): K562_SMART3_ARACNe is read from the very same file as K562_SMART3,
# whereas the CEL-seq counterpart uses a dedicated '..._ARACNe.h5ad' file.
# Confirm whether 'processed_K562_Smartseq3_ARACNe.h5ad' was intended here.
K562_SMART3=ad.read_h5ad(proc_folder+'processed_K562_Smartseq3.h5ad')
K562_SMART3_ARACNe=ad.read_h5ad(proc_folder+'processed_K562_Smartseq3.h5ad')

In [None]:
# Consider only genes present in the fasta file
def Genes_in_fasta(adata,gnames):
    """Return a copy of ``adata`` restricted to the genes listed in ``gnames``.

    Parameters
    ----------
    adata : AnnData
        Dataset whose ``var_names`` are gene names.
    gnames : iterable of str
        Gene names to keep.

    Returns
    -------
    AnnData
        Copy of ``adata`` with only the variables whose name is in ``gnames``.
        The summary of the object is printed before and after filtering.
    """
    print(adata)
    # A boolean mask keeps the genes in their original order. The previous
    # list(set(...)) produced an order that depends on Python's per-process
    # string hashing, so the column order changed from one run to the next.
    keep = adata.var_names.isin(set(gnames))
    adata = adata[:, keep].copy()
    print(adata)
    return adata

In [None]:
# Restrict every loaded dataset to the genes named in the canonical-isoform fasta (gname).
# Each variable is rebound to the filtered copy returned by Genes_in_fasta.
HepG2_SMART=Genes_in_fasta(HepG2_SMART,gname)
HepG2_10x=Genes_in_fasta(HepG2_10x,gname)
# HepG2_10x_ARACNe=Genes_in_fasta(HepG2_10x_ARACNe,gname)

K562_CEL=Genes_in_fasta(K562_CEL,gname)
K562_CEL_ARACNe=Genes_in_fasta(K562_CEL_ARACNe,gname)

K562_STORM=Genes_in_fasta(K562_STORM,gname)
K562_SMART3=Genes_in_fasta(K562_SMART3,gname)
K562_SMART3_ARACNe=Genes_in_fasta(K562_SMART3_ARACNe,gname)

## Load TFs and RBPs

In [None]:
# Load TFs from Beeline
# Keep the unique entries of the 'TF' column of human-tfs.csv.
TFs = list(set(pd.read_csv('human-tfs.csv')['TF']))
len(TFs)

In [None]:
# RBP names read as strings from eCLIP_RBPs.txt.
# NOTE(review): presumably one RBP name per line -- confirm the file layout.
RBPs=list(np.loadtxt("eCLIP_RBPs.txt",dtype=str))

## Gene sets

### Highly variable long non-coding RNAs

In [None]:
from gtfparse import read_gtf
# Parse the long non-coding RNA annotation GTF into a table.
gtf_df_lnc = read_gtf("/Users/jonathan/Desktop/IIT/INTERACTomics/ENCODE_eCLIP_DATA/transcriptomes_gencode_V41/gencode.v41.long_noncoding_RNAs.gtf")
#     print(gtf_df_lnc.gene_type.value_counts())
# Set of gene names found in the lncRNA annotation (read by HVlnc as a global).
gnames_nc=set(gtf_df_lnc.gene_name)
    
def HVlnc(adata,eclip,ct,n):
    """Return the highly variable lncRNAs of ``adata``, most variable first.

    Parameters
    ----------
    adata : AnnData
        Expression data. Only genes whose name is in the module-level
        ``gnames_nc`` set are considered.
    eclip
        Unused by this function; kept so that existing calls keep working.
    ct : str
        Label printed before the lncRNA count.
    n : int
        Number of highly variable genes requested from scanpy (``n_top_genes``).

    Returns
    -------
    pandas.Index
        Names of the genes flagged as highly variable, sorted by decreasing
        ``dispersions_norm``.
    """
    print(ct)

    # Compute the intersection once, and sort it so the gene order does not
    # depend on set iteration order (which varies between Python sessions).
    inters_lnc = sorted(set(gnames_nc).intersection(adata.var_names))
    print('Total nr lncRNA', len(inters_lnc))

    adata_lnc = adata[:, inters_lnc].copy()
    # NOTE: per the scanpy documentation, cutoffs such as max_mean are ignored
    # when n_top_genes is given; the argument is kept as in the original call.
    sc.pp.highly_variable_genes(adata_lnc, max_mean=10, n_top_genes=n)

    # Only the .var table is needed for the ranking, so filter it directly
    # instead of copying the expression matrix of the selected genes.
    hv_table = adata_lnc.var[adata_lnc.var['highly_variable']]
    ordered_HVlnc = hv_table.sort_values('dispersions_norm', ascending=False).index

    return ordered_HVlnc

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

def jaccard_similarity(list1, list2):
    """Return the Jaccard similarity |A ∩ B| / |A ∪ B| of two collections.

    Duplicates are ignored (both inputs are converted to sets).

    Parameters
    ----------
    list1, list2 : iterable of hashable
        The two collections to compare.

    Returns
    -------
    float
        Value in [0, 1]. Two empty collections are treated as identical and
        give 1.0 (the original code raised ZeroDivisionError in that case).
    """
    set1, set2 = set(list1), set(list2)
    union = set1 | set2
    if not union:
        # Both inputs are empty: identical sets, and nothing to divide by.
        return 1.0
    return len(set1 & set2) / len(union)

def jaccard_heatmap(adatas,protocols,ct):
    """Plot the pairwise Jaccard similarity of several gene lists as a heatmap.

    Parameters
    ----------
    adatas : sequence of iterables
        Despite the name, each element is converted with ``list(...)`` and
        compared as a collection of gene names (one collection per protocol).
    protocols : sequence of str
        Tick labels, in the same order as ``adatas``.
    ct : str
        Title of the figure.

    Only the strict lower triangle is shown; the diagonal and the upper
    triangle are masked.
    """
    gene_lists = [list(genes) for genes in adatas]
    n_sets = len(gene_lists)

    jaccard = np.zeros((n_sets, n_sets))
    for i, genes_i in enumerate(gene_lists):
        for j, genes_j in enumerate(gene_lists):
            jaccard[i, j] = jaccard_similarity(genes_i, genes_j)

    # Boolean mask covering the diagonal and the upper triangle. The previous
    # mask was np.triu(jaccard) itself, so an upper-triangle cell whose
    # similarity is exactly 0 evaluated to False and was left visible.
    hidden = np.triu(np.ones_like(jaccard, dtype=bool))

    fig, ax = plt.subplots()
    ax.set_title(ct)
    sns.heatmap(jaccard, annot=True, mask=hidden, ax=ax,
                xticklabels=protocols, yticklabels=protocols)
    plt.show()
    plt.close(fig)

In [None]:
# Top 400 highly variable lncRNAs for the two HepG2 datasets.
# `eclip_HepG2` is never assigned in the visible notebook, so the original call
# raised NameError on a fresh kernel. HVlnc does not read its second argument,
# hence None is passed as an explicit placeholder.
HVlnc_HepG2_Smartseq2=HVlnc(HepG2_SMART,None,'HepG2_Smartseq2',400)
HVlnc_HepG2_10x=HVlnc(HepG2_10x,None,'HepG2_10x',400)

In [None]:
# Overlap between the highly variable lncRNAs selected from the two HepG2 protocols.
jaccard_heatmap([HVlnc_HepG2_Smartseq2,HVlnc_HepG2_10x],['Smart-seq2','10x'],'HepG2')

In [None]:
# Top 400 highly variable lncRNAs for the three K562 datasets.
# `eclip_K562` is never assigned in the visible notebook, so the original call
# raised NameError on a fresh kernel. HVlnc does not read its second argument,
# hence None is passed as an explicit placeholder.
HVlnc_K562_CELseq=HVlnc(K562_CEL,None,'K562_CELseq',400)
HVlnc_K562_STORMseq=HVlnc(K562_STORM,None,'K562_STORMseq',400)
HVlnc_K562_Smartseq3=HVlnc(K562_SMART3,None,'K562_Smartseq3',400)

In [None]:
# Overlap between the highly variable lncRNAs selected from the three K562 protocols.
jaccard_heatmap([HVlnc_K562_CELseq,HVlnc_K562_STORMseq,HVlnc_K562_Smartseq3],
                ['CEL-seq','STORM-seq','Smart-seq3'],'K562')

### Highly variable mRNAs

In [None]:
from gtfparse import read_gtf

# Parse the primary-assembly annotation GTF into a table.
gtf_df = read_gtf("/Users/jonathan/Desktop/IIT/INTERACTomics/ENCODE_eCLIP_DATA/transcriptomes_gencode_V41/gencode.v41.primary_assembly.annotation.gtf")
# Select only protein coding
gtf_df_pc=gtf_df[gtf_df.gene_type=='protein_coding']
# Set of protein-coding gene names (read by HVmRNA as a global).
gnames_pc=set(gtf_df_pc.gene_name)

def HVmRNA(adata,eclip,ct,n):
    """Return the highly variable protein-coding genes of ``adata``, most variable first.

    Parameters
    ----------
    adata : AnnData
        Expression data. Only genes whose name is in the module-level
        ``gnames_pc`` set are considered.
    eclip
        Unused by this function; kept so that existing calls keep working.
    ct : str
        Label printed before the mRNA count (printed for consistency with
        ``HVlnc``, so each count can be attributed to its dataset).
    n : int
        Number of highly variable genes requested from scanpy (``n_top_genes``).

    Returns
    -------
    pandas.Index
        Names of the genes flagged as highly variable, sorted by decreasing
        ``dispersions_norm``.
    """
    print(ct)

    # Compute the intersection once, and sort it so the gene order does not
    # depend on set iteration order (which varies between Python sessions).
    inters_mRNA = sorted(set(gnames_pc).intersection(adata.var_names))
    print('Total nr mRNA', len(inters_mRNA))

    adata_mRNA = adata[:, inters_mRNA].copy()
    # NOTE: per the scanpy documentation, cutoffs such as max_mean are ignored
    # when n_top_genes is given; the argument is kept as in the original call.
    sc.pp.highly_variable_genes(adata_mRNA, max_mean=10, n_top_genes=n)

    # Only the .var table is needed for the ranking, so filter it directly
    # instead of copying the expression matrix of the selected genes.
    hv_table = adata_mRNA.var[adata_mRNA.var['highly_variable']]
    ordered_HVmRNA = hv_table.sort_values('dispersions_norm', ascending=False).index

    return ordered_HVmRNA

In [None]:
# Top 400 highly variable mRNAs for the two HepG2 datasets.
# `eclip_HepG2` is never assigned in the visible notebook, so the original call
# raised NameError on a fresh kernel. HVmRNA does not read its second argument,
# hence None is passed as an explicit placeholder.
HVmRNA_HepG2_Smartseq2=HVmRNA(HepG2_SMART,None,'HepG2_Smartseq2',400)
HVmRNA_HepG2_10x=HVmRNA(HepG2_10x,None,'HepG2_10x',400)

In [None]:
# Overlap between the highly variable mRNAs selected from the two HepG2 protocols.
jaccard_heatmap([HVmRNA_HepG2_Smartseq2,HVmRNA_HepG2_10x],['Smart-seq2','10x'],'HepG2')

In [None]:
# Top 400 highly variable mRNAs for the three K562 datasets.
# `eclip_K562` is never assigned in the visible notebook, so the original call
# raised NameError on a fresh kernel. HVmRNA does not read its second argument,
# hence None is passed as an explicit placeholder.
HVmRNA_K562_CELseq=HVmRNA(K562_CEL,None,'K562_CELseq',400)
HVmRNA_K562_STORMseq=HVmRNA(K562_STORM,None,'K562_STORMseq',400)
HVmRNA_K562_Smartseq3=HVmRNA(K562_SMART3,None,'K562_Smartseq3',400)

In [None]:
# Overlap between the highly variable mRNAs selected from the three K562 protocols.
jaccard_heatmap([HVmRNA_K562_CELseq,HVmRNA_K562_STORMseq,HVmRNA_K562_Smartseq3],
                ['CEL-seq','STORM-seq','Smart-seq3'],'K562')

## TFs and RBP selection

In [None]:
def RBP_TF_selection(adata,all_TFs,all_RBPs):
    """Select the RBPs present in ``adata`` and the TFs that are highly variable in it.

    Parameters
    ----------
    adata : AnnData
        Expression data. It is annotated in place by
        ``sc.pp.highly_variable_genes`` (as in the original implementation).
    all_TFs : iterable of str
        Candidate transcription-factor names.
    all_RBPs : iterable of str
        Candidate RBP names.

    Returns
    -------
    (list, list)
        ``myRBPs``: candidate RBPs found in ``adata.var_names``.
        ``myHVTFs``: candidate TFs flagged as highly variable.
        Both lists are sorted, so the result is reproducible across runs.
    """
    sc.pp.highly_variable_genes(adata, max_mean=10)
    # Only the names of the highly variable genes are needed, so read them from
    # .var instead of copying the corresponding slice of the expression matrix.
    hv_names = adata.var_names[adata.var['highly_variable'].to_numpy()]

    myRBPs = sorted(set(all_RBPs).intersection(adata.var_names))
    myHVTFs = sorted(set(all_TFs).intersection(hv_names))

    print(len(myRBPs), len(myHVTFs))

    return myRBPs, myHVTFs

In [None]:
# For every dataset: RBPs present in the data and highly variable TFs.
# Each call also prints the two list lengths.
RBP_HepG2_Smartseq2, HVTFs_HepG2_Smartseq2 = RBP_TF_selection(HepG2_SMART,TFs,RBPs)
RBP_HepG2_10x, HVTFs_HepG2_10x = RBP_TF_selection(HepG2_10x,TFs,RBPs)

RBP_K562_CELseq, HVTFs_K562_CELseq = RBP_TF_selection(K562_CEL,TFs,RBPs)
RBP_K562_STORMseq, HVTFs_K562_STORMseq = RBP_TF_selection(K562_STORM,TFs,RBPs)
RBP_K562_Smartseq3, HVTFs_K562_Smartseq3 = RBP_TF_selection(K562_SMART3,TFs,RBPs)

In [None]:
## Define the three sets of genes for the HepG2 Smart-seq2 dataset
# sorted(...) makes the gene order reproducible: the order of list(set(...)) depends
# on string hashing, which changes between Python sessions, and this order ends up
# in the saved CSV and gene-name files.
HepG2_Smartseq2_RBP_lnc_all_genes = sorted(set(HVlnc_HepG2_Smartseq2) | set(RBP_HepG2_Smartseq2))
HepG2_Smartseq2_RBP_mRNA_all_genes = sorted(set(HVmRNA_HepG2_Smartseq2) | set(RBP_HepG2_Smartseq2))
HepG2_Smartseq2_TF_mRNA_all_genes = sorted(set(HVmRNA_HepG2_Smartseq2) | set(HVTFs_HepG2_Smartseq2))

print(len(HepG2_Smartseq2_RBP_lnc_all_genes), len(HepG2_Smartseq2_RBP_mRNA_all_genes),
      len(HepG2_Smartseq2_TF_mRNA_all_genes))

In [None]:
## Define the three sets of genes for the HepG2 10x dataset
# sorted(...) makes the gene order reproducible: the order of list(set(...)) depends
# on string hashing, which changes between Python sessions, and this order ends up
# in the saved CSV and gene-name files.
HepG2_10x_RBP_lnc_all_genes = sorted(set(HVlnc_HepG2_10x) | set(RBP_HepG2_10x))
HepG2_10x_RBP_mRNA_all_genes = sorted(set(HVmRNA_HepG2_10x) | set(RBP_HepG2_10x))
HepG2_10x_TF_mRNA_all_genes = sorted(set(HVmRNA_HepG2_10x) | set(HVTFs_HepG2_10x))

print(len(HepG2_10x_RBP_lnc_all_genes), len(HepG2_10x_RBP_mRNA_all_genes),
      len(HepG2_10x_TF_mRNA_all_genes))

In [None]:
## Define the three sets of genes for the K562 CEL-seq dataset
# sorted(...) makes the gene order reproducible: the order of list(set(...)) depends
# on string hashing, which changes between Python sessions, and this order ends up
# in the saved CSV and gene-name files.
K562_CELseq_RBP_lnc_all_genes = sorted(set(HVlnc_K562_CELseq) | set(RBP_K562_CELseq))
K562_CELseq_RBP_mRNA_all_genes = sorted(set(HVmRNA_K562_CELseq) | set(RBP_K562_CELseq))
K562_CELseq_TF_mRNA_all_genes = sorted(set(HVmRNA_K562_CELseq) | set(HVTFs_K562_CELseq))

print(len(K562_CELseq_RBP_lnc_all_genes), len(K562_CELseq_RBP_mRNA_all_genes),
      len(K562_CELseq_TF_mRNA_all_genes))

In [None]:
## Define the three sets of genes for the K562 STORM-seq dataset
# sorted(...) makes the gene order reproducible: the order of list(set(...)) depends
# on string hashing, which changes between Python sessions, and this order ends up
# in the saved CSV and gene-name files.
K562_STORMseq_RBP_lnc_all_genes = sorted(set(HVlnc_K562_STORMseq) | set(RBP_K562_STORMseq))
K562_STORMseq_RBP_mRNA_all_genes = sorted(set(HVmRNA_K562_STORMseq) | set(RBP_K562_STORMseq))
K562_STORMseq_TF_mRNA_all_genes = sorted(set(HVmRNA_K562_STORMseq) | set(HVTFs_K562_STORMseq))

print(len(K562_STORMseq_RBP_lnc_all_genes), len(K562_STORMseq_RBP_mRNA_all_genes),
      len(K562_STORMseq_TF_mRNA_all_genes))

In [None]:
## Define the three sets of genes for the K562 Smart-seq3 dataset
# sorted(...) makes the gene order reproducible: the order of list(set(...)) depends
# on string hashing, which changes between Python sessions, and this order ends up
# in the saved CSV and gene-name files.
K562_Smartseq3_RBP_lnc_all_genes = sorted(set(HVlnc_K562_Smartseq3) | set(RBP_K562_Smartseq3))
K562_Smartseq3_RBP_mRNA_all_genes = sorted(set(HVmRNA_K562_Smartseq3) | set(RBP_K562_Smartseq3))
K562_Smartseq3_TF_mRNA_all_genes = sorted(set(HVmRNA_K562_Smartseq3) | set(HVTFs_K562_Smartseq3))

print(len(K562_Smartseq3_RBP_lnc_all_genes), len(K562_Smartseq3_RBP_mRNA_all_genes),
      len(K562_Smartseq3_TF_mRNA_all_genes))

# Save the data

In [None]:
def SaveData(folder, adata, geneset, label1, label2):
    """Write the expression of ``geneset`` from ``adata.X`` and ``adata.raw`` as CSV files.

    Both tables are written transposed (genes as rows, cells as columns).

    Parameters
    ----------
    folder : str
        Output prefix; it is concatenated as-is, so it should end with a path
        separator.
    adata : AnnData
        Dataset providing ``.X`` (written as "NormalizedData") and ``.raw``
        (written as "RawData").
    geneset : list of str
        Genes to export.
    label1, label2 : str
        File-name parts. The files are named
        ``<label1>NormalizedData_<label2>.csv`` and
        ``<label1>RawData_<label2>.csv``.
    """
    def _dense(matrix):
        # Densify sparse matrices (objects exposing .toarray()) before handing
        # them to pandas; dense arrays are passed through unchanged.
        return matrix.toarray() if hasattr(matrix, 'toarray') else matrix

    # Slice once and reuse the view instead of re-slicing for X, var_names and obs_names.
    subset = adata[:, geneset]

    # Save the normalized data in a csv file
    tmp_df = pd.DataFrame(data=_dense(subset.X).T, index=subset.var_names,
                          columns=subset.obs_names)
    tmp_df.to_csv(folder+label1+'NormalizedData_'+label2+'.csv')

    # Save the raw data in a csv file
    raw_subset = adata.raw[:, geneset]
    tmp_df = pd.DataFrame(data=_dense(raw_subset.X).T, index=raw_subset.var_names,
                          columns=subset.obs_names)
    tmp_df.to_csv(folder+label1+'RawData_'+label2+'.csv')

In [None]:
input_folder2=input_folder+'GENE_SELECTION_mRNA_lncRNA/'

# os.makedirs(..., exist_ok=True) replaces the isdir()/mkdir() pair: it is a
# no-op when the directory already exists and also creates missing parents.
os.makedirs(input_folder2, exist_ok=True)

In [None]:
# Export every HepG2 (dataset, gene set) pair into its own directory
# '<input_folder2><dataset>_<gene set label>/'.
# os.makedirs(..., exist_ok=True) replaces os.mkdir so that re-running this cell
# no longer fails with FileExistsError once the directories exist.
_hepg2_exports = [
    (HepG2_SMART, 'HepG2_Smartseq2', [
        ('RBP_lnc400', HepG2_Smartseq2_RBP_lnc_all_genes),
        ('TF_mRNA400', HepG2_Smartseq2_TF_mRNA_all_genes),
        ('RBP_mRNA400', HepG2_Smartseq2_RBP_mRNA_all_genes),
    ]),
    (HepG2_10x, 'HepG2_10x', [
        ('RBP_lnc400', HepG2_10x_RBP_lnc_all_genes),
        ('TF_mRNA400', HepG2_10x_TF_mRNA_all_genes),
        ('RBP_mRNA400', HepG2_10x_RBP_mRNA_all_genes),
    ]),
]

for _adata, _dataset, _gene_sets in _hepg2_exports:
    for _label, _genes in _gene_sets:
        _out_dir = input_folder2 + _dataset + '_' + _label
        os.makedirs(_out_dir, exist_ok=True)
        SaveData(_out_dir + '/', _adata, _genes, _dataset, _label)

In [None]:
# Export every K562 (dataset, gene set) pair into its own directory
# '<input_folder2><dataset>_<gene set label>/'.
# os.makedirs(..., exist_ok=True) replaces os.mkdir so that re-running this cell
# no longer fails with FileExistsError once the directories exist.
_k562_exports = [
    (K562_CEL, 'K562_CELseq', [
        ('RBP_lnc400', K562_CELseq_RBP_lnc_all_genes),
        ('TF_mRNA400', K562_CELseq_TF_mRNA_all_genes),
        ('RBP_mRNA400', K562_CELseq_RBP_mRNA_all_genes),
    ]),
    (K562_STORM, 'K562_STORMseq', [
        ('RBP_lnc400', K562_STORMseq_RBP_lnc_all_genes),
        ('TF_mRNA400', K562_STORMseq_TF_mRNA_all_genes),
        ('RBP_mRNA400', K562_STORMseq_RBP_mRNA_all_genes),
    ]),
    (K562_SMART3, 'K562_Smartseq3', [
        ('RBP_lnc400', K562_Smartseq3_RBP_lnc_all_genes),
        ('TF_mRNA400', K562_Smartseq3_TF_mRNA_all_genes),
        ('RBP_mRNA400', K562_Smartseq3_RBP_mRNA_all_genes),
    ]),
]

for _adata, _dataset, _gene_sets in _k562_exports:
    for _label, _genes in _gene_sets:
        _out_dir = input_folder2 + _dataset + '_' + _label
        os.makedirs(_out_dir, exist_ok=True)
        SaveData(_out_dir + '/', _adata, _genes, _dataset, _label)

In [None]:
input_folder3=input_folder+'GENE_SELECTION_mRNA_lncRNA/ARACNe_INPUT/'

# os.makedirs(..., exist_ok=True) replaces the isdir()/mkdir() pair: it is a
# no-op when the directory already exists and also creates the parent
# 'GENE_SELECTION_mRNA_lncRNA/' folder if it is missing (os.mkdir would fail).
os.makedirs(input_folder3, exist_ok=True)

In [None]:
def SaveDataforARACNe(folder, adata, geneset, label1, label2):
    """Write the ``geneset`` columns of ``adata`` to a new .h5ad file.

    A fresh AnnData is built from the ``.X`` of the selection, carrying over
    its observation and variable names, and ``label1`` is stored in
    ``obs['batch']``.

    The output path is ``folder + 'processed_' + label1 + '_' + label2 + '.h5ad'``;
    ``folder`` is used as a plain string prefix (no separator is inserted).
    NOTE(review): callers that pass ``folder`` without a trailing separator get
    the last path component prepended to the file name -- confirm this is intended.
    """
    selection = adata[:, geneset]

    out = ad.AnnData(X=selection.X)
    out.obs_names = selection.obs_names
    out.var_names = selection.var_names
    out.obs['batch'] = label1

    out.write(folder+'processed_'+label1+'_'+label2+'.h5ad')

In [None]:
# ARACNe input for HepG2 Smart-seq2, built from the .raw attribute of the dataset.
# NOTE(review): the first argument has no trailing '/', and SaveDataforARACNe
# concatenates it directly with 'processed_...', so the files are written into
# input_folder3 with names starting e.g. 'HepG2_Smartseq2_RBP_lnc400processed_'.
# Confirm this naming is intended.
SaveDataforARACNe(input_folder3+'HepG2_Smartseq2_RBP_lnc400', HepG2_SMART.raw, HepG2_Smartseq2_RBP_lnc_all_genes,
         'HepG2_Smartseq2', 'RBP_lnc400')
SaveDataforARACNe(input_folder3+'HepG2_Smartseq2_TF_mRNA400', HepG2_SMART.raw, HepG2_Smartseq2_TF_mRNA_all_genes,
         'HepG2_Smartseq2', 'TF_mRNA400')
SaveDataforARACNe(input_folder3+'HepG2_Smartseq2_RBP_mRNA400', HepG2_SMART.raw, HepG2_Smartseq2_RBP_mRNA_all_genes,
         'HepG2_Smartseq2', 'RBP_mRNA400')

In [None]:
# ARACNe input for the K562 datasets.
# NOTE(review): the first argument has no trailing '/', and SaveDataforARACNe
# concatenates it directly with 'processed_...', so the files are written into
# input_folder3 with names starting e.g. 'K562_CELseq_RBP_lnc400processed_'.
# Confirm this naming is intended.

# CEL-seq: uses the dedicated K562_CEL_ARACNe object (its .X is exported).
SaveDataforARACNe(input_folder3+'K562_CELseq_RBP_lnc400', K562_CEL_ARACNe, K562_CELseq_RBP_lnc_all_genes,
         'K562_CELseq', 'RBP_lnc400')
SaveDataforARACNe(input_folder3+'K562_CELseq_TF_mRNA400', K562_CEL_ARACNe, K562_CELseq_TF_mRNA_all_genes,
         'K562_CELseq', 'TF_mRNA400')
SaveDataforARACNe(input_folder3+'K562_CELseq_RBP_mRNA400', K562_CEL_ARACNe, K562_CELseq_RBP_mRNA_all_genes,
         'K562_CELseq', 'RBP_mRNA400')

# STORM-seq: uses the .raw attribute of the regular object.
SaveDataforARACNe(input_folder3+'K562_STORMseq_RBP_lnc400', K562_STORM.raw, K562_STORMseq_RBP_lnc_all_genes,
         'K562_STORMseq', 'RBP_lnc400')
SaveDataforARACNe(input_folder3+'K562_STORMseq_TF_mRNA400', K562_STORM.raw, K562_STORMseq_TF_mRNA_all_genes,
         'K562_STORMseq', 'TF_mRNA400')
SaveDataforARACNe(input_folder3+'K562_STORMseq_RBP_mRNA400', K562_STORM.raw, K562_STORMseq_RBP_mRNA_all_genes,
         'K562_STORMseq', 'RBP_mRNA400')

# Smart-seq3: uses K562_SMART3_ARACNe (its .X is exported).
# NOTE(review): K562_SMART3_ARACNe is loaded from the same file as K562_SMART3 in
# the loading cell -- confirm that its .X holds the values ARACNe should receive.
SaveDataforARACNe(input_folder3+'K562_Smartseq3_RBP_lnc400', K562_SMART3_ARACNe, K562_Smartseq3_RBP_lnc_all_genes,
         'K562_Smartseq3', 'RBP_lnc400')
SaveDataforARACNe(input_folder3+'K562_Smartseq3_TF_mRNA400', K562_SMART3_ARACNe, K562_Smartseq3_TF_mRNA_all_genes,
         'K562_Smartseq3', 'TF_mRNA400')
SaveDataforARACNe(input_folder3+'K562_Smartseq3_RBP_mRNA400', K562_SMART3_ARACNe, K562_Smartseq3_RBP_mRNA_all_genes,
         'K562_Smartseq3', 'RBP_mRNA400')

In [None]:
# NOTE(review): unlike the other output folders, this one is an absolute,
# machine-specific path rather than being derived from input_folder.
gname_folder='/Users/jonathan/Desktop/IIT/INTERACTomics/scRNA-seq_data/ANALYSIS_FEB_2023_RIBO/GENE_SELECTION_mRNA_lncRNA/gene_names/'

# os.makedirs(..., exist_ok=True) replaces the isdir()/mkdir() pair: it is a
# no-op when the directory already exists and also creates missing parent
# directories, which os.mkdir cannot do.
os.makedirs(gname_folder, exist_ok=True)

In [None]:
# Save the gene names of every (dataset, gene set) pair for running catRAPID:
# one text file per pair, one gene name per line (np.c_ turns the list into a column).
# NOTE(review): the original comment also mentioned saving "their union"; no union
# file is written in this cell.

# HepG2 Smart-seq2
np.savetxt(gname_folder+'gnamesHepG2_Smartseq2_RBP_lnc400.txt',np.c_[HepG2_Smartseq2_RBP_lnc_all_genes],fmt='%s')
np.savetxt(gname_folder+'gnamesHepG2_Smartseq2_TF_mRNA400.txt',np.c_[HepG2_Smartseq2_TF_mRNA_all_genes],fmt='%s')
np.savetxt(gname_folder+'gnamesHepG2_Smartseq2_RBP_mRNA400.txt',np.c_[HepG2_Smartseq2_RBP_mRNA_all_genes],fmt='%s')

# HepG2 10x
np.savetxt(gname_folder+'gnamesHepG2_10x_RBP_lnc400.txt',np.c_[HepG2_10x_RBP_lnc_all_genes],fmt='%s')
np.savetxt(gname_folder+'gnamesHepG2_10x_TF_mRNA400.txt',np.c_[HepG2_10x_TF_mRNA_all_genes],fmt='%s')
np.savetxt(gname_folder+'gnamesHepG2_10x_RBP_mRNA400.txt',np.c_[HepG2_10x_RBP_mRNA_all_genes],fmt='%s')

# K562 CEL-seq
np.savetxt(gname_folder+'gnamesK562_CELseq_RBP_lnc400.txt',np.c_[K562_CELseq_RBP_lnc_all_genes],fmt='%s')
np.savetxt(gname_folder+'gnamesK562_CELseq_TF_mRNA400.txt',np.c_[K562_CELseq_TF_mRNA_all_genes],fmt='%s')
np.savetxt(gname_folder+'gnamesK562_CELseq_RBP_mRNA400.txt',np.c_[K562_CELseq_RBP_mRNA_all_genes],fmt='%s')

# K562 STORM-seq
np.savetxt(gname_folder+'gnamesK562_STORMseq_RBP_lnc400.txt',np.c_[K562_STORMseq_RBP_lnc_all_genes],fmt='%s')
np.savetxt(gname_folder+'gnamesK562_STORMseq_TF_mRNA400.txt',np.c_[K562_STORMseq_TF_mRNA_all_genes],fmt='%s')
np.savetxt(gname_folder+'gnamesK562_STORMseq_RBP_mRNA400.txt',np.c_[K562_STORMseq_RBP_mRNA_all_genes],fmt='%s')

# K562 Smart-seq3
np.savetxt(gname_folder+'gnamesK562_Smartseq3_RBP_lnc400.txt',np.c_[K562_Smartseq3_RBP_lnc_all_genes],fmt='%s')
np.savetxt(gname_folder+'gnamesK562_Smartseq3_TF_mRNA400.txt',np.c_[K562_Smartseq3_TF_mRNA_all_genes],fmt='%s')
np.savetxt(gname_folder+'gnamesK562_Smartseq3_RBP_mRNA400.txt',np.c_[K562_Smartseq3_RBP_mRNA_all_genes],fmt='%s')