In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import time

from os import walk
from scipy.stats import spearmanr

# import utils
sys.path.append("../../utils")
from plotting_utils import *
from misc_utils import *
from norm_utils import *

%matplotlib inline

## variables

In [2]:
cage_f = "../../misc/03__rna_seq_expr/hg19.cage_peak_phase1and2combined_ann.txt.gz"

In [3]:
fimo_f = "fimo_all_biotypes.txt"

In [4]:
chip_f = "chip_all.txt"

In [5]:
# files with tissue specificities calculated across all samples
tss_ts_f = "TSS.CAGE_grouped_exp.tissue_sp.txt"
enh_ts_f = "Enh.CAGE_grouped_exp.tissue_sp.txt"

In [6]:
# files with CAGE expression in HepG2, HeLa, K562 only
tss_cell_line_expr_f = "../../misc/03__rna_seq_expr/hg19.cage_peak_tpm_ann.mpra_cell_line_replicates.tsv"
enh_cell_line_expr_f = "../../misc/03__rna_seq_expr/hg19.enhancers_tpm_ann.mpra_cell_line_replicates.tsv"

## 1. import data

In [7]:
fimo = pd.read_table(fimo_f, sep="\t")
fimo.head()

Unnamed: 0,#pattern name,short_description,tss_id,shuffled,start,stop,strand,score,p-value,q-value,matched sequence,motif_id,biotype
0,RUNX1,"p@chr16:80597029..80597034,-","chr16:80597029..80597034,-",shuffled,215,225,+,15.6034,2.49e-07,0.998,GTCTGTGGTTT,RUNX1,intergenic
1,RUNX1,p1@FRG1,"chr4:190861993..190862128,+",shuffled,399,409,+,15.6034,2.49e-07,0.998,GTCTGTGGTTT,RUNX1,div_pc
2,RUNX1,,"chr1:10532355..10532384,+",shuffled,487,497,+,15.6034,2.49e-07,0.998,GTCTGTGGTTT,RUNX1,protein_coding
3,RUNX1,p1@TRIB1,"chr8:126442570..126442624,+",shuffled,580,590,+,15.6034,2.49e-07,0.998,GTCTGTGGTTT,RUNX1,protein_coding
4,RUNX1,p1@RPGRIP1L,"chr16:53737742..53737790,-",shuffled,794,804,-,15.6034,2.49e-07,0.998,GTCTGTGGTTT,RUNX1,protein_coding


In [8]:
chip = pd.read_table(chip_f, sep="\t")
chip.head()

Unnamed: 0,chromosome,start,end,short_description,tss_id,score,strand,chip_id,chip_chromosome,chip_start,chip_end,chip_score,biotype,pos,pos_rel,motif_id
0,chr1,10002385,10005385,p1@LZIC,"chr1:10003372..10003465,-",0,-,ARID3A,chr1,10003524,10003525,24.543343,div_pc,10003524.5,-139.5,ARID3A
1,chr1,10002385,10005385,p1@LZIC,"chr1:10003372..10003465,-",0,-,ATF2,chr1,10003413,10003414,42.461327,div_pc,10003413.5,-28.5,ATF2
2,chr1,10002385,10005385,p1@LZIC,"chr1:10003372..10003465,-",0,-,ATF2,chr1,10003497,10003498,69.683125,div_pc,10003497.5,-112.5,ATF2
3,chr1,10002385,10005385,p1@LZIC,"chr1:10003372..10003465,-",0,-,ATF3,chr1,10003457,10003458,27.073183,div_pc,10003457.5,-72.5,ATF3
4,chr1,10002385,10005385,p1@LZIC,"chr1:10003372..10003465,-",0,-,BCL3,chr1,10003424,10003425,55.235219,div_pc,10003424.5,-39.5,BCL3


In [9]:
cage = pd.read_table(cage_f, sep="\t", skiprows=7, header=0)
cage.head()

Unnamed: 0,00Annotation,short_description,description,association_with_transcript,entrezgene_id,hgnc_id,uniprot_id
0,"chr10:100013403..100013414,-","p@chr10:100013403..100013414,-","CAGE_peak_at_chr10:100013403..100013414,-",,,,
1,"chr10:100027943..100027958,-",p1@LOXL4,CAGE_peak_1_at_LOXL4_5end,"48bp_to_ENST00000260702,NM_032211,uc001kpa.1_5end",entrezgene:84171,HGNC:17171,uniprot:Q96JB6
2,"chr10:100076685..100076699,+","p@chr10:100076685..100076699,+","CAGE_peak_at_chr10:100076685..100076699,+",,,,
3,"chr10:100150910..100150935,-","p@chr10:100150910..100150935,-","CAGE_peak_at_chr10:100150910..100150935,-",,,,
4,"chr10:100150951..100150962,-","p@chr10:100150951..100150962,-","CAGE_peak_at_chr10:100150951..100150962,-",,,,


In [10]:
tss_ts = pd.read_table(tss_ts_f, sep="\t")
tss_ts.head()

Unnamed: 0,00Annotation,short_description,Group_0,Group_1,Group_2,Group_3,Group_4,Group_5,Group_6,Group_7,...,Group_541,Group_542,Group_543,Group_544,Group_545,Group_546,Group_547,Group_548,Group_549,tissue_sp_all
0,"chr10:100013403..100013414,-","p@chr10:100013403..100013414,-",0.543,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.993829
1,"chr10:100027943..100027958,-",p1@LOXL4,27.14998,0.89047,0.0,0.0,0.0,0.1056,0.0,0.101989,...,3.681535,0.0,0.0,0.0,0.0,1.603423,2.000139,0.629078,0.0,0.968756
2,"chr10:100076685..100076699,+","p@chr10:100076685..100076699,+",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.984646
3,"chr10:100150910..100150935,-","p@chr10:100150910..100150935,-",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.548168,0.0,0.0,0.0,0.0,0.0,0.0,0.99197
4,"chr10:100150951..100150962,-","p@chr10:100150951..100150962,-",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.096335,0.0,0.0,0.0,0.0,0.0,0.0,0.981522


In [11]:
enh_ts = pd.read_table(enh_ts_f, sep="\t")
enh_ts.head()

Unnamed: 0,Id,Group_0,Group_1,Group_2,Group_3,Group_4,Group_5,Group_6,Group_7,Group_8,...,Group_541,Group_542,Group_543,Group_544,Group_545,Group_546,Group_547,Group_548,Group_549,tissue_sp_all
0,chr1:839741-840250,0.0,0.216714,0.0,0.0,0.0,0.102923,0.0,0.0,0.0,...,0.0,0.0,0.0,0.306431,0.206949,0.0,0.38338,0.087482,0.110784,0.981674
1,chr1:840753-841210,0.0,0.216714,0.0,0.0,0.0,0.0,0.543603,0.0,0.0,...,0.0,0.0,0.13168,0.0,0.0,0.0,0.0,0.0,0.0,0.984912
2,chr1:845485-845678,0.0,0.0,0.0,0.0,0.0,0.0,0.181201,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.990033
3,chr1:855764-856157,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.990598
4,chr1:856539-856757,0.0,0.0,0.0,0.0,0.199924,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.087482,0.0,0.9774


## 2. find unique TF names

In [19]:
fimo_tfs = list(fimo["#pattern name"].unique())
len(fimo_tfs)

435

In [20]:
chip_tfs = list(chip["chip_id"].unique())
len(chip_tfs)

160

In [21]:
fimo_tfs.extend(chip_tfs)
all_tfs = list(set(fimo_tfs))
len(all_tfs)

528

In [22]:
all_tfs = [x.upper() for x in all_tfs]
all_tfs[0:5]

['CEBPD', 'NFATC1', 'GTF2F1', 'RORA', 'GSC2']

In [23]:
# remove fusion proteins and vars from list (ones with ::)
all_tfs = [x for x in all_tfs if "::" not in x and "(VAR" not in x]
len(all_tfs)

491

## 3. determine how many TF names are missing from CAGE file

In [51]:
def get_gene_names(row):
    sep_proms = row.short_description.split("@")
    sep_proms = [x.split(",") for x in sep_proms]
    genes = []
    for x in sep_proms:
        for s in x:
            if s == "+" or s == "-":
                continue
            elif s.startswith("p"):
                continue
            else:
                if s.startswith("chr"):
                    genes.append(s)
                else:
                    genes.append(s.upper())
    genes = list(set(genes))
    genes = ",".join(map(str, genes)) 
    return genes

In [52]:
def tidy_split(df, column, sep='|', keep=False):
    """
    Split the values of a column and expand so the new DataFrame has one split
    value per row. Filters rows where the column is missing.

    Params
    ------
    df : pandas.DataFrame
        dataframe with the column to split and expand
    column : str
        the column to split and expand
    sep : str
        the string used to split the column's values
    keep : bool
        whether to retain the presplit value as it's own row

    Returns
    -------
    pandas.DataFrame
        Returns a dataframe with the same columns as `df`.
    """
    indexes = list()
    new_values = list()
    df = df.dropna(subset=[column])
    for i, presplit in enumerate(df[column].astype(str)):
        values = presplit.split(sep)
        if keep and len(values) > 1:
            indexes.append(i)
            new_values.append(presplit)
        for value in values:
            indexes.append(i)
            new_values.append(value)
    new_df = df.iloc[indexes, :].copy()
    new_df[column] = new_values
    return new_df

In [53]:
cage["genes"] = cage.apply(get_gene_names, axis=1)
cage.sample(5)

Unnamed: 0,00Annotation,short_description,description,association_with_transcript,entrezgene_id,hgnc_id,uniprot_id,genes
57744,"chr17:37075079..37075101,+",p40@LASP1,CAGE_peak_40_at_LASP1_5end,-63bp_to_AK095958_5end,entrezgene:3927,HGNC:6513,,LASP1
143697,"chr4:39460638..39460656,+",p3@LIAS,CAGE_peak_3_at_LIAS_5end,"-8bp_to_ENST00000509519,NM_006859,NM_194451,uc...",entrezgene:11019,HGNC:16429,uniprot:O43766,LIAS
18143,"chr11:72296276..72296292,-","p@chr11:72296276..72296292,-","CAGE_peak_at_chr11:72296276..72296292,-",,,,,chr11:72296276..72296292
89308,"chr1:211432784..211432796,+",p8@RCOR3,CAGE_peak_8_at_RCOR3_5end,9bp_to_ENST00000419091_5end,entrezgene:55758,HGNC:25594,uniprot:Q9P2K3,RCOR3
45975,"chr15:69850521..69850537,+",p2@ENST00000558781,CAGE_peak_2_at_ENST00000558781_5end,0bp_to_ENST00000558781_5end,,,,ENST00000558781


In [54]:
cage_split = tidy_split(cage, column="genes", sep=",")
cage_split.head()

Unnamed: 0,00Annotation,short_description,description,association_with_transcript,entrezgene_id,hgnc_id,uniprot_id,genes
0,"chr10:100013403..100013414,-","p@chr10:100013403..100013414,-","CAGE_peak_at_chr10:100013403..100013414,-",,,,,chr10:100013403..100013414
1,"chr10:100027943..100027958,-",p1@LOXL4,CAGE_peak_1_at_LOXL4_5end,"48bp_to_ENST00000260702,NM_032211,uc001kpa.1_5end",entrezgene:84171,HGNC:17171,uniprot:Q96JB6,LOXL4
2,"chr10:100076685..100076699,+","p@chr10:100076685..100076699,+","CAGE_peak_at_chr10:100076685..100076699,+",,,,,chr10:100076685..100076699
3,"chr10:100150910..100150935,-","p@chr10:100150910..100150935,-","CAGE_peak_at_chr10:100150910..100150935,-",,,,,chr10:100150910..100150935
4,"chr10:100150951..100150962,-","p@chr10:100150951..100150962,-","CAGE_peak_at_chr10:100150951..100150962,-",,,,,chr10:100150951..100150962


In [57]:
cage_split[cage_split["genes"].astype(str).str.contains("MEF2B")]

Unnamed: 0,00Annotation,short_description,description,association_with_transcript,entrezgene_id,hgnc_id,uniprot_id,genes
71809,"chr19:19261486..19261492,-","p5@MEF2BNB-MEF2B,p5@MEF2B","CAGE_peak_5_at_MEF2BNB-MEF2B_5end,CAGE_peak_5_...",82bp_to_ENST00000475290_5end,"entrezgene:100271849,entrezgene:4207","HGNC:39979,HGNC:6995",,MEF2B
71809,"chr19:19261486..19261492,-","p5@MEF2BNB-MEF2B,p5@MEF2B","CAGE_peak_5_at_MEF2BNB-MEF2B_5end,CAGE_peak_5_...",82bp_to_ENST00000475290_5end,"entrezgene:100271849,entrezgene:4207","HGNC:39979,HGNC:6995",,MEF2BNB-MEF2B
71810,"chr19:19281060..19281078,-","p2@MEF2BNB-MEF2B,p2@MEF2B","CAGE_peak_2_at_MEF2BNB-MEF2B_5end,CAGE_peak_2_...",0bp_to_ENST00000409224_5end,"entrezgene:100271849,entrezgene:4207","HGNC:39979,HGNC:6995",uniprot:B3KQ23,MEF2B
71810,"chr19:19281060..19281078,-","p2@MEF2BNB-MEF2B,p2@MEF2B","CAGE_peak_2_at_MEF2BNB-MEF2B_5end,CAGE_peak_2_...",0bp_to_ENST00000409224_5end,"entrezgene:100271849,entrezgene:4207","HGNC:39979,HGNC:6995",uniprot:B3KQ23,MEF2BNB-MEF2B
71811,"chr19:19302931..19302974,-","p1@MEF2BNB-MEF2B,p1@MEF2BNB,p1@MEF2B","CAGE_peak_1_at_MEF2BNB-MEF2B_5end,CAGE_peak_1_...","0bp_to_ENST00000162023,ENST00000354191,ENST000...","entrezgene:100271849,entrezgene:4207,entrezgen...","HGNC:39979,HGNC:6995,HGNC:37247","uniprot:Q02080,uniprot:Q96FH0",MEF2B
71811,"chr19:19302931..19302974,-","p1@MEF2BNB-MEF2B,p1@MEF2BNB,p1@MEF2B","CAGE_peak_1_at_MEF2BNB-MEF2B_5end,CAGE_peak_1_...","0bp_to_ENST00000162023,ENST00000354191,ENST000...","entrezgene:100271849,entrezgene:4207,entrezgen...","HGNC:39979,HGNC:6995,HGNC:37247","uniprot:Q02080,uniprot:Q96FH0",MEF2BNB
71811,"chr19:19302931..19302974,-","p1@MEF2BNB-MEF2B,p1@MEF2BNB,p1@MEF2B","CAGE_peak_1_at_MEF2BNB-MEF2B_5end,CAGE_peak_1_...","0bp_to_ENST00000162023,ENST00000354191,ENST000...","entrezgene:100271849,entrezgene:4207,entrezgen...","HGNC:39979,HGNC:6995,HGNC:37247","uniprot:Q02080,uniprot:Q96FH0",MEF2BNB-MEF2B


In [58]:
manual_fixes = {"SIN3AK20": "SIN3A", "KAP1": "TRIM28", "SREBP1": "SREBF1", "MIX-A": "MIXL1", 
                "RPC155": "POLR3A", "ZBTB18": "ZNF238"}

In [59]:
cage_genes = list(cage_split["genes"].unique())

In [64]:
tf_cage_map = {}
counter = 0
for tf in all_tfs:
    if tf in cage_genes:
        sub = cage_split[cage_split["genes"] == tf]
        peaks = list(sub["00Annotation"].unique())
    elif tf in manual_fixes:
        name = manual_fixes[tf]
        sub = cage_split[cage_split["short_description"] == name]
        peaks = list(sub["00Annotation"].unique())
    else:
        peaks = ["none"]
        counter += 1
    tf_cage_map[tf] =  peaks

In [65]:
counter

12

only 12/491 (non-var, non-fusion TFs) cannot be mapped this way

## 4. map peaks to tissue specificity values

In [66]:
tss_ts.head()

Unnamed: 0,00Annotation,short_description,Group_0,Group_1,Group_2,Group_3,Group_4,Group_5,Group_6,Group_7,...,Group_541,Group_542,Group_543,Group_544,Group_545,Group_546,Group_547,Group_548,Group_549,tissue_sp_all
0,"chr10:100013403..100013414,-","p@chr10:100013403..100013414,-",0.543,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.993829
1,"chr10:100027943..100027958,-",p1@LOXL4,27.14998,0.89047,0.0,0.0,0.0,0.1056,0.0,0.101989,...,3.681535,0.0,0.0,0.0,0.0,1.603423,2.000139,0.629078,0.0,0.968756
2,"chr10:100076685..100076699,+","p@chr10:100076685..100076699,+",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.984646
3,"chr10:100150910..100150935,-","p@chr10:100150910..100150935,-",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.548168,0.0,0.0,0.0,0.0,0.0,0.0,0.99197
4,"chr10:100150951..100150962,-","p@chr10:100150951..100150962,-",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.096335,0.0,0.0,0.0,0.0,0.0,0.0,0.981522


In [95]:
sample_cols = [x for x in tss_ts.columns if "Group_" in x]

In [96]:
tf_ts_map = {}
for tf in tf_cage_map:
    peaks = tf_cage_map[tf]
    tss_ts_sub = tss_ts[tss_ts["00Annotation"].isin(peaks)]
    if len(tss_ts_sub) > 0:
        tss_ts_sub_sum = tss_ts_sub[sample_cols].sum(axis=0)
        sub_array = np.zeros((1, len(sample_cols)))
        sub_array[0,:] = tss_ts_sub_sum
        sub_df = pd.DataFrame(data=sub_array)
        specificity = calculate_tissue_specificity(sub_df)
        tf_ts_map[tf] = specificity

In [97]:
tf_ts_map = pd.DataFrame.from_dict(tf_ts_map, orient="index").reset_index()
tf_ts_map.columns = ["tf", "tissue_sp_all"]
tf_ts_map.sort_values(by="tissue_sp_all").head()

Unnamed: 0,tf,tissue_sp_all
356,MLX,0.611056
277,SMARCC2,0.634786
444,FAM48A,0.648737
2,GTF2F1,0.65118
144,YY1,0.659442


## 5. calculate tissue-specificity based on 3 MPRA cell lines

In [98]:
K562_group = "Group_17"
HepG2_group = "Group_513"
HeLa_group = "Group_512"
sample_3_cols = [K562_group, HepG2_group, HeLa_group]
sample_3_cols

['Group_17', 'Group_513', 'Group_512']

In [99]:
tf_ts_map_3 = {}
for tf in tf_cage_map:
    peaks = tf_cage_map[tf]
    tss_ts_sub = tss_ts[tss_ts["00Annotation"].isin(peaks)]
    if len(tss_ts_sub) > 0:
        tss_ts_sub_sum = tss_ts_sub[sample_3_cols].sum(axis=0)
        sub_array = np.zeros((1, len(sample_3_cols)))
        sub_array[0,:] = tss_ts_sub_sum
        sub_df = pd.DataFrame(data=sub_array)
        specificity = calculate_tissue_specificity(sub_df)
        tf_ts_map_3[tf] = specificity

In [100]:
tf_ts_map_3 = pd.DataFrame.from_dict(tf_ts_map_3, orient="index").reset_index()
tf_ts_map_3.columns = ["tf", "tissue_sp_3"]
tf_ts_map_3.sort_values(by="tissue_sp_3").head()

Unnamed: 0,tf,tissue_sp_3
241,MYBL2,0.053008
106,POLR2A,0.056071
99,FOXK1,0.056893
460,RFX1,0.060397
435,TCF3,0.066899


In [101]:
tf_ts_map = tf_ts_map.merge(tf_ts_map_3, on="tf", how="left")
tf_ts_map.sample(5)

Unnamed: 0,tf,tissue_sp_all,tissue_sp_3
433,OLIG2,0.978271,0.666667
420,TCF7,0.982379,0.533603
169,HINFP,0.723512,0.110798
334,RARG,0.850302,0.557349
299,SAP30,0.8275,0.225986


## 6. calculate tissue sp based on 3 for all TSS and all enh

In [102]:
specificities = calculate_tissue_specificity(tss_ts[sample_3_cols])
tss_ts["tissue_sp_3"] = specificities
tss_ts.sample(5)

Unnamed: 0,00Annotation,short_description,Group_0,Group_1,Group_2,Group_3,Group_4,Group_5,Group_6,Group_7,...,Group_542,Group_543,Group_544,Group_545,Group_546,Group_547,Group_548,Group_549,tissue_sp_all,tissue_sp_3
131177,"chr3:16564505..16564510,-","p@chr3:16564505..16564510,-",0.0,0.0,1.164959,0.79995,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.987965,
197879,"chrX:152773830..152773842,+",p25@BGN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.960933,
42629,"chr15:33023541..33023550,-",p10@CU686887,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.800056,0.0,0.0,0.986486,
2754,"chr10:135342102..135342115,+","p@chr10:135342102..135342115,+",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.996824,
9275,"chr11:107328527..107328569,-",p1@CWF19L2,5.429996,34.950933,207.36279,13.199168,43.181411,52.060966,11.041292,23.457431,...,48.529716,13.018983,14.414833,19.910267,28.059898,16.001111,32.801948,13.103336,0.932661,0.251215


In [103]:
specificities = calculate_tissue_specificity(enh_ts[sample_3_cols])
enh_ts["tissue_sp_3"] = specificities
enh_ts.sample(5)

Unnamed: 0,Id,Group_0,Group_1,Group_2,Group_3,Group_4,Group_5,Group_6,Group_7,Group_8,...,Group_542,Group_543,Group_544,Group_545,Group_546,Group_547,Group_548,Group_549,tissue_sp_all,tissue_sp_3
58208,chr8:10872577-10873059,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.110784,0.988151,
29379,chr19:40543471-40543758,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.103474,0.0,0.0,0.0,0.0,0.984271,0.666667
4258,chr1:185370192-185370524,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.098692,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.99298,
64046,chrX:6217986-6218124,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.306431,0.0,0.0,0.0,0.0,0.0,0.99501,
51428,chr6:27136362-27136689,0.0,0.108357,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.087482,0.110784,0.988938,0.666667


## 7. write files

In [104]:
tf_ts_map.to_csv("TF_tissue_specificities.from_CAGE.txt", sep="\t", index=False)
len(tf_ts_map)

465

In [105]:
tss_ts.to_csv("TSS.CAGE_grouped_exp.tissue_sp.txt", sep="\t", index=False)

In [106]:
enh_ts.to_csv("Enh.CAGE_grouped_exp.tissue_sp.txt", sep="\t", index=False)