In [1]:
# set up files associated with each genome coordinates
import metadensity as md
md.settings.from_config_file('/tscc/nfs/home/hsher/Metadensity/config/hg38-tscc2.ini')


# then import the modules
from metadensity.metadensity import *
from metadensity.plotd import *
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

# I have a precompiled list of ENCODE datasets as a CSV that is loaded by this dataloader
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-<name>' and the
# old names were later removed; use whichever name this installation provides.
plt.style.use('seaborn-v0_8-white' if 'seaborn-v0_8-white' in plt.style.available else 'seaborn-white')
from itertools import chain
from pathlib import Path

import glob

please set the right config according to genome coordinate
Using /home/hsher/gencode_coords/GRCh38.p13.genome.fa
Using HG38 by default
Using /tscc/nfs/home/hsher/gencode_coords/GRCh38.p13.genome.fa


Matplotlib created a temporary config/cache directory at /tmp/matplotlib-tantue9h because the default path (/home/jovyan/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


Using:  /tscc/nfs/home/hsher/gencode_coords/gencode.v33.transcript.gff3


In [2]:
# RBPs analysed below: used to select the PAS-seq tables and to drive the
# per-RBP loop that loads the matching eCLIP outputs.
candidates = [
    'NUDT21',
    'GRB2',
    'RNPS1',
    'CPSF6',
    'STAU2',
    'MBNL2',
    'TRNAU1AP',
    'RBM22',
    'EIF4B',
    'ZC3HAV1',
    'RBM5',
    'ZMAT3',
]

In [3]:
#candidates = [ 'ZMAT3',  'RBM5'] #

In [4]:
# Directory that is globbed for the '*.txt' PAS-seq tables in the next cell
pas_paths = '/tscc/nfs/home/pjaganna1/projects/apa_screen_yongsheng/apa_kd_data_graveley_redo/dif_apa/pas_seq/'

In [5]:
# Keep only the tables whose file name carries a candidate RBP in its second
# underscore-delimited field.
pas_candidate_paths = []
for txt_path in glob.glob(os.path.join(pas_paths, '*.txt')):
    name_fields = txt_path.split('/')[-1].split('_')
    if name_fields[1] in candidates:
        pas_candidate_paths.append(txt_path)

In [6]:
def return_class(df):
    """Label one table row by the direction of its proximal/distal shift.

    ``df`` is a single row (anything indexable by column name) holding the
    'log(treatment.p/treatment.d)' and 'log(Ctrl.p/Ctrl.d)' values.

    Returns 'proximal' when the treatment ratio is greater than the control
    ratio, 'distal' when it is smaller, and None when neither comparison is
    true (for example when the two ratios are equal).
    """
    treatment_ratio = df['log(treatment.p/treatment.d)']
    control_ratio = df['log(Ctrl.p/Ctrl.d)']
    if treatment_ratio > control_ratio:
        return 'proximal'
    if treatment_ratio < control_ratio:
        return 'distal'
    return None
    
totals = []  # not used in this cell; kept in case a later cell relies on it
per_kd_tables = []  # one filtered table per knockdown, concatenated once after the loop
for rbp_table in pas_candidate_paths:
    # the second underscore-delimited field of the file name is used as the knockdown label
    kd = rbp_table.split('/')[-1].split('_')[1]
    test = pd.read_csv(rbp_table, sep='\t')

    # add 1 to every count before taking ratios (avoids dividing by, or taking the log of, zero)
    for count_col in ('treatment.p', 'treatment.d', 'Ctrl.p', 'Ctrl.d'):
        test[f'{count_col} + 1'] = test[count_col] + 1

    # log2 proximal/distal ratios, computed column-wise instead of row by row
    test['log(treatment.p/treatment.d)'] = np.log2(test['treatment.p + 1'] / test['treatment.d + 1'])
    test['log(Ctrl.p/Ctrl.d)'] = np.log2(test['Ctrl.p + 1'] / test['Ctrl.d + 1'])
    test['apa_class'] = test.apply(return_class, axis=1)
    test['apa_type'] = test['TYPE.p'].astype(str) + '_' + test['TYPE.d'].astype(str)
    test['KD'] = kd

    # keep only pairs where both sites are annotated as 'Intron' or 'LastExon'
    kept_site_types = ['Intron', 'LastExon']
    test = test[test['TYPE.p'].isin(kept_site_types) & test['TYPE.d'].isin(kept_site_types)]

    # every row of this table already carries KD == kd, so no further filtering is needed
    final_table = test[['SYMBOL', 'COORD.p', 'COORDS.d', 'KD', 'apa_class']]
    per_kd_tables.append(final_table)

# a single concat avoids re-copying the growing frame on every iteration;
# pd.concat raises on an empty list, so fall back to an empty frame
all_table_info = pd.concat(per_kd_tables) if per_kd_tables else pd.DataFrame()

In [7]:
# Preview the first rows of the table assembled from all knockdown files
all_table_info.head()

Unnamed: 0,SYMBOL,COORD.p,COORDS.d,KD,apa_class
0,ADD2,chr2:70675091:-,chr2:70656790:-,RBM22,distal
2,AL138752.2,chr9:37774496:-,chr9:37588413:-,RBM22,distal
3,ASAH1,chr8:18066763:-,chr8:18056452:-,RBM22,proximal
4,ASXL1,chr20:32437359:+,chr20:32439320:+,RBM22,distal
5,CCDC71,chr3:49165667:-,chr3:49162535:-,RBM22,distal


In [8]:
# Knockdown labels present in the combined table
all_table_info['KD'].unique()

array(['RBM22', 'RBM5', 'MBNL2', 'STAU2', 'ZC3HAV1', 'TRNAU1AP', 'EIF4B',
       'ZMAT3', 'GRB2', 'NUDT21', 'RNPS1', 'CPSF6'], dtype=object)

In [9]:
# Split the combined table by shift direction; rows whose class is neither
# 'distal' nor 'proximal' end up in neither table.
apa_class_col = all_table_info['apa_class']
all_table_info_dist = all_table_info.loc[apa_class_col.eq('distal')]
all_table_info_prox = all_table_info.loc[apa_class_col.eq('proximal')]

Following the Metadensity tutorial: https://metadensity.readthedocs.io/en/latest/1_Example_using_skipper_outputs.html

In [10]:

    
def get_file_path(prefix, skipper_output_dir, ip_suffix = '_IP_', in_suffix = '_IN_'):
    """Collect the file locations of one Skipper run into a pandas Series.

    Parameters
    ----------
    prefix : sample prefix used in the file names; also stored as 'uid' and 'RBP'.
    skipper_output_dir : path object supporting ``/`` (e.g. pathlib.Path) that
        points at the Skipper output directory.
    ip_suffix, in_suffix : strings placed between the prefix and the replicate
        number for the IP files and for the control files respectively.

    Returns
    -------
    pandas.Series with, for replicates 1 and 2 (keys suffixed _0 and _1), the
    deduplicated genome BAMs and the scaled plus/minus bigWigs of the IP and of
    the control, plus 'bed_0'/'bed_1', which are both set to ``transcript.fn``
    (``transcript`` is a notebook-level global).
    """
    bam_dir = skipper_output_dir / 'bams' / 'dedup' / 'genome'
    bigwig_dir = skipper_output_dir / 'bigwigs'

    def bam(suffix, rep):
        return str(bam_dir / f'{prefix}{suffix}{rep}.genome.Aligned.sort.dedup.bam')

    def bigwig(strand, suffix, rep):
        return str(bigwig_dir / f'scaled/{strand}' / f'{prefix}{suffix}{rep}.scaled.cov.{strand}.bw')

    def replicate_entries(rep):
        # keys are zero-based while the file names use one-based replicate numbers
        idx = rep - 1
        return {
            f'bam_{idx}': bam(ip_suffix, rep),
            f'bam_control_{idx}': bam(in_suffix, rep),
            f'minus_{idx}': bigwig('minus', ip_suffix, rep),
            f'minus_control_{idx}': bigwig('minus', in_suffix, rep),
            f'plus_{idx}': bigwig('plus', ip_suffix, rep),
            f'plus_control_{idx}': bigwig('plus', in_suffix, rep),
        }

    # assembled in the same key order as before: ids, replicate 1, beds, replicate 2
    entries = {'uid': prefix, 'RBP': prefix}
    entries.update(replicate_entries(1))
    entries['bed_0'] = transcript.fn
    entries['bed_1'] = transcript.fn
    entries.update(replicate_entries(2))
    return pd.Series(entries)

def find_gene_ids_with_reproducible_windows(prefix, skipper_output_dir):
    """Return the gene names of transcripts whose gene has a reproducible enriched window.

    Reads ``reproducible_enriched_windows/<prefix>.reproducible_enriched_windows.tsv.gz``
    under ``skipper_output_dir`` (a path object supporting ``/``). A window's
    'gene_id' entry can list several ids separated by ':'; rows with a missing
    'gene_id' are skipped.

    Skipper is run with GENCODE v38, which differs from Metadensity's annotation
    version, so ids are compared with the version suffix (text after the first
    '.') removed.

    Returns a list with one 'gene_name' per record of the notebook-level
    ``transcript`` whose version-less 'gene_id' is among the window gene ids.
    Despite the function name these are gene names, and a name appears once per
    matching record.
    """
    window_path = skipper_output_dir / 'reproducible_enriched_windows' / f'{prefix}.reproducible_enriched_windows.tsv.gz'
    window = pd.read_csv(window_path, sep = '\t')

    # A set gives constant-time membership tests in the scan over all transcripts below.
    geneid_no_version = {
        gene_id.split('.')[0]
        for id_field in window['gene_id'].dropna()
        for gene_id in id_field.split(':')
    }

    # map to Metadensity's annotation
    metadensity_gene_ids = [
        t.attrs['gene_name']
        for t in transcript
        if t.attrs['gene_id'].split('.')[0] in geneid_no_version
    ]
    return metadensity_gene_ids
    
    
def extract_polyA_signal_coordinates(subset_polyadf, coord_col):
    """Convert a column of 'chrom:position:strand' strings into a BedTool.

    Parameters
    ----------
    subset_polyadf : DataFrame containing the column ``coord_col``.
    coord_col : name of the column to read, e.g. 'COORD.p' or 'COORDS.d'.
        Any column holding 'chrom:position:strand' strings is accepted;
        a missing column raises KeyError.

    Returns
    -------
    BedTool with one record per distinct coordinate:
    (chrom, position, position + 1, original coordinate string, '.', strand).
    """
    signal_coord = []
    for coord in subset_polyadf[coord_col]:
        fields = coord.split(':')
        chrom, position, strand = fields[0], int(fields[1]), fields[2]
        # the original coordinate string is kept as the BED name
        signal_coord.append([chrom, position, position + 1, coord, '.', strand])

    # make into bed, dropping repeated sites
    polysignal_df = pd.DataFrame(
        signal_coord,
        columns=['chrom', 'start', 'end', 'name', 'score', 'strand'],
    ).drop_duplicates()
    polyasignal_bed = BedTool.from_dataframe(polysignal_df)

    return polyasignal_bed
def polyAtype_specific_coords(pas_table, gene):
    """Build the proximal and distal poly(A)-site BedTools for one knockdown.

    Parameters
    ----------
    pas_table : DataFrame with columns 'KD', 'COORD.p' and 'COORDS.d'.
    gene : value matched against the 'KD' column to select the rows.

    Returns
    -------
    (prox_bed, dist_bed) built from the 'COORD.p' and 'COORDS.d' columns of the
    selected rows.
    """
    # both site types come from the same rows, so filter once
    kd_rows = pas_table[pas_table['KD'] == gene]

    # create bed of polyA sites
    prox_bed = extract_polyA_signal_coordinates(kd_rows, 'COORD.p')
    dist_bed = extract_polyA_signal_coordinates(kd_rows, 'COORDS.d')

    return prox_bed, dist_bed

def segment_transcript_name(s):
    """Return the value of the first ';'-separated 'key=value' field of ``s``.

    The driver loop applies this to the attribute column of the intersected
    transcript records to obtain a transcript id.
    """
    leading_field = s.split(';', 1)[0]
    return leading_field.split('=')[1]

def segment_gene_name(s):
    """Return the gene name from a ';'-separated 'key=value' attribute string.

    The 'gene_name' field is looked up by key, so the result does not depend on
    where that field sits in the string. If no 'gene_name' key is present, the
    value of the sixth field is returned, as before; a string with fewer than
    six fields then raises IndexError.
    """
    fields = s.split(';')
    for field in fields:
        key, sep, value = field.partition('=')
        if sep and key == 'gene_name':
            return value
    # positional fallback for attribute strings without an explicit gene_name key
    return fields[5].split('=')[1]

def genes_of_interest(gene, pas_table, transcript_type = 'protein_coding', sample_no=200, gene_ids=None):
    """Build metagenes for transcripts of genes that have both a window and an APA event.

    Parameters
    ----------
    gene : value matched against the 'KD' column of ``pas_table``.
    pas_table : PAS table (already restricted to proximal or distal shifts by
        the caller) with columns 'KD' and 'SYMBOL'.
    transcript_type : accepted for compatibility; not applied by the current code.
    sample_no : forwarded to ``Build_many_metagene``.
    gene_ids : iterable of gene names to intersect with the table's symbols.
        When None, the notebook-level global ``geneids`` is used, as before.

    Returns
    -------
    Whatever ``Build_many_metagene`` returns for the transcript ids of every
    record in the global ``transcript`` whose 'gene_name' is in the intersection.
    Prints the size of the intersection and the number of transcript ids.
    """
    if gene_ids is None:
        # fall back to the global populated by the driver loop
        gene_ids = geneids

    kd_symbols = set(pas_table.loc[pas_table['KD'] == gene, 'SYMBOL'])
    intersection_set = set(gene_ids) & kd_symbols
    print('Intersection Set:', len(intersection_set))

    tids = [t.attrs['transcript_id'] for t in transcript if t.attrs['gene_name'] in intersection_set]
    print(len(tids))

    cds_metagenes = Build_many_metagene(tids, sample_no = sample_no)
    return cds_metagenes

# add feature to those metagene
def add_feature_to_metagene(kd_exp_meta, length=200):
    """Attach 'prox' and 'dist' features to every metagene in ``kd_exp_meta``.

    ``kd_exp_meta`` maps transcript id -> metagene. For each transcript, every
    matching row of the notebook-level globals ``prox_site_df`` and
    ``dist_site_df`` (matched on 'transcript_id') results in one
    ``create_feature`` call with that row's 'start' as ``interval`` and the
    given ``length``. The metagenes are modified in place; nothing is returned.
    """
    def attach(metagene, site_table, tx_id, feature_name):
        matching_sites = site_table.loc[site_table['transcript_id'] == tx_id]
        for _, site in matching_sites.iterrows():
            metagene.create_feature(interval = site['start'], feature_name = feature_name,
                                    length = length)

    for tx_id, metagene in kd_exp_meta.items():
        # proximal sites first, then distal, for each transcript
        attach(metagene, prox_site_df, tx_id, 'prox')
        attach(metagene, dist_site_df, tx_id, 'dist')
            
def remove_features_noPA(kd_exp_meta):
    """Drop, in place, every metagene that lacks a 'prox' or a 'dist' feature.

    ``kd_exp_meta`` maps transcript id -> metagene; an entry is kept only when
    both names are present in its ``featnames``. Nothing is returned.
    """
    # collect first so the dict is not resized while it is being iterated
    incomplete = [
        tx_id
        for tx_id, metagene in kd_exp_meta.items()
        if not ('prox' in metagene.featnames and 'dist' in metagene.featnames)
    ]
    for tx_id in incomplete:
        del kd_exp_meta[tx_id]
            
def build_idr_metadensity(eCLIP,v, metagene, additional_name_info ):
    """Build, compute and save a Metadensity restricted to the 'prox'/'dist' features.

    Parameters
    ----------
    eCLIP : eCLIP object; its ``name`` is used in the object name and file name.
    v : accepted for compatibility; not used.
    metagene : metagenes (already carrying the poly(A)-site features) passed to
        ``Metadensity``. Previously this argument was ignored and the
        notebook-level global ``kd_exp_meta`` was used instead.
    additional_name_info : label added to the object name and to the file name.

    Returns
    -------
    The Metadensity object, after ``get_density_array`` and after saving it to
    ``<deepdish_out>/<eCLIP.name>_<additional_name_info>_sample200_length200.h5``
    (``deepdish_out`` is a notebook-level global).
    """
    m = Metadensity(eCLIP, f'{eCLIP.name} ({additional_name_info})', metagenes=metagene, background_method = 'relative information', normalize = False)

    m.featnames = ['prox','dist'] # to restrict only computing for these 2 features
    m.get_density_array()
    # NOTE: the 'sample200_length200' part of the file name is fixed text, not
    # derived from the parameters actually used to build the metagenes.
    m.save_deepdish(os.path.join(deepdish_out, '{}_{}_sample200_length200.h5'.format(eCLIP.name, additional_name_info)))
    return m


In [11]:
# PAS tables keyed by shift direction; the key is also used as the label of the
# Metadensity object and of its output file.
pas_table_dict = {
    'DISTAL': all_table_info_dist,
    'PROXIMAL': all_table_info_prox,
}

In [12]:
# Output directory for the .h5 files written by build_idr_metadensity
# NOTE(review): the path contains 'metadendity' — confirm this matches the directory name on disk
deepdish_out = '/tscc/nfs/home/pjaganna1/projects/apa_screen_yongsheng/eclip/metadendity/h5_files_prox_dist_PAS'

In [20]:
 # NOTE(review): this cell was executed as In [20] but sits above the cell (In [13]) that
 # creates all_meta_obj and g; on a fresh Restart & Run All it would presumably raise
 # NameError — consider moving it below that cell.
 all_meta_obj[g]

[<metadensity.metadensity.Metadensity at 0x1553f28855d0>,
 <metadensity.metadensity.Metadensity at 0x1553f2847810>]

In [13]:
reproducible_genes_df_prox = {}  # not filled in this cell; kept in case a later cell relies on it
reproducible_genes_df_dist = {}  # not filled in this cell; kept in case a later cell relies on it
all_meta_obj = {}  # RBP -> list of Metadensity objects, one per entry of pas_table_dict

# RBPs whose skipper run is stored under a different name
skipper_run_alias = {'NUDT21': 'CPSF5'}

# Fields kept from the intersect output: the six BED fields of the poly(A) site plus
# field 14, which the helpers below parse as the overlapping transcript's attribute string.
intersect_fields = [0, 1, 2, 3, 4, 5, 14]
header = ['chrom', 'start', 'end', 'name', 'score', 'strand', 'attr']

for g in candidates:
    all_meta_obj[g] = []
    print(g)

    # The eCLIP files and the genes with reproducible windows depend only on the RBP,
    # so they are loaded once per RBP instead of once per PAS table.
    run_name = skipper_run_alias.get(g, g)
    skipper_output_dir = Path(f'/tscc/nfs/home/pjaganna1/projects/apa_screen_yongsheng/eclip/skipper_runs/{run_name}/output/')
    e_1 = eCLIP.from_series(get_file_path(run_name, skipper_output_dir))
    # geneids is read as a global by genes_of_interest
    geneids = find_gene_ids_with_reproducible_windows(run_name, skipper_output_dir)

    for k in pas_table_dict:
        # pas_table_dict[k] is the table already restricted to distal or proximal shifts
        prox_coord, dist_coord = polyAtype_specific_coords(pas_table_dict[k], g)

        # determine which transcript these are in (same-strand overlaps only);
        # prox_site_df / dist_site_df are read as globals by add_feature_to_metagene
        prox_site_df = prox_coord.intersect(transcript, s = True, wb = True).to_dataframe(header = None)[intersect_fields]
        dist_site_df = dist_coord.intersect(transcript, s = True, wb = True).to_dataframe(header = None)[intersect_fields]
        prox_site_df.columns = header
        dist_site_df.columns = header

        prox_site_df['transcript_id'] = prox_site_df['attr'].map(segment_transcript_name)
        dist_site_df['transcript_id'] = dist_site_df['attr'].map(segment_transcript_name)
        prox_site_df['gene_name'] = prox_site_df['attr'].map(segment_gene_name)
        dist_site_df['gene_name'] = dist_site_df['attr'].map(segment_gene_name)

        # build metagenes, attach the site features, drop metagenes missing either one
        kd_exp_meta = genes_of_interest(g, pas_table_dict[k])
        add_feature_to_metagene(kd_exp_meta)
        remove_features_noPA(kd_exp_meta)
        all_meta_obj[g].append(build_idr_metadensity(e_1, g, kd_exp_meta, k))

NUDT21


['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 146
147
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 1623
1624
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):


GRB2


['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 79
79
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 48
49
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):


RNPS1


['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 10
10
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 13
13
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):


CPSF6


['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 47
47
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 583
583
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):


STAU2


['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 10
10
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 7
7
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):


MBNL2


['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 487
487
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 140
141
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):


TRNAU1AP


['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 81
81
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 52
53
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):


RBM22


['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 6
6
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 2
2
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene
EIF4B


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 9
9
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 27
27
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):


ZC3HAV1


['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 264
264
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 129
130
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene
need at least one array to concatenate
RBM5


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 6
6
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 231
231
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):


ZMAT3


['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 11
12
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())
['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
but file has 15 fields; you can supply custom names with the `names` kwarg
  "`names` kwarg" % (self.file_type, _names, self.field_count())


Intersection Set: 39
39
Using: /tscc/nfs/home/hsher/projects/Metadensity/metadensity/data/hg38/gencode
Done building metagene


  elif _pandas and isinstance(level, (pd.DataFrame, pd.Series, pd.Panel)):
