In [None]:
#!/usr/bin/env python
# coding: utf-8

# 20221129
# 
# sarahfong
# 
# ### intersect nullomers, empirical shuffle with phylop 100way bigWig
# 
# split by exonic/non-exonic
# 
# 
# use bigWigSummary executable from UCSC to get phylop 
# 
# 
# compare nullomers v. empirical background

# In[ ]:
—

In [1]:
import glob
from joblib import Parallel, delayed
import os
import pybedtools as pbt
import subprocess
import sys


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sns
import statsmodels as sm

# append path
sys.path.append("/wynton/home/ahituv/fongsl/tools/py_/")

# import config reader
import config_readwrite as crw
import zippery



config_tag = "config-exon.ini"

# append path
sys.path.append("/wynton/home/ahituv/fongsl/tools/py_/")

# import config reader
import config_readwrite as crw
import count_lines as cl
import plot_params as pp

config_name = os.path.join(os.getcwd(), config_tag)

config, configname = crw.read_config(config_name)


# Analysis

In [2]:
DATASET = "PHYLOP"

DATA_PATH = config[DATASET]["PATH"]
RE_PATH = config["RESULTS"]["PATH"] 

ANNOT = config["GENCODE"]["ANNOT"] 

RE = os.path.join(RE_PATH, DATASET)

if os.path.exists(RE) is False:
    os.mkdir(RE)
    config["RESULTS"][DATASET] = RE 

## functions

In [3]:
def plot_lines(df, out_pdf,datatype, xrange):
    fig, ax = plt.subplots(figsize=(6,6))
    sns.lineplot(df, x="pos", y= '50%', hue = "label" )
    sns.lineplot(df, x="pos", y= '25%', hue = "label")
    sns.lineplot(df, x="pos", y= '75%', hue = "label")

    ax.axvline(499, color="grey", ls = "--") # center line
    
    ax.set(xlim = (xrange[0], xrange[1]),
           ylabel = "phylop score - 100way",
            title = f"{datatype}:{xrange[0]}-{xrange[1]}")
    
    ax.legend(loc="upper right")
    
    plt.savefig(out_pdf, bbox_inches="tight")
    plt.show()

    
def plot_line(y, df, out_pdf,datatype, xrange):
    fig, ax = plt.subplots(figsize=(6,6))
    sns.lineplot(df, x="pos", y=y, hue="label")
    
    ax.axvline(499, color="grey", ls = "--") # center line
    
    ax.set(xlim = (xrange[0], xrange[1]),
           ylabel = f"phylop score - 100way-{y}",
            title = f"{datatype}:{xrange[0]}-{xrange[1]}")
    
    ax.legend(loc="upper right")
    
    plt.savefig(out_pdf, bbox_inches="tight")
    plt.show()
    
    
def calculateEmpiricalP(obs, exp_sum_list):
    
    import datetime
    """
    return two lists
        (1) info - vector w/  
                n_obs, 
                median_exp, 
                std, 
                fold-change  # calculated from the median of expected shuffle 
                p_val
                
        (2) fold_changes- vector expected fold changes (to calculate confidence interval)
        
    input
        observed overlap count (int)
        list of expected overlap counts (list of ints)
    
    method
        1. get median of expected overlap counts
        2. get standard deviation of expected overlap counts
        3. center expected overlap counts at median
        4. Sum the number of centered expected counts greater than observed centered count
            This is two tailed because it evaluates both sides of the distribution (w/ abs value). 
        5. calculate fold change as observed/ median expected w/ pseudo count
        6. calculate fold change of each "obs"/ expected w/ pseudo count
        7. calculate the p-value as count of equal or more extreme values than observed value
        8. return list of empirical info + fold changes
        
        
    
    """
    #1
    mu = np.median(exp_sum_list)  # median of exp.dist
    
    #2
    sigma = np.std(exp_sum_list)  # std
    
    #3
    dist_from_mu = [exp - mu for exp in exp_sum_list] # center the distribution 
    
    #4
    p_sum = sum(1 for exp_dist in dist_from_mu if abs(exp_dist) >= abs(obs - mu)) # count values >= centered obs

    #5
    fold_change = (obs + 1.0) / (mu + 1.0) # fold change obs from median expected w pseudo count
    
    #6
    fold_changes = list((obs + 1.0) / (m + 1.0) for m in exp_sum_list) # fold change obs from /each exp w pseudo count
    
    #7
    p_val = (p_sum + 1.0) / (len(exp_sum_list) + 1.0)  # probability of observing obs-like value equal or more extreme in expected distribution
    
    #8
    info = [
            obs, 
            mu, 
            sigma, 
            fold_change, 
            p_val, 
            str(datetime.datetime.now())
            ]
    
    return info, fold_changes

def exp(nullo, shuf, summary_stat_name, pos):
    """
    return PER POSITION foldchange, empirical P between observed and expected (shuffled) data
    
    input
        nullo (df) - pandas dataframe of nullomer summary stats in long form (each row is one base in dist)
        shuf (df) - pandas dataframe of N shuffled summary stats in long form (each row is one base in dist)
        summary_stat_name (str) - summary_stat measurement name (this should be a column in the dataframes)
        pos (int) - relative position of the base to estimate fold change between. 
        
    method
        1. prepare position phylop vectors for obs, exp
        2. calculate empirical P
    
    
    """
    obs = nullo.loc[nullo["pos"]== pos, summary_stat_name].iloc[0]
    exp_list =  shuf.loc[shuf["pos"]== pos, summary_stat_name].to_list()
    
    info, fold_changes = calculateEmpiricalP(obs, exp_list)
    
    print(info)
    
    return info, fold_changes

In [4]:
def formatDf(datatype, query):
    """
    glob all query files for datatype and turn these data into a single dataframe
    split dataframe based on nullomer v shuffle
    
    input 
        datatype (str) - label for dataset (e.g. exon-overlpa, no exon-overlap)
        query (str) - fragment of str to glob files on. 
        
    method
        1. glob all files
        2. make a list to collect pandas dataframes
        3. assign LABEL - NULLOMER/SHUF  for each dataset. 
        4. assign ID - str of the file name
        5. open the file as a pandas dataframe, add label, id columns, append to collection list
        6. concatenate all dataframes together
        7. split dataframes on NULLOMER/SHUF label
        
    return 
        df (pd dataframe) - all data
        nullo (pd dataframe) - data for just nullomers
        shuf (pd dataframe) - data for just matched shuffles
        
    """
    #1
    file_list = glob.glob(os.path.join(DATA_PATH, query))
    print(len(file_list))
    
    #2
    df_list = []
    
    #3
    for f in file_list:
    
        # assign static label
        if "shuf" in f:
            LABEL = f"SHUF-{datatype}"
        else:
            LABEL = datatype
        #4
        ID = f.split("/")[-1]
        
        #5
        df = pd.read_csv(f, sep='\t')
        
        df["label"], df["id"] = LABEL, ID

        df_list.append(df)    
    #6
    df = pd.concat(df_list)
    
    #7
    nullo = df.loc[df["label"] == datatype]
    shuf = df.loc[df["label"] != datatype]

    return df, nullo, shuf

In [5]:
def center_stats(nullo, shuf):
    """
    print obs v. exp at center position stats w/ median values
    """
    
    # MIDPOINT - median summary stats 
    i, f = exp(nullo, shuf, "50%", 500)

    # MIDPOINT-100 - median summary stats 
    i, f = exp(nullo, shuf, "50%", 400)

    # MIDPOINT+100 - median summary stats 
    i, f = exp(nullo, shuf, "50%", 600)

# old code for summary stats

In [6]:
lines = []  # for collecting stats
for DATATYPE, QUERY in SETS:
    print(DATATYPE)
    df, nullo, shuf = formatDf(DATATYPE, QUERY)
    break
    ## plot each percentile + xranges 

    ys = ["25%", "50%", "75%"]
    ranges = [("1kb", [0,1000]), ("0.2kb", [400, 600]),  ("0.1kb", [450,550])]
    
    for y in ys:
        for name, xrange in ranges:
            out = os.path.join(RE, f"phylop-{name}_{ANNOT}_{DATATYPE}-{y}.pdf")
            plot_line(y, df, out, DATATYPE, xrange)

    ## plot counts of phylop values

    fig, ax = plt.subplots(figsize=(6,6))
    sns.lineplot(df, x="pos", y= 'count', hue = "label")
    ax.set(title = DATATYPE)
    out = os.path.join(RE, f"{DATATYPE}-count.pdf")
    plt.savefig(out)

    ## plot everything together

    for name, xrange in ranges:
        out = os.path.join(RE, f"phylop-{name}_{ANNOT}_{DATATYPE}.pdf")

        plot_lines(df, out, DATATYPE, xrange)
        
    ### empirical P for median phylop
    # MIDPOINT - median summary stats 

    center_stats(nullo, shuf)
    for pos in np.arange(0,1000):
        """
        collect obs v exp stats
        """
        i, f = exp(nullo, shuf, "50%", pos)

        i.extend([pos,f"{ANNOT}-{DATATYPE}", "50%\n"])
        i = [str(item) for item in i]
        line = "\t".join(i)
        lines.append(line)

NameError: name 'SETS' is not defined

# new code for raw data

In [7]:
# get summary stats for phylop scores

def q025(x):
    return x.quantile(0.025)

def q975(x):
    return x.quantile(0.975)

def getSummaryStats(file, value_col, matched_df):
    """
    return summary stats for each position in a bed element
    
    input
        file (str) - path to extracted bw vector file from above run
        value_col (str) - name of column to calculate values on. 
        matched_df(pd dataframe) - GC matched element ids to keep. 
        
    method
        1. make a pandas dataframe out of the dictionary
            
            col 0-3 = CHR, START, STOP, Nullomer ID
            col 4-1003 = each bp position score flanking nullomer
            
            col 503 = pre-nullomer locus.
            
            1.1 Formatting
                - drop_duplicates() 
                - dropna()
                
            1.2 make id col for matching gc
            1.3 filter for GC matching ids
            1.4 drop id col

    
        2. turn dataframe into long form data, where 1 column is the position, and another column is the value
            index on CHR, START, STOP, Nullomer ID
            
            - DROP NAN values. 
            
        3. per position, bootstrap confidence intervals. 
        
        4. compute summary stats per position across nullomer sequence for linsight scores.
            4.1 also correct for
        
        5. write summary stats to outfile
    
    return 
    
        summary stats outfile (str) - write summary stats as file, 
            ## so that stats can be done on empirical summary statistics. 
            
        
    """
    out = file.strip(".txt") + "-SUMMARY_STATS_GC.txt"
    zipped = file + ".gz" 

    if os.path.exists(out) is False:
        
        # if re-running stats
        if os.path.exists(zipped) is True:
            zippery.unzip_file(zipped)
            
        
        #1 make a pandas dataframe
        df = pd.read_csv(file, sep = '\t', header = None, low_memory = False).drop_duplicates().dropna() #1.1

        #1.2 
        df["id"] = df[0] + ":" + df[1].map(str) + '-' +df[2].map(str)
        
        #1.3 GC match filter
        if "shuf" in file:
            keep = df.loc[df["id"].isin(set(matched_df["id"]))].copy() # keep only GC content matched elements
        else:
            keep = df
            
        print(df.shape, keep.shape)
        
        #1.4 drop id col
        keep = keep.drop(columns =["id"])
        

        #2
        melted = pd.melt(keep, id_vars= keep.columns[:4], var_name="pos", value_name=value_col)

        #print(melted.shape, melted.head())

        melted[value_col] = melted[value_col].astype(float)  # change data type
        

        f = {value_col: [q025, q975]}
        ci = melted.groupby('pos').agg(f).reset_index()

        #4
        summary_stats = melted.groupby("pos")[value_col].describe().reset_index()
        summary_stats= summary_stats.join(ci)

        #print(list(summary_stats))
        

        summary_stats["pos"] = summary_stats["pos"] - 4 # correct bp position 
                            

        #5 
        summary_stats.to_csv(out, sep ='\t', index=False)

        zippery.rezip_file(file)
    
    return out

# MATCH ON GC

In [8]:
MATCHED_GC = "/wynton/home/ahituv/fongsl/nullomers/data/MATCHED_gc_frac_nbins-25.tsv"
matched = pd.read_csv(MATCHED_GC, sep='\t')

matched.head()

Unnamed: 0,id,gc_frac,gc_dinuc_frac,datatype,dist1bin
0,chr22:37658728-37659729,0.728272,0.197802,SHUF-EXON,0.0
1,chr8:123273764-123274765,0.72028,0.152847,SHUF-EXON,0.0
2,chr15:40282038-40283039,0.749251,0.201798,SHUF-EXON,0.0
3,chr16:67666023-67667024,0.729271,0.166833,SHUF-EXON,0.0
4,chr5:132825384-132826385,0.725275,0.172827,SHUF-EXON,0.0


In [None]:
SETS = [('EXON-OVERLAP', "*-exon_overlap*.txt.gz"), 
       ("EXON-NO-OVERLAP", "*-exon_no-overlap*.txt.gz")]

for datatype, query in SETS:

    #1
    file_list = glob.glob(os.path.join(DATA_PATH, query))
    print(len(file_list))


    value_col ="phylop"
    for f in file_list:
        out = getSummaryStats(f, value_col, matched)

501
(4255, 1005) (662, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-366-phylop.txt.gz already has .gz suffix -- unchanged


(4260, 1005) (606, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-370-phylop.txt.gz already has .gz suffix -- unchanged


(4248, 1005) (601, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-407-phylop.txt.gz already has .gz suffix -- unchanged


(4263, 1005) (567, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-419-phylop.txt.gz already has .gz suffix -- unchanged


(4254, 1005) (625, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-397-phylop.txt.gz already has .gz suffix -- unchanged


(4266, 1005) (641, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-400-phylop.txt.gz already has .gz suffix -- unchanged


(4252, 1005) (615, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-395-phylop.txt.gz already has .gz suffix -- unchanged


(4245, 1005) (589, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-426-phylop.txt.gz already has .gz suffix -- unchanged


(4250, 1005) (611, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-423-phylop.txt.gz already has .gz suffix -- unchanged


(4250, 1005) (658, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-387-phylop.txt.gz already has .gz suffix -- unchanged


(4255, 1005) (666, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-412-phylop.txt.gz already has .gz suffix -- unchanged


(4248, 1005) (639, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-411-phylop.txt.gz already has .gz suffix -- unchanged


(4255, 1005) (634, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-396-phylop.txt.gz already has .gz suffix -- unchanged


(4257, 1005) (601, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-414-phylop.txt.gz already has .gz suffix -- unchanged


(4257, 1005) (660, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-381-phylop.txt.gz already has .gz suffix -- unchanged


(4255, 1005) (646, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-398-phylop.txt.gz already has .gz suffix -- unchanged


(4263, 1005) (645, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-385-phylop.txt.gz already has .gz suffix -- unchanged


(4261, 1005) (650, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-391-phylop.txt.gz already has .gz suffix -- unchanged


(4263, 1005) (589, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-386-phylop.txt.gz already has .gz suffix -- unchanged


(4262, 1005) (624, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-392-phylop.txt.gz already has .gz suffix -- unchanged


(4253, 1005) (610, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-388-phylop.txt.gz already has .gz suffix -- unchanged


(4255, 1005) (672, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-390-phylop.txt.gz already has .gz suffix -- unchanged


(4259, 1005) (634, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-389-phylop.txt.gz already has .gz suffix -- unchanged


(4262, 1005) (673, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-399-phylop.txt.gz already has .gz suffix -- unchanged


(4254, 1005) (634, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-382-phylop.txt.gz already has .gz suffix -- unchanged


(4249, 1005) (606, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-403-phylop.txt.gz already has .gz suffix -- unchanged


(4249, 1005) (602, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-401-phylop.txt.gz already has .gz suffix -- unchanged


(4253, 1005) (607, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-408-phylop.txt.gz already has .gz suffix -- unchanged


(4257, 1005) (634, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-424-phylop.txt.gz already has .gz suffix -- unchanged


(4257, 1005) (630, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-410-phylop.txt.gz already has .gz suffix -- unchanged


(4256, 1005) (640, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-405-phylop.txt.gz already has .gz suffix -- unchanged


(4258, 1005) (620, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-422-phylop.txt.gz already has .gz suffix -- unchanged


(4250, 1005) (678, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-437-phylop.txt.gz already has .gz suffix -- unchanged


(4254, 1005) (633, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-406-phylop.txt.gz already has .gz suffix -- unchanged


(4261, 1005) (608, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-418-phylop.txt.gz already has .gz suffix -- unchanged


(4250, 1005) (617, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-421-phylop.txt.gz already has .gz suffix -- unchanged


(4261, 1005) (618, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-415-phylop.txt.gz already has .gz suffix -- unchanged


(4244, 1005) (660, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-420-phylop.txt.gz already has .gz suffix -- unchanged


(4244, 1005) (650, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-413-phylop.txt.gz already has .gz suffix -- unchanged


(4250, 1005) (616, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-417-phylop.txt.gz already has .gz suffix -- unchanged


(4255, 1005) (610, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-416-phylop.txt.gz already has .gz suffix -- unchanged


(4256, 1005) (611, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-432-phylop.txt.gz already has .gz suffix -- unchanged


(4263, 1005) (595, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-409-phylop.txt.gz already has .gz suffix -- unchanged


(4255, 1005) (650, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-427-phylop.txt.gz already has .gz suffix -- unchanged


(4260, 1005) (649, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-425-phylop.txt.gz already has .gz suffix -- unchanged


(4256, 1005) (612, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-431-phylop.txt.gz already has .gz suffix -- unchanged


(4256, 1005) (629, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-428-phylop.txt.gz already has .gz suffix -- unchanged


(4261, 1005) (605, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-462-phylop.txt.gz already has .gz suffix -- unchanged


(4257, 1005) (612, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-430-phylop.txt.gz already has .gz suffix -- unchanged


(4258, 1005) (604, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-435-phylop.txt.gz already has .gz suffix -- unchanged


(4258, 1005) (637, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-436-phylop.txt.gz already has .gz suffix -- unchanged


(4254, 1005) (622, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-429-phylop.txt.gz already has .gz suffix -- unchanged


(4253, 1005) (665, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-434-phylop.txt.gz already has .gz suffix -- unchanged


(4243, 1005) (654, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-455-phylop.txt.gz already has .gz suffix -- unchanged


(4250, 1005) (623, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-449-phylop.txt.gz already has .gz suffix -- unchanged


(4259, 1005) (656, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-478-phylop.txt.gz already has .gz suffix -- unchanged


(4248, 1005) (610, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-446-phylop.txt.gz already has .gz suffix -- unchanged


(4262, 1005) (629, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-441-phylop.txt.gz already has .gz suffix -- unchanged


(4258, 1005) (591, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-440-phylop.txt.gz already has .gz suffix -- unchanged


(4254, 1005) (641, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-439-phylop.txt.gz already has .gz suffix -- unchanged


(4252, 1005) (638, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-442-phylop.txt.gz already has .gz suffix -- unchanged


(4248, 1005) (643, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-445-phylop.txt.gz already has .gz suffix -- unchanged


(4265, 1005) (655, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-443-phylop.txt.gz already has .gz suffix -- unchanged


(4260, 1005) (650, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-447-phylop.txt.gz already has .gz suffix -- unchanged


(4260, 1005) (643, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-452-phylop.txt.gz already has .gz suffix -- unchanged


(4251, 1005) (632, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-438-phylop.txt.gz already has .gz suffix -- unchanged


(4257, 1005) (663, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-444-phylop.txt.gz already has .gz suffix -- unchanged


(4249, 1005) (646, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-448-phylop.txt.gz already has .gz suffix -- unchanged


(4249, 1005) (633, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-457-phylop.txt.gz already has .gz suffix -- unchanged


(4255, 1005) (633, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-463-phylop.txt.gz already has .gz suffix -- unchanged


(4266, 1005) (638, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-459-phylop.txt.gz already has .gz suffix -- unchanged


(4263, 1005) (628, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-450-phylop.txt.gz already has .gz suffix -- unchanged


(4254, 1005) (624, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-465-phylop.txt.gz already has .gz suffix -- unchanged


(4262, 1005) (647, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-451-phylop.txt.gz already has .gz suffix -- unchanged


(4263, 1005) (625, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-458-phylop.txt.gz already has .gz suffix -- unchanged


(4257, 1005) (632, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-454-phylop.txt.gz already has .gz suffix -- unchanged


(4264, 1005) (619, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-476-phylop.txt.gz already has .gz suffix -- unchanged


(4266, 1005) (667, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-433-phylop.txt.gz already has .gz suffix -- unchanged


(4261, 1005) (623, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-460-phylop.txt.gz already has .gz suffix -- unchanged


(4266, 1005) (651, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-453-phylop.txt.gz already has .gz suffix -- unchanged


(4258, 1005) (624, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-491-phylop.txt.gz already has .gz suffix -- unchanged


(4245, 1005) (629, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-456-phylop.txt.gz already has .gz suffix -- unchanged


(4259, 1005) (632, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-479-phylop.txt.gz already has .gz suffix -- unchanged


(4255, 1005) (642, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-468-phylop.txt.gz already has .gz suffix -- unchanged


(4260, 1005) (633, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-482-phylop.txt.gz already has .gz suffix -- unchanged


(4263, 1005) (610, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-464-phylop.txt.gz already has .gz suffix -- unchanged


(4249, 1005) (579, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-474-phylop.txt.gz already has .gz suffix -- unchanged


(4256, 1005) (634, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-470-phylop.txt.gz already has .gz suffix -- unchanged


(4256, 1005) (654, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-461-phylop.txt.gz already has .gz suffix -- unchanged


(4248, 1005) (586, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-472-phylop.txt.gz already has .gz suffix -- unchanged


(4259, 1005) (644, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-469-phylop.txt.gz already has .gz suffix -- unchanged


(4255, 1005) (580, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-471-phylop.txt.gz already has .gz suffix -- unchanged


(4265, 1005) (596, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-473-phylop.txt.gz already has .gz suffix -- unchanged


(4258, 1005) (660, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-466-phylop.txt.gz already has .gz suffix -- unchanged


(4253, 1005) (611, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-489-phylop.txt.gz already has .gz suffix -- unchanged


(4250, 1005) (627, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-487-phylop.txt.gz already has .gz suffix -- unchanged


(4263, 1005) (648, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-483-phylop.txt.gz already has .gz suffix -- unchanged


(4259, 1005) (638, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-481-phylop.txt.gz already has .gz suffix -- unchanged


(4260, 1005) (615, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-475-phylop.txt.gz already has .gz suffix -- unchanged


(4250, 1005) (629, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-477-phylop.txt.gz already has .gz suffix -- unchanged


(4259, 1005) (621, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-467-phylop.txt.gz already has .gz suffix -- unchanged


(4259, 1005) (630, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-486-phylop.txt.gz already has .gz suffix -- unchanged


(4258, 1005) (602, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-488-phylop.txt.gz already has .gz suffix -- unchanged


(4252, 1005) (604, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-484-phylop.txt.gz already has .gz suffix -- unchanged


(4257, 1005) (618, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-485-phylop.txt.gz already has .gz suffix -- unchanged


(4248, 1005) (639, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-480-phylop.txt.gz already has .gz suffix -- unchanged


(4257, 1005) (614, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-494-phylop.txt.gz already has .gz suffix -- unchanged


(4249, 1005) (638, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-492-phylop.txt.gz already has .gz suffix -- unchanged


(4260, 1005) (624, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-490-phylop.txt.gz already has .gz suffix -- unchanged


(4256, 1005) (615, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-498-phylop.txt.gz already has .gz suffix -- unchanged


(4256, 1005) (607, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-499-phylop.txt.gz already has .gz suffix -- unchanged


(4268, 1005) (626, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-496-phylop.txt.gz already has .gz suffix -- unchanged


(4254, 1005) (629, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-497-phylop.txt.gz already has .gz suffix -- unchanged


(4251, 1005) (617, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-495-phylop.txt.gz already has .gz suffix -- unchanged


(4262, 1005) (628, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_overlap-493-phylop.txt.gz already has .gz suffix -- unchanged


497
(13595, 1005) (2110, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-176-phylop.txt.gz already has .gz suffix -- unchanged


(14045, 1005) (14045, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-phylop.txt.gz already has .gz suffix -- unchanged


(13580, 1005) (1982, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-5-phylop.txt.gz already has .gz suffix -- unchanged


(13542, 1005) (2019, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-0-phylop.txt.gz already has .gz suffix -- unchanged


(13580, 1005) (2009, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-13-phylop.txt.gz already has .gz suffix -- unchanged


(13583, 1005) (1986, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-32-phylop.txt.gz already has .gz suffix -- unchanged


(13593, 1005) (2145, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-22-phylop.txt.gz already has .gz suffix -- unchanged


(13596, 1005) (2071, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-14-phylop.txt.gz already has .gz suffix -- unchanged


(13575, 1005) (2002, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-24-phylop.txt.gz already has .gz suffix -- unchanged


(13584, 1005) (2099, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-37-phylop.txt.gz already has .gz suffix -- unchanged


(13541, 1005) (2015, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-28-phylop.txt.gz already has .gz suffix -- unchanged


(13543, 1005) (1996, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-18-phylop.txt.gz already has .gz suffix -- unchanged


(13562, 1005) (1992, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-17-phylop.txt.gz already has .gz suffix -- unchanged


(13529, 1005) (2088, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-8-phylop.txt.gz already has .gz suffix -- unchanged


(13566, 1005) (1990, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-10-phylop.txt.gz already has .gz suffix -- unchanged


(13569, 1005) (2034, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-4-phylop.txt.gz already has .gz suffix -- unchanged


(13540, 1005) (2074, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-9-phylop.txt.gz already has .gz suffix -- unchanged


(13591, 1005) (2004, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-31-phylop.txt.gz already has .gz suffix -- unchanged


(13581, 1005) (1979, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-1-phylop.txt.gz already has .gz suffix -- unchanged


(13591, 1005) (2034, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-6-phylop.txt.gz already has .gz suffix -- unchanged


(13578, 1005) (1997, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-2-phylop.txt.gz already has .gz suffix -- unchanged


(13554, 1005) (1986, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-15-phylop.txt.gz already has .gz suffix -- unchanged


(13566, 1005) (1977, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-50-phylop.txt.gz already has .gz suffix -- unchanged


(13576, 1005) (1928, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-21-phylop.txt.gz already has .gz suffix -- unchanged


(13560, 1005) (1928, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-33-phylop.txt.gz already has .gz suffix -- unchanged


(13587, 1005) (2051, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-11-phylop.txt.gz already has .gz suffix -- unchanged


(13568, 1005) (2009, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-12-phylop.txt.gz already has .gz suffix -- unchanged


(13560, 1005) (2047, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-20-phylop.txt.gz already has .gz suffix -- unchanged


(13614, 1005) (2050, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-16-phylop.txt.gz already has .gz suffix -- unchanged


(13594, 1005) (2016, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-19-phylop.txt.gz already has .gz suffix -- unchanged


(13566, 1005) (2003, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-38-phylop.txt.gz already has .gz suffix -- unchanged


(13547, 1005) (1979, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-36-phylop.txt.gz already has .gz suffix -- unchanged


(13566, 1005) (2086, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-30-phylop.txt.gz already has .gz suffix -- unchanged


(13542, 1005) (1997, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-29-phylop.txt.gz already has .gz suffix -- unchanged


(13544, 1005) (2019, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-7-phylop.txt.gz already has .gz suffix -- unchanged


(13588, 1005) (1930, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-3-phylop.txt.gz already has .gz suffix -- unchanged


(13558, 1005) (2035, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-39-phylop.txt.gz already has .gz suffix -- unchanged


(13549, 1005) (1950, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-25-phylop.txt.gz already has .gz suffix -- unchanged


(13606, 1005) (2069, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-27-phylop.txt.gz already has .gz suffix -- unchanged


(13559, 1005) (2109, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-26-phylop.txt.gz already has .gz suffix -- unchanged


(13552, 1005) (1949, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-23-phylop.txt.gz already has .gz suffix -- unchanged


(13536, 1005) (2088, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-55-phylop.txt.gz already has .gz suffix -- unchanged


(13558, 1005) (1993, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-34-phylop.txt.gz already has .gz suffix -- unchanged


(13576, 1005) (2020, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-75-phylop.txt.gz already has .gz suffix -- unchanged


(13572, 1005) (2046, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-61-phylop.txt.gz already has .gz suffix -- unchanged


(13550, 1005) (1987, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-42-phylop.txt.gz already has .gz suffix -- unchanged


(13570, 1005) (2065, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-51-phylop.txt.gz already has .gz suffix -- unchanged


(13571, 1005) (2022, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-49-phylop.txt.gz already has .gz suffix -- unchanged


(13574, 1005) (1977, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-35-phylop.txt.gz already has .gz suffix -- unchanged


(13555, 1005) (2041, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-60-phylop.txt.gz already has .gz suffix -- unchanged


(13578, 1005) (2027, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-56-phylop.txt.gz already has .gz suffix -- unchanged


(13576, 1005) (2012, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-44-phylop.txt.gz already has .gz suffix -- unchanged


(13578, 1005) (2059, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-59-phylop.txt.gz already has .gz suffix -- unchanged


(13565, 1005) (2025, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-85-phylop.txt.gz already has .gz suffix -- unchanged


(13564, 1005) (1978, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-46-phylop.txt.gz already has .gz suffix -- unchanged


(13575, 1005) (1997, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-77-phylop.txt.gz already has .gz suffix -- unchanged


(13556, 1005) (2036, 1005)


  summary_stats= summary_stats.join(ci)
gzip: /wynton/home/ahituv/fongsl/nullomers/data/phylop/shuf-mutations-uniq-woRMSK.GENCODE-exon_no-overlap-ref.only-43-phylop.txt.gz already has .gz suffix -- unchanged


# get GC results

In [27]:
GC_SETS = [('EXON-OVERLAP', "*-exon_overlap*SUMMARY_STATS_GC.txt"), 
       ("EXON-NO-OVERLAP", "*-exon_no-overlap*SUMMARY_STATS_GC.txt")]

for datatype, query in SETS:
    file_list = glob.glob(os.path.join(DATA_PATH, query))
#2
df_list = []

#3
for f in file_list:

    # assign static label
    if "shuf" in f:
        LABEL = f"SHUF-{datatype}"
    else:
        LABEL = datatype
    #4
    ID = f.split("/")[-1]


    #5
    df = pd.read_csv(f, sep='\t', header = None)
    df["label"] = LABEL



    df_list.append(df)  

  df = pd.read_csv(f, sep='\t', header = None)


KeyboardInterrupt: 

In [None]:
out

In [26]:
df.loc[df["id"].isin(set(matched["id"]))]

(620, 1005)

In [11]:
#6
df = pd.concat(df_list)

#7
nullo = df.loc[df["label"] == datatype]
shuf = df.loc[df["label"] != datatype]

# write all the stats to a file

In [33]:
 out_stat=os.path.join(RE, f"{DATASET}_{ANNOT}_empirical-stats.txt")

with open(out_stat, "w") as writer:
    for line in lines:
        writer.write(line)
    writer.close()