In [1]:
import helper
import pickle

In [2]:
# Read all TIFs through the project helper, restricted to the positive strand and
# returned as a dict. Later cells index the result as tifs[chrom] and iterate its
# .items() as (gene, per-gene TIFs) pairs.
tifs = helper.read_all_tifs(positive_strand_only=True, return_dict=True)

In [3]:
# Feed the bed file read by helper.read_bedfile() into helper.organize_genome_by_chrom,
# restricted to the positive strand.
# NOTE(review): `genome` is not referenced by any other cell visible here — confirm it is still needed.
genome = helper.organize_genome_by_chrom(helper.read_bedfile(), positive_strand_only=True)

In [4]:
# Load the pre-computed per-chromosome densities.
# NOTE: unpickling can execute arbitrary code; only load a densities.pickle you produced yourself.
# The context manager closes the file handle, which the bare open() call never did.
with open("densities.pickle", "rb") as densities_file:
    densities = pickle.load(densities_file)

# Each pickled entry is indexable; only its first element is used from here on.
for chrom in helper.kYeastChroms:
    densities[chrom] = densities[chrom][0]

In [44]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib

# When True, the following cell switches matplotlib to the "pgf" backend with LaTeX
# text rendering; when False it restores matplotlib's rc defaults instead.
save_to_pgf=True

# Seed NumPy's global RNG so the np.random.choice sampling used later is repeatable.
np.random.seed(1135)

In [45]:
# Configure matplotlib once, depending on whether figures are exported as PGF.
if not save_to_pgf:
    # Interactive use: fall back to matplotlib's built-in defaults.
    matplotlib.rcdefaults()
else:
    # PGF export: switch backend and let LaTeX (pdflatex) typeset all text in a serif font.
    matplotlib.use("pgf")
    matplotlib.rcParams["pgf.texsystem"] = "pdflatex"
    matplotlib.rcParams['font.family'] = 'serif'
    matplotlib.rcParams['text.usetex'] = True
    matplotlib.rcParams['pgf.rcfonts'] = False

In [7]:
from metagene_helper import select_one_random_junction, kJunctionCandidates

INFO:helper:Reading bedfile...
INFO:helper:Done reading bedfile ../indexes/yeast-all.bed
INFO:helper:Processed TIFs in chromosome 1...
INFO:helper:Processed TIFs in chromosome 2...
INFO:helper:Processed TIFs in chromosome 3...
INFO:helper:Processed TIFs in chromosome 4...
INFO:helper:Processed TIFs in chromosome 5...
INFO:helper:Processed TIFs in chromosome 6...
INFO:helper:Processed TIFs in chromosome 7...
INFO:helper:Processed TIFs in chromosome 8...
INFO:helper:Processed TIFs in chromosome 9...
INFO:helper:Processed TIFs in chromosome 10...
INFO:helper:Processed TIFs in chromosome 11...
INFO:helper:Processed TIFs in chromosome 12...
INFO:helper:Processed TIFs in chromosome 13...
INFO:helper:Processed TIFs in chromosome 14...
INFO:helper:Processed TIFs in chromosome 15...
INFO:helper:Processed TIFs in chromosome 16...
INFO:helper:Done reading TIF file... returning a dictionary


In [8]:
# Half-width of the metagene window: the accumulators below hold 2 * n entries, the
# plots use range(-n, n) on the x axis, and n is passed twice to helper.make_metagene_plot.
n = 100

In [9]:
# Second read of the TIFs, this time with the helper's default arguments (the log
# output reports a pandas result rather than a dictionary).
# NOTE(review): `unsorted_tifs` is not referenced by any other cell visible here — confirm it is still needed.
unsorted_tifs = helper.read_all_tifs()

INFO:helper:Done reading TIF file... returning a pandas


In [17]:
def get_all_junctions_chrom(chrom, tifs):
    """Collect the junctions of every gene on one chromosome.

    For each gene's TIFs in ``tifs[chrom]``, the isoform starts are computed with
    ``helper.generate_5utr_isoform_starts`` and every entry except the first is kept.

    Args:
        chrom: Key into ``tifs`` selecting the chromosome.
        tifs: Mapping of chromosome -> mapping of gene -> TIFs for that gene.

    Returns:
        A flat list of the kept entries, in the iteration order of ``tifs[chrom]``.
    """
    splits = []
    for gtifs in tifs[chrom].values():
        junctions = helper.generate_5utr_isoform_starts(gtifs)
        # extend() appends in place; `splits = splits + ...` re-copied the whole
        # list for every gene, which is quadratic in the number of junctions.
        splits.extend(junctions[1:])
    return splits

In [34]:
def _metagene_profile(chrom, positions):
    """Return helper.make_metagene_plot's result for `positions` on `chrom`, without plotting."""
    return helper.make_metagene_plot((densities[chrom], []),
                                     positions,
                                     "",
                                     n,
                                     n,
                                     plot=False)


def _random_junctions(chrom, count):
    """Draw `count` distinct candidate positions on `chrom` as (position, "+") tuples."""
    sampled = np.random.choice(kJunctionCandidates[chrom],
                               size=count,
                               replace=False)
    return [(x, "+") for x in sampled]


# Running sums over all chromosomes, one slot per offset in the 2 * n wide window.
big_meta = [0] * (2 * n)         # one randomly selected junction per select_one_random_junction
fake_meta = [0] * (2 * n)        # random candidates, same count as `splits`

bad_meta = [0] * (2 * n)         # every junction from get_all_junctions_chrom
bad_random_meta = [0] * (2 * n)  # random candidates, same count as `all_junctions`

# Re-seed so this cell gives the same draws whenever it is re-run on its own.
np.random.seed(1135)

# NOTE: the order of the calls below is kept exactly as before so the sequence of
# draws from NumPy's global RNG (and therefore every result) is unchanged.
for chrom in helper.kYeastChroms:
    splits = select_one_random_junction(chrom, tifs)
    meta = _metagene_profile(chrom, splits)
    big_meta = np.add(big_meta, meta)

    fake_junctions = _random_junctions(chrom, len(splits))
    meta = _metagene_profile(chrom, fake_junctions)
    fake_meta = np.add(fake_meta, meta)

    all_junctions = get_all_junctions_chrom(chrom, tifs)
    meta = _metagene_profile(chrom, all_junctions)
    bad_meta = np.add(bad_meta, meta)

    fake_junctions = _random_junctions(chrom, len(all_junctions))
    meta = _metagene_profile(chrom, fake_junctions)
    bad_random_meta = np.add(bad_random_meta, meta)
    


INFO:helper:Generating metagene plot... Number of overlapping sites is: 44
INFO:helper:<class 'tuple'>
INFO:helper:type of splits[0] is <class 'tuple'>
INFO:helper:(143657, '+')
INFO:helper:Generating metagene plot... Number of overlapping sites is: 44
INFO:helper:<class 'tuple'>
INFO:helper:type of splits[0] is <class 'tuple'>
INFO:helper:(33419, '+')
INFO:helper:Generating metagene plot... Number of overlapping sites is: 1184
INFO:helper:<class 'tuple'>
INFO:helper:type of splits[0] is <class 'tuple'>
INFO:helper:(143602, '+')
INFO:helper:Generating metagene plot... Number of overlapping sites is: 1184
INFO:helper:<class 'tuple'>
INFO:helper:type of splits[0] is <class 'tuple'>
INFO:helper:(183721, '+')
INFO:helper:Generating metagene plot... Number of overlapping sites is: 171
INFO:helper:<class 'tuple'>
INFO:helper:type of splits[0] is <class 'tuple'>
INFO:helper:(236353, '+')
INFO:helper:Generating metagene plot... Number of overlapping sites is: 171
INFO:helper:<class 'tuple'>
IN

INFO:helper:(340150, '+')
INFO:helper:Generating metagene plot... Number of overlapping sites is: 6974
INFO:helper:<class 'tuple'>
INFO:helper:type of splits[0] is <class 'tuple'>
INFO:helper:(147811, '+')
INFO:helper:Generating metagene plot... Number of overlapping sites is: 6974
INFO:helper:<class 'tuple'>
INFO:helper:type of splits[0] is <class 'tuple'>
INFO:helper:(853393, '+')
INFO:helper:Generating metagene plot... Number of overlapping sites is: 215
INFO:helper:<class 'tuple'>
INFO:helper:type of splits[0] is <class 'tuple'>
INFO:helper:(267083, '+')
INFO:helper:Generating metagene plot... Number of overlapping sites is: 215
INFO:helper:<class 'tuple'>
INFO:helper:type of splits[0] is <class 'tuple'>
INFO:helper:(110995, '+')
INFO:helper:Generating metagene plot... Number of overlapping sites is: 8075
INFO:helper:<class 'tuple'>
INFO:helper:type of splits[0] is <class 'tuple'>
INFO:helper:(266923, '+')
INFO:helper:Generating metagene plot... Number of overlapping sites is: 8075

In [46]:
# Plot the profile accumulated in big_meta over the full [-n, n) window and save it.
fig, ax = plt.subplots()
ax.plot(range(-n, n), big_meta)
ax.set_title("Metagene plot of non-overlapping isoform positions")  # fixed title typos
ax.set_xlabel("Distance from junction (nt)")
ax.set_ylabel("Aggregated Read density")
fig.savefig("final_plots/good_metagene.pgf")

# Release the figure so repeated runs do not pile up open figures.
plt.close(fig)


In [47]:
# Plot the profile accumulated in fake_meta (randomly drawn positions) over the
# full [-n, n) window and save it.
fig, ax = plt.subplots()
ax.plot(range(-n, n), fake_meta)
ax.set_title("Metagene plot of non-overlapping random positions")  # fixed title typo
ax.set_xlabel("Distance from junction (nt)")
ax.set_ylabel("Aggregated Read density")
fig.savefig("final_plots/random_metagene.pgf")

# Release the figure so repeated runs do not pile up open figures.
plt.close(fig)

In [48]:
# Plot only the central +/- 50 nt of the profile accumulated in bad_meta.
# The slice is derived from n (offset 0 sits at index n) instead of the literal
# 50:150, which was only correct while n == 100.
half_window = 50
fig, ax = plt.subplots()
ax.plot(range(-half_window, half_window),
        bad_meta[n - half_window:n + half_window])
ax.set_title("Metagene plot of all isoform positions")  # fixed title typo
ax.set_xlabel("Distance from junction (nt)")
ax.set_ylabel("Aggregated Read density")
fig.savefig("final_plots/all_metagene.pgf")

# Release the figure so repeated runs do not pile up open figures.
plt.close(fig)

In [49]:
# Plot only the central +/- 50 nt of the profile accumulated in bad_random_meta.
# The slice is derived from n (offset 0 sits at index n) instead of the literal
# 50:150, which was only correct while n == 100.
half_window = 50
fig, ax = plt.subplots()
ax.plot(range(-half_window, half_window),
        bad_random_meta[n - half_window:n + half_window])
ax.set_title("Metagene plot of random genomic positions")
ax.set_xlabel("Distance from junction (nt)")
ax.set_ylabel("Aggregated Read density")
fig.savefig("final_plots/random_big_metagene.pgf")

# Release the figure so repeated runs do not pile up open figures.
plt.close(fig)