In [1]:
# Dataset identifier: selects the input sub-directory under .../lite_data/in-vivo/
# and the output sub-directory under comparison_figs/ used by the cells below.
target = "cbf1_nexus"

In [2]:
import os
import json
import gzip
import math
import pyBigWig
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error
from scipy.stats import spearmanr, pearsonr, gaussian_kde

In [3]:
# Chromosome name -> chromosome length. Used to discard flanked peak windows
# that would run past the end of a chromosome.
# NOTE(review): names/sizes look like the S. cerevisiae sacCer3 assembly - confirm.
chrom_sizes = dict([
    ('chrI', 230218),
    ('chrII', 813184),
    ('chrIII', 316620),
    ('chrIV', 1531933),
    ('chrV', 576874),
    ('chrVI', 270161),
    ('chrVII', 1090940),
    ('chrVIII', 562643),
    ('chrIX', 439888),
    ('chrX', 745751),
    ('chrXI', 666816),
    ('chrXII', 1078177),
    ('chrXIII', 924431),
    ('chrXIV', 784333),
    ('chrXV', 1091291),
    ('chrXVI', 948066),
    ('chrM', 85779),
])

In [4]:
# --- Load per-peak stranded signal -------------------------------------------
# Reads the summit-centred BED intervals for `target`, widens each by `flank`
# on both sides, and collects the positive- and negative-strand bigWig values
# for every window that fits inside its chromosome.
data_dir = "/oak/stanford/groups/akundaje/amr1/pho4_final/lite_data/in-vivo/" + target
flank = 150  # positions added on each side of every BED interval

# Index 0 is the positive-strand track, index 1 the negative-strand track.
# The handles are intentionally left open: values are fetched from them below.
bigWigs = (pyBigWig.open(data_dir + "/basename_prefix.pooled.positive.bigwig"),
           pyBigWig.open(data_dir + "/basename_prefix.pooled.negative.bigwig"))

# Parse (chrom, start, end) from the BED file. The context manager guarantees
# the file handle is closed; blank lines are skipped, and any columns after the
# third are ignored rather than requiring exactly four columns.
peak_coords = []
with open(data_dir + "/100_around_summits.bed") as bedFile:
    for line in bedFile:
        fields = line.strip().split('\t')
        if len(fields) < 3:
            continue
        chrm, s, e = fields[:3]
        peak_coords.append((chrm, int(s), int(e)))


def _values_nan_as_zero(bw, chrm, start, end):
    """Return bw.values(chrm, start, end) as a NumPy array with NaN set to 0.0."""
    vals = np.array(bw.values(chrm, start, end))
    vals[np.isnan(vals)] = 0.0
    return vals


currentPosCounts = []
currentNegCounts = []
for chrm, s, e in peak_coords:
    # Skip chromosome names containing an underscore.
    if "_" in chrm:
        continue
    start = s - flank
    end = e + flank
    # Drop windows that would extend beyond either end of the chromosome.
    # NOTE(review): `>=` also drops a window ending exactly at the chromosome
    # end; kept unchanged so the selected peak set stays the same.
    if start < 0 or end >= chrom_sizes[chrm]:
        continue
    currentPosCounts.append(_values_nan_as_zero(bigWigs[0], chrm, start, end))
    currentNegCounts.append(_values_nan_as_zero(bigWigs[1], chrm, start, end))

In [5]:
# --- Average stranded profile -------------------------------------------------
# Plots the mean positive-strand signal and the mean sign-flipped
# negative-strand signal across all retained peak windows, then saves the
# figure to comparison_figs/<target>/avg_profile.png.
out_dir = 'comparison_figs/' + target
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

font = {'weight' : 'bold', 'size'   : 14}
plt.figure()
matplotlib.rc('font', **font)
plt.plot(np.mean(currentPosCounts, axis=0))
plt.plot(np.mean(-np.array(currentNegCounts), axis=0))
plt.savefig(out_dir + '/avg_profile.png', dpi=300, format='png')
# Close (rather than clear) the figure so no empty figure is left open
# and rendered as cell output.
plt.close()

<Figure size 432x288 with 0 Axes>

In [6]:
# --- Per-region normalisation -------------------------------------------------
def _normalize_rows(counts):
    """Divide each row of a 2-D array by that row's sum.

    Rows whose sum is zero are returned as all zeros instead of NaN, which
    avoids the divide-by-zero RuntimeWarning a plain division would raise.
    """
    totals = counts.sum(axis=1, keepdims=True)
    return np.divide(counts, totals,
                     out=np.zeros_like(counts, dtype=float),
                     where=totals != 0)


# The raw count lists are rebound to 2-D arrays (one row per region) because
# later cells call .sum(axis=1) on them.
currentPosCounts = np.array(currentPosCounts)
normalizedPosCounts = _normalize_rows(currentPosCounts)

currentNegCounts = np.array(currentNegCounts)
# The negative strand is sign-flipped after normalisation.
normalizedNegCounts = -1 * _normalize_rows(currentNegCounts)

In [7]:
# Number of strongest regions to keep.
top_n = 100
# Rank regions by their summed signal over both strands, then take the
# top_n indices ordered from highest total to lowest.
total_signal = currentPosCounts.sum(axis=1) + currentNegCounts.sum(axis=1)
top_regions = np.argsort(total_signal)[::-1][:top_n]

In [8]:
# --- Heatmap of the top regions -----------------------------------------------
# Each selected region contributes two consecutive rows: its normalised
# positive-strand profile followed by its sign-flipped negative-strand
# profile, both restricted to positions 100..299 of the window.
combinedNormalizedCounts = []
for idx in top_regions:
    combinedNormalizedCounts.append(normalizedPosCounts[idx][100:300])
    combinedNormalizedCounts.append(normalizedNegCounts[idx][100:300])

out_dir = 'comparison_figs/' + target
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

font = {'weight' : 'bold', 'size'   : 14}
plt.figure()
matplotlib.rc('font', **font)
# Diverging colour map centred on zero so the two strands get opposite colours;
# values outside +/-0.025 are clipped to the colour-scale limits.
sns.heatmap(combinedNormalizedCounts, cmap = "RdBu_r", center = 0, vmin=-0.025, vmax =0.025)
plt.tick_params(
    axis='both',       # changes apply to both the x- and y-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    left=False,        # ticks along the left edge are off
    labelleft=False,   # labels along the left edge are off
    right=False,       # ticks along the right edge are off
    labelright=False,  # labels along the right edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.savefig(out_dir + '/top_heatmap.png', dpi=300, format='png')
# Close (rather than clear) the figure so no empty figure is left open
# and rendered as cell output.
plt.close()

<Figure size 432x288 with 0 Axes>