In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import mybiotools as mbt
import pr_peaks
import os
from scipy.stats import gaussian_kde

In [None]:
# load the peak data: one pr_peaks.Condition per sample.
# The third field is read back further down as `.concentration`.
# NOTE(review): 'medium2' and 'medium3' are both built with '3HCP' -- confirm
# that this is intended and not a copy-paste slip.
condition_args = [
    ('high'   , 'all_treated', 0.05, 'gv_107_01_01_chipseq'),
    ('medium1', '4HCP'       , 0.10, 'gv_108_01_01_chipseq'),
    ('medium2', '3HCP'       , 0.50, 'gv_109_01_01_chipseq'),
    ('medium3', '3HCP'       , 1.00, 'gv_110_01_01_chipseq'),
    ('low'    , '1HCP'       , 10.0, 'gv_111_01_01_chipseq'),
]
high, medium1, medium2, medium3, low = [
    pr_peaks.Condition(*args) for args in condition_args
]

In [None]:
# short aliases for the peak sets used most often below
# (the "M" peaks are those of medium2 only)
Hpeaks, Mpeaks, Lpeaks = high.peaks, medium2.peaks, low.peaks

In [None]:
# every condition, in the order in which they were loaded
conditions = [high, medium1, medium2, medium3, low]
# the matching concentrations, one entry per condition, in the same order
concentrations = np.array([cond.concentration for cond in conditions])

# 2018-03-05 Preparing lab meeting

## Experimental data

### Peak scores
Here I want to prepare the figures for my lab meeting (LM) presentation on the PR peaks project. First, I want to prepare the figure that shows the distribution of scores of the all_treated versus the 1HCP peaks. I put the calculation in a script in the scripts directory, called `get_peak_scores.py`.

Using that data I can make a lovely figure.

### Peak read counts

Next, I want to show the number of reads in every peak, as a function of the concentration. I put the calculation in a script called `get_peak_read_counts.py`. Now I can make another lovely figure, saved as `H_to_L.pdf`.

In [None]:
# load data
# Locate the project tree below the user's home directory and read the
# pre-computed .npy arrays from its data directory.
# os.path.expanduser('~') is used instead of os.getenv('HOME') because the
# latter returns None when HOME is not set, which would silently produce a
# path starting with 'None/work/...'.
pr_peaks_root_dir = os.path.join(os.path.expanduser('~'),
                                 'work', 'CRG', 'projects', 'pr_peaks')
data_dir = os.path.join(pr_peaks_root_dir, 'data')
# figures_dir is where the plotting cells below save their output
figures_dir = os.path.join(pr_peaks_root_dir, 'figures')
Hpeaks_count = np.load(os.path.join(data_dir, 'Hpeaks_count.npy'))
Mpeaks_count = np.load(os.path.join(data_dir, 'Mpeaks_count.npy'))
Lpeaks_average = np.load(os.path.join(data_dir, 'Lpeaks_average.npy'))

In [None]:
# read counts relative to the L-peak average, as a function of the
# concentration: one curve for the H peaks ('o') and one for the M peaks ('^')
fig, ax = plt.subplots(figsize=(6,4))
for counts, line_style in ((Hpeaks_count, 'o--'), (Mpeaks_count, '^--')):
    # average over the first axis of the count array before normalising
    ax.semilogx(concentrations, counts.mean(axis=0)/Lpeaks_average, line_style)
ax.set_xlabel('Concentration of hormone [nM]', fontsize=24)
ax.set_ylabel('H to L ratio', fontsize=24)
fig.tight_layout()
fig.savefig('%s/H_to_L.pdf'%(figures_dir))
plt.show()

## Simulation data

In [None]:
# Run the jumping-model simulation for a range of mu values, once without
# contacts between sites ("uniform") and once with a contact between the
# first two H sites ("HH").

# seed numpy's global random generator
# NOTE(review): this only makes the runs reproducible if pr_peaks draws its
# random numbers from numpy's global generator -- confirm.
np.random.seed(85498)

# general simulation parameters
nsteps = 100000          # handed to JumpingModel.run for every mu
n = 100                  # number of sites
boost = 4.0              # handed to both JumpingModel instances
mus = np.arange(1,20,2)  # mu values to scan: 1,3,...,19

# init site_taus: 2.0 for every site, except the H sites which get 20.0
Hsites = [2,6,10,40,50,60,70,80]
# `range` instead of the Python-2-only `xrange`: same result here, and the
# cell also runs under Python 3
Lsites = [i for i in range(n) if i not in Hsites]
site_taus = 2.0*np.ones(n)
site_taus[Hsites] = 20.0

# init contact lists: one list per site
nocontacts = [[] for i in range(n)]
HHcontact = [[] for i in range(n)]
# the only contact is the reciprocal one between the first two H sites
HHcontact[Hsites[0]] = [Hsites[1]]
HHcontact[Hsites[1]] = [Hsites[0]]

# init the Jumping Models: same taus and boost, they differ only in the contacts
uniform = pr_peaks.JumpingModel(nocontacts,site_taus,boost)
HH = pr_peaks.JumpingModel(HHcontact,site_taus,boost)

# cycle on mu values
for mu in mus :
    # init omega_t
    # NOTE(review): the same omega_t_initial object is handed to both models;
    # if run() modifies it in place the HH run does not start from the same
    # state as the uniform run -- confirm against pr_peaks.
    omega_t_initial = pr_peaks.init_omega_t(n,mu)
    mbt.log_message('Uniform','mu = %d'%(mu))
    uniform.run(nsteps,omega_t_initial)
    mbt.log_message('HH     ','mu = %d'%(mu))
    HH.run(nsteps,omega_t_initial)

# aftermath
# the return values are not used here; presumably H_to_L stores its results
# on the model -- TODO confirm
pr_peaks.H_to_L(uniform,Hsites,Lsites)
pr_peaks.H_to_L(HH,Hsites,Lsites)

I want to plot the results both for the case in which there is no boosting effect (the model without contacts) and for the case in which the boosting effect is present (the model with a contact between two H sites).

In [None]:
# For each model, collect theta at the H sites for every mu, and the same
# values divided by the mean theta over the L sites at that mu.
# Both tables have one row per mu and one column per H site, and are attached
# to the model object as `Hsites_theta` and `H_to_L_individual`.
nmus = len(mus)
nH   = len(Hsites)
nL   = len(Lsites)
for model in (uniform, HH):
    hsites_theta = np.zeros((nmus, nH))
    h_to_l = np.zeros((nmus, nH))
    for row, mu in enumerate(mus):
        theta_mu = model.theta[mu]
        hsites_theta[row, :] = theta_mu[Hsites]
        averageL = theta_mu[Lsites].mean()
        # divide the stored row so the ratio is computed on the same values
        # that end up in Hsites_theta
        h_to_l[row, :] = hsites_theta[row, :]/averageL
    model.Hsites_theta = hsites_theta
    model.H_to_L_individual = h_to_l

In [None]:
# model without contacts: mean over the H sites of the occupancy (left panel)
# and of the H to L ratio (right panel), as a function of mu
fig, axarr = plt.subplots(1, 2, figsize=(12,4))

xticks = range(1,20,2)

panels = [
    (uniform.Hsites_theta,      r'Occupancy of H sites'),
    (uniform.H_to_L_individual, r'H to L ratio'),
]
for ax, (values, ylabel) in zip(axarr, panels):
    ax.plot(mus, values.mean(axis=1), 'o--')
    ax.set_ylabel(ylabel, fontsize=24)
    ax.set_xlabel(r'Number of searchers', fontsize=24)
    ax.set_xticks(xticks)

fig.tight_layout()
fig.savefig('%s/sim_H_to_L_uniform.pdf'%(figures_dir))
plt.show()

In [None]:
# model with the H-H contact: mean over the H sites of the occupancy (left
# panel) and of the H to L ratio (right panel), as a function of mu
fig, axarr = plt.subplots(1, 2, figsize=(12,4))

xticks = range(1,20,2)

panels = [
    (HH.Hsites_theta,      r'Occupancy of H sites'),
    (HH.H_to_L_individual, r'H to L ratio'),
]
for ax, (values, ylabel) in zip(axarr, panels):
    ax.plot(mus, values.mean(axis=1), 'o--')
    ax.set_ylabel(ylabel, fontsize=24)
    ax.set_xlabel(r'Number of searchers', fontsize=24)
    ax.set_xticks(xticks)

fig.tight_layout()
fig.savefig('%s/sim_H_to_L_HH.pdf'%(figures_dir))
plt.show()

In [None]:
# H to L ratio of every individual H site as a function of mu, for the model
# without contacts (left) and the model with contacts (right)
fig,axarr = plt.subplots(1,2,figsize=(10,4))
xticks = range(1,20,2)
# `range` instead of the Python-2-only `xrange`
for j in range(nH) :
    # red = H site that has at least one contact in the HH model, black = the
    # others. Reading this from HHcontact (instead of hard-coding j==0 or
    # j==1) keeps the colours right if the contact list is changed.
    color = 'r' if HHcontact[Hsites[j]] else 'k'
    axarr[0].plot(mus,uniform.H_to_L_individual[:,j],color=color)
    axarr[1].plot(mus,HH.H_to_L_individual[:,j],color=color)
axarr[0].set_title('No contacts',fontsize=32)
axarr[1].set_title('With contacts',fontsize=32)
for ax in axarr :
    ax.set_xlabel(r'Number of searchers',fontsize=24)
    ax.set_xticks(xticks)
# only the left panel gets a y label
axarr[0].set_ylabel(r'H to L ratio',fontsize=24)
fig.tight_layout()
fig.savefig('%s/sim_H_to_L_individual.pdf'%(figures_dir))
plt.show()

In [None]:
# now calculate the number of counts corresponding to the High peaks,
# as a function of the concentration of hormone

# array sizes: these two names were used below without ever being defined in
# the notebook, so the cell failed with a NameError on a fresh kernel
nHpeaks = len(Hpeaks)
nconditions = len(conditions)

# NOTE: this rebinds `Hpeaks_count` (loaded from disk further up) and
# `averageL` (a scalar in the simulation section); re-run the earlier cells
# before re-using those.
Hpeaks_count = np.zeros((nHpeaks,nconditions))   # one row per H peak, one column per condition
averageL = np.zeros(nconditions)                 # one value per condition

# fill the arrays
for j,condition in enumerate(conditions) :
    averageL[j] = pr_peaks.average_peak_counts(Lpeaks,condition)
    for i,peak in enumerate(Hpeaks) :
        Hpeaks_count[i,j] = condition.peak_counts(peak)

# divide every column by the L average of its condition (broadcast over rows)
H_to_L_individual = Hpeaks_count/averageL

In [None]:
# H to L ratio versus concentration, one line per H peak
fig = plt.figure(figsize=(6,4))
# show at most the first 100 peaks; the bound is taken from the array itself
# so that the cell does not raise an IndexError when fewer peaks are available
# (`range` instead of the Python-2-only `xrange`)
npeaks_shown = min(100, H_to_L_individual.shape[0])
for j in range(npeaks_shown) :
    plt.loglog(concentrations,H_to_L_individual[j,:])
plt.xlabel('Concentration of hormone [nM]',fontsize=24)
plt.ylabel('H to L ratio',fontsize=24)
fig.tight_layout()
fig.savefig('%s/H_to_L_individual.pdf'%(figures_dir))
plt.show()

In [None]:
# Correlation coefficient between the H-to-L profiles of every pair of H peaks.
# np.corrcoef treats each row of a 2-D array as one variable, so a single call
# returns the whole symmetric matrix; this replaces the pairwise Python loop
# (same values up to floating-point rounding) and no longer needs `nHpeaks`,
# which was never defined in the notebook.
# np.atleast_2d keeps the result a matrix when there is only one peak.
corrmat = np.atleast_2d(np.corrcoef(H_to_L_individual))

In [None]:
# show the block of the correlation matrix for the H peaks start..end-1
start, end = 50, 60
yticks = range(start, end)

fig, ax = plt.subplots(1, 1, figsize=(10,10))
# the image handle is kept in `cb`; no colorbar is drawn
cb = ax.matshow(corrmat[start:end, start:end])

# label every row with the string form of the corresponding H peak
ax.set_yticks(range(len(yticks)))
ax.set_yticklabels([str(Hpeaks[idx]) for idx in yticks])

fig.tight_layout()
fig.savefig('%s/peak_correlation.png'%(figures_dir), bbox_inches='tight')
plt.show()