In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import sys, os
import nucpos
from Bio import SeqIO, Seq
from Bio.Alphabet import IUPAC

# 2018-02-09 p8 vs p9 energy landscapes

I want to look at the differences between the probability landscapes of the p8 and p9 promoters. At the same time, I want to take a look at the module I'm building.

I'll start by playing around with the construction of a `Sequence` class that I can use later to deal more effectively with data and parameters.

In [None]:
# Load the Drosophila genome through the nucpos helper; further down it is
# indexed by chromosome name (e.g. '2L') when building an InsertedSequence.
dm_genome = nucpos.load_Drosophila_genome()

In [None]:
class Sequence(object) :
    """A sequence string with lazily computed, cached landscapes.

    Probability and energy landscapes are obtained from the ``nucpos`` module
    the first time they are requested and are then memoised per
    ``(order, mechanical_model, temperature)`` combination, so repeated
    requests with the same parameters are plain dictionary lookups.

    Attributes
    ----------
    seq : str
        The sequence the landscapes are computed for.
    nuc : dict
        Container for nucleosome occupancy; not filled by this class.
    """

    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere so
    # that the type check in ``__init__`` works on both major versions.
    try :
        _string_types = basestring
    except NameError :
        _string_types = str

    def __init__(self,seq) :
        """Store ``seq`` and create the empty caches.

        Raises
        ------
        TypeError
            If ``seq`` is not a string.
        """
        if not isinstance(seq,self._string_types) :
            raise TypeError('Init sequence with string!')
        self.seq    = seq     # sequence
        self._p     = {}      # probability density
        self._E     = {}      # energy landscape
        self.nuc    = {}      # nucleosome occupancy

    def p(self, order, mechanical_model, temperature) :
        """Return the probability landscape for the given parameters.

        The result of ``nucpos.probability_landscape`` is cached, so the
        computation runs at most once per parameter combination. The three
        arguments together form the cache key and must therefore be hashable.
        """
        key = (order, mechanical_model, temperature)
        # ``in`` replaces the deprecated ``dict.has_key`` (gone in Python 3)
        if key not in self._p :
            self._p[key] = nucpos.probability_landscape(self.seq, order, mechanical_model, temperature)
        return self._p[key]

    def E(self, order, mechanical_model, temperature) :
        """Return the energy landscape for the given parameters.

        The energy is derived with ``nucpos.energy`` from the probability
        landscape returned by :meth:`p`, which means a first call also fills
        the probability cache for the same parameters.
        """
        key = (order, mechanical_model, temperature)
        if key not in self._E :
            self._E[key] = nucpos.energy(self.p(order, mechanical_model, temperature))
        return self._E[key]

In [None]:
class InsertedSequence(Sequence) :
    """A sequence embedded in its genomic context at a given cut site.

    The full sequence handed to ``Sequence`` is the ``left`` positions of the
    chromosome preceding ``cut_site``, followed by ``seq``, followed by the
    ``right`` positions of the chromosome starting at ``cut_site``.

    Parameters
    ----------
    seq : the sequence to insert (concatenated with chromosome slices)
    genome : mapping from chromosome name to a sliceable record exposing ``.seq``
    chromosome : key into ``genome``
    cut_site : index in the chromosome at which ``seq`` is inserted
    left, right : flank lengths; default to ``nucpos.left`` / ``nucpos.right``
    """
    def __init__(self,seq,genome,chromosome,cut_site,
                 left=nucpos.left,right=nucpos.right) :
        # set class properties
        self.chromosome = chromosome
        self.cut_site = cut_site
        self.left = left
        self.right = right
        # insert the sequence in the genome at requested position
        c = genome[chromosome]
        # A negative slice start is counted from the END of the chromosome, so
        # for cut_site < left the unclamped slice would silently yield an empty
        # or unrelated left flank. Clamp to the chromosome start instead; the
        # flank is then shorter than ``left`` but is the real upstream sequence.
        flank_start = max(cut_site-left, 0)
        fullseq = c[flank_start:cut_site] + seq + c[cut_site:cut_site+right]
        # invoke the parent class constructor
        super(InsertedSequence, self).__init__(str(fullseq.seq))

In [None]:
# Barcode passed to the insertion loader ("invented" per the variable name —
# presumably a placeholder rather than an experimental barcode; TODO confirm).
barcode_invented = 'ATGGTGATGCGTATAGATCC'
# Load the p8 insertion for that barcode and wrap it in a Sequence so its
# landscapes are computed lazily and cached.
p8_sequence = nucpos.load_insertion('p8',barcode_invented)
p8 = Sequence(p8_sequence)

In [None]:
# Commented-out variant: collect integrations in a dict keyed by (cut_site, chromosome).
# integrations = {}
# Insertion point on chromosome '2L' (coordinate convention is not visible here — TODO confirm).
cut_site = 984587
chromosome = '2L'
# integrations[(cut_site,chromosome)] = InsertedSequence(p8_sequence,dm_genome,chromosome,cut_site)
# Build the p8 insertion together with its genomic flanks; the flank lengths
# fall back to the InsertedSequence defaults (nucpos.left / nucpos.right).
integration = InsertedSequence(p8_sequence,dm_genome,chromosome,cut_site)

In [None]:
# Print the order-2 energy landscape of the integration for each mechanical model.
mechanical_models = ['MD','Olson']
temperature = 'RoomTemp'
for mechanical_model in mechanical_models :
    # Parenthesised single-argument print: same output under Python 2, and it
    # no longer is a SyntaxError under Python 3.
    print(integration.E(2, mechanical_model, temperature))

In [None]:
# Inspect the probability cache: E() calls p() internally, so the cell above
# filled it as a side effect, one entry per (order, mechanical_model, temperature).
integration._p

Okay, this seems to be working well. Now let's start looking at some data.

In [None]:
# load p8 and p9
# Both insertions are loaded with the same barcode and wrapped in plain
# Sequence objects (no genomic flanks are added here).
barcode_invented = 'ATGGTGATGCGTATAGATCC'
p8_sequence = nucpos.load_insertion('p8',barcode_invented)
p8 = Sequence(p8_sequence)
p9_sequence = nucpos.load_insertion('p9',barcode_invented)
p9 = Sequence(p9_sequence)

In [None]:
# annotate promoter for the two sequences
# The regions are stored as [start, end] index pairs in ad-hoc attributes on
# the Sequence instances; both constructs get the same promoter coordinates.
p8.promoter = [290,1298]
p9.promoter = [290,1298]
# GFP starts at 1299 and ends 290 positions before the end of each sequence.
# NOTE(review): the plotting cell expands these pairs with np.arange, i.e. as
# half-open ranges, so index 1298 belongs to neither region — confirm whether
# that gap is intended.
p8.GFP = [1299,len(p8.seq)-290]
p9.GFP = [1299,len(p9.seq)-290]

In [None]:
# Probability landscapes of p8 (top row) and p9 (bottom row), one column per
# mechanical model, with the annotated promoter and GFP regions highlighted.
mechanical_models = ['MD','Olson']
order = 2
temperature = 'RoomTemp'
fig,axarr = plt.subplots(2,2,figsize=(15,5))
for i,(name,seq) in enumerate([('p8',p8),('p9',p9)]) :
    for j,mechanical_model in enumerate(mechanical_models) :
        ax = axarr[i,j]
        # look the landscape up once per panel instead of once per curve
        landscape = seq.p(order,mechanical_model,temperature)
        ax.semilogy(landscape,label='full sequence')
        x = np.arange(seq.promoter[0],seq.promoter[1])
        ax.semilogy(x,landscape[x],'r',label='promoter')
        x = np.arange(seq.GFP[0],seq.GFP[1])
        ax.semilogy(x,landscape[x],'GREEN',label='GFP')
        # every panel should be readable on its own
        ax.set_title('%s, %s model'%(name,mechanical_model))
        ax.set_xlabel('position along sequence')
        ax.set_ylabel('probability')
        ax.legend()
# keep titles and axis labels of neighbouring panels from overlapping
fig.tight_layout()