# Tunable Ribosome Occupancy Model
_Author: Raghav Chanchani_

_For the Brian Zid laboratory at UC San Diego_

_Date last modified: 10/4/2018_
#### A discrete-time Markov chain to simulate ribosome distribution along a given gene using the assumptions listed below. There is no final absorption state. Ribosomes are recycled to the -1 index after they move off of the simulated mRNAs. This model does not consider binding energies of the ribosomes to sequences in the gene of interest.
__References:__
1. https://book.bionumbers.org/what-is-faster-transcription-or-translation
2. https://www.columbia.edu/~ks20/stochastic-I/stochastic-I-MCI.pdf
3. https://github.com/gvanderheide/discreteMarkovChain

__Assumptions:__
1. Initiation when ribosome reads first codon _(AUG in given gene file)_
2. Elongation in-between
3. Ribosome moves one triplet with each elongation step
4. Ribosomes are recycled once they move off of the gene
5. If any pair of ribosomes is under 30 nucleotides apart, the ribosome closer to AUG is not allowed to move until it is $\geq$ 30 nucleotides from the next ribosome further along the mRNA
6. Termination when ribosome reads UAG, UAA, or UGA _(in next update)_
7. Length of gene in graph and calculations is in codons, not nucleotides
8. Position refers to the center position of the ribosome
9. The mRNAs only consist of ORFs of the gene of interest _(may change in next update)_
10. Whether, and how quickly, the initiation rate increases or decreases over time _(sec.)_ _(constant rate)_
11. Whether, and how quickly, the elongation rate increases or decreases over time _(sec.)_ _(constant rate)_

__Tunable Parameters:__
1. Initiation rate
2. Elongation rate
3. Number of ribosomes on a single mRNA
4. Number of mRNAs
5. Probability of a ribosome moving from its current position
6. Size of a ribosome (in nucleotides)
7. How long the simulated run is for (sec.)

(A ribosome reads $20\ \frac{\text{codons}}{\text{s}}$ at the maximum elongation rate (1).)

In [15]:
%matplotlib notebook
import numpy as np
import math
import sys
import os
from os import path
import argparse
from itertools import chain
import matplotlib.pyplot as plt
from ipywidgets import *
import ipywidgets as widgets
from IPython.display import display
plt.style.use('ggplot')
#from discreteMarkovChain import markovChain

In [16]:
# Notebook-wide setup: work out which gene file to simulate and create the empty
# module-level containers that the later cells read and fill in.
# The argument list is hard-coded because a notebook has no real command line.
parser = argparse.ArgumentParser()
parser.add_argument('filename')
args = parser.parse_args(['sample.txt'])

gene_length = 0      # updated by read_gene()
mRNA = []            # updated by read_gene()
ribosome_list = []   # storage for all ribosomes in a single mRNA
occupancy = []       # storage for all ribosomes in all mRNAs
fname = args.filename

# Only .txt and .fasta names are accepted; the comparison ignores case.
if not fname.lower().endswith(('.fasta', '.txt')):
    parser.error("file must be a .txt or FASTA file")

In [17]:
class ribosome:
    """A ribosome described by where it is and how long it has been there.

    Instance attributes:
        position - current position of the ribosome; a new ribosome starts at -1
        counter  - number of timesteps spent at the current position; starts at 0
    """

    def __init__(self):
        """Create a ribosome with position -1 and counter 0."""
        self.position, self.counter = -1, 0

    def set_position(self, new_position):
        """Overwrite the stored position.

        Inputs: new_position - the position the ribosome has been moved to
        Return: None
        """
        self.position = new_position

    def set_counter(self, new_count):
        """Overwrite the stored counter.

        Inputs: new_count - number of timesteps the ribosome has been at its current position
        Return: None
        """
        self.counter = new_count

In [18]:
def read_gene(gene_file):
    """Read a gene sequence file and size the simulated mRNA from it.

    Each line is split on whitespace and the pieces are flattened into a list of
    single characters, so spaces and line breaks inside the sequence are ignored.
    FASTA header lines (those starting with ">") are skipped so that their text
    is not counted as sequence.

    Inputs: gene_file - path to a .txt or FASTA file that contains the nucleotides of a gene
    Return: a 3-tuple (remainder, gene_length, mRNA)
            remainder   - always the empty string; kept so callers that unpack
                          three values keep working
            gene_length - the length of the gene in codons (nucleotide count // 3)
            mRNA        - list holding one single-character entry per nucleotide
    Side effects: rebinds the module-level gene_length and mRNA.
    Errors: if the file cannot be read, a message is written to stderr and the
            interpreter exits with status -1.
    """
    global gene_length
    global mRNA

    mRNA = []
    try:
        with open(gene_file) as inputFileHandle:
            for line in inputFileHandle:
                if line.lstrip().startswith('>'):
                    continue  # FASTA description line, not sequence
                for token in line.split():
                    mRNA.extend(token)  # extending by a str appends its characters
    except IOError:
        sys.stderr.write("read_gene - Error: Could not open {}\n".format(gene_file))
        sys.exit(-1)

    gene_length = len(mRNA) // 3
    # The first element used to be inputFileHandle.read() issued after the file
    # had been fully iterated, which always produced ''.
    return '', gene_length, mRNA


In [19]:
def make_matrix():
    """Build the codon transition matrix and return its steady-state distribution.

    The chain has one state per codon (module-level gene_length states). From any
    codon a ribosome advances to the next codon with the module-level
    ``probability`` and stays put with ``1 - probability``, matching the draw made
    in move(). The last codon wraps around to codon 0, which models recycling.

    The stationary vector pi (pi @ P == pi, sum(pi) == 1) is obtained with a
    least-squares solve, so no Markov-chain package is required.

    Inputs: None (reads the module-level probability and gene_length)
    Return: 1-D numpy array of length gene_length holding the steady-state
            probability of each codon; an empty array when gene_length <= 0
    """
    global probability
    global gene_length

    n_codons = gene_length
    if n_codons <= 0:
        return np.zeros(0)

    codons = np.arange(n_codons)
    probs = np.zeros((n_codons, n_codons))
    probs[codons, codons] = 1.0 - probability
    # "+=" rather than "=" so that a one-codon gene keeps a row sum of 1
    # (there the wrap-around target is the diagonal entry itself).
    probs[codons, (codons + 1) % n_codons] += probability

    # Stack the balance equations (P^T - I) pi = 0 on top of the normalisation
    # row sum(pi) = 1. lstsq also copes with the degenerate probability == 0
    # case, where it returns the minimum-norm (uniform) solution.
    balance = np.vstack([probs.T - np.eye(n_codons), np.ones((1, n_codons))])
    target = np.zeros(n_codons + 1)
    target[-1] = 1.0
    steady_state_probs = np.linalg.lstsq(balance, target, rcond=None)[0]

    return steady_state_probs

In [20]:
def move(ribosome):
    """Give one ribosome a single chance to advance by one position.

    A ribosome whose position has reached the module-level gene_length is handed
    back untouched and no random number is drawn. Otherwise one value from
    np.random.random() is compared with the module-level probability: on success
    the ribosome steps forward by one and its counter restarts at 1; on failure
    only its counter grows by one.

    Inputs: ribosome - ribosome object trying to move (modified in place)
    Return: ribosome - the same object that was passed in
    """
    global gene_length
    global probability

    if ribosome.position >= gene_length:
        return ribosome

    advanced = np.random.random() <= probability
    if not advanced:
        ribosome.counter += 1
        return ribosome

    ribosome.position += 1
    ribosome.counter = 1
    return ribosome

In [31]:
def create_histogram(all_cules):
    """Plot a histogram of the final ribosome positions pooled over all mRNAs.

    The x-axis is the position in 10-wide bins and the y-axis is the raw number
    of ribosomes per bin (density=False) on a logarithmic scale. The title is
    built from the module-level fname with its extension removed.

    Inputs: all_cules - iterable of ribosome lists, one list per simulated mRNA;
                        every ribosome must expose a .position attribute
    Return: None
    """
    global fname
    global gene_length

    pos_array = [ribo.position for sim in all_cules for ribo in sim]
    ax = plt.gca()
    ax.set_title('Ribosome Frequency ' + os.path.splitext(str(fname))[0])
    ax.set_yscale('log')
    # With zero mRNAs or zero ribosomes there is nothing to bin (and min() would
    # raise on the empty list), so only the labelled, empty axes are shown.
    if pos_array:
        # Keep the historical left edge (min * 100) when ribosomes sit at -1, but
        # never let the first edge climb above 0: otherwise a run in which every
        # ribosome is on the gene would start the bins beyond all of the data.
        first_edge = min(min(pos_array), 0) * 100
        plt.hist(pos_array, bins=range(first_edge, gene_length + 10, 10), density=False)
    plt.xlabel('Distance from AUG (codon)')
    plt.ylabel('Ribosome Frequency')
    plt.show()

    return

In [22]:
"""
Description: Determines whether or not the ribosomes on the simulated mRNA will collide, which ribosome is allowed to
    move, and updates the positions and number of timesteps present at a given location of all ribosomes corresponding
    to a simulated mRNA.
Inputs: ribosomes - the number of ribosomes to be run on a given mRNA
        time - the length of the simulation (sec.)
        ribo_size - the size of the ribosome object (nucleotides)
        probability - the probability that a ribosome will move forward from its current position
        kI - initiation rate
        kE - elongation rate
        n_mRNA - total number of mRNAs
Return: ribosome_list - list containing all ribosome objects corresponding to a given mRNA
"""
def simulate(ribosomes,time,ribo_size,prob,kI,kE):
    """Simulate the ribosomes of a single mRNA and return them after the last timestep.

    Stores ``prob`` in the module-level ``probability``, creates ``ribosomes`` new
    ribosome objects, re-reads the gene file named by the module-level
    ``args.filename`` to refresh ``gene_length``, and then walks through
    ``time * 10`` timesteps. In every timestep each visited ribosome either
    attempts initiation (position -1 to 0), attempts elongation through move(),
    or only has its counter incremented, depending on the timestep, its counter,
    its position and the position of its neighbour.

    Inputs: ribosomes - number of ribosome objects created for this mRNA
            time - length of the simulation (sec.); there are time*10 timesteps
            ribo_size - threshold used in the neighbour-distance tests
                        (a move needs |position difference| > ribo_size - 1)
            prob - probability used for the initiation draws and by move()
            kI - timestep t counts as an initiation step when t % kI == 0
            kE - a ribosome counts as being on an elongation step when its counter % kE == 0
    Return: ribosome_list - list of the ribosome objects after the final timestep
    """
    global gene_length
    global probability
    ################################# fix overlaps
    ################################# when ribosome wants to move, also check if correct distance from 
    probability = prob
    # NOTE: the loop variable "complex" shadows the builtin of the same name inside this function.
    ribosome_list = [ribosome() for complex in range(ribosomes)]
    # "flag" and "mRNA" are not used again in this function; only gene_length is needed below.
    flag, gene_length, mRNA = read_gene(args.filename)
    discrete_time = [t for t in range(time*10)] # x10 is used because it splits the simulation into 0.1sec. res.
    # NOTE(review): positions advance by one per move() and gene_length is in codons, while ribo_size is
    # documented in nucleotides — confirm the units of the distance tests below.
    for t_step in discrete_time:
        # NOTE(review): range(len(ribosome_list) - 1) stops one short, so the last ribosome in the list is
        # only ever read as a neighbour and is never updated by this loop — confirm this is intended.
        for complex in range(len(ribosome_list) - 1): # for each ribosome...
            # The counter is sampled once here; later changes in this iteration do not affect "step".
            step = ribosome_list[complex].counter #-------------------------- wrong because may never reach?
            # NOTE(review): kI == 0 or kE == 0 raises ZeroDivisionError in the modulo tests below.
            # Neighbour convention used throughout: ribosome 0 is compared with the last ribosome in the
            # list, every other ribosome with the one at index complex-1.
            if t_step % kI == 0: # if initiation timestep
                if step % kE == 0: # if elongation timestep
                    if ribosome_list[complex].position == 0: # if at beginning of gene
                        if complex == 0: # if first ribosome in the simulation
                            if ribosome_list[complex].counter == 0: # if just arrived at beginning of gene
                                if abs(ribosome_list[complex].position-ribosome_list[len(ribosome_list)-1].position) > \
                                ribo_size-1: # if correct distance from other ribosome
                                    ribosome_list[complex] = move(ribosome_list[complex]) # attempt to move
                                elif ribosome_list[len(ribosome_list)-1].position == -1:
                                    ribosome_list[complex] = move(ribosome_list[complex]) # attempt to move
                                else: ribosome_list[complex].counter += 1
                                #continue # move on to next ribosome
                            else: # if been at position for at least one time step
                                if abs(ribosome_list[complex].position-ribosome_list[len(ribosome_list)-1].position) > \
                                ribo_size-1: # if correct distance from other ribosome
                                    ribosome_list[complex] = move(ribosome_list[complex]) # attempt to move
                                elif ribosome_list[len(ribosome_list)-1].position == -1:
                                    ribosome_list[complex] = move(ribosome_list[complex]) # attempt to move
                                else: ribosome_list[complex].counter += 1
                        else: # if not the first ribosome but at beginning of gene
                            if ribosome_list[complex].counter == 0: # if just arrived at beginning of gene
                                if abs(ribosome_list[complex].position-ribosome_list[complex-1].position) > \
                                ribo_size-1: # if correct distance from other ribosome
                                    ribosome_list[complex] = move(ribosome_list[complex]) # attempt to move
                                elif ribosome_list[complex-1].position == -1:
                                    ribosome_list[complex] = move(ribosome_list[complex]) # attempt to move
                                else: ribosome_list[complex].counter += 1
                                #continue # move on to next ribosome
                            else:
                                if abs(ribosome_list[complex].position-ribosome_list[complex-1].position) > \
                                ribo_size-1: # if correct distance from other ribosome
                                    ribosome_list[complex] = move(ribosome_list[complex])
                                elif ribosome_list[complex-1].position == -1:
                                    ribosome_list[complex] = move(ribosome_list[complex]) # attempt to move
                                else: ribosome_list[complex].counter += 1
                    elif ribosome_list[complex].position == -1: # if not on gene yet
                        # Initiation attempt: on a successful draw the ribosome is placed at position 0
                        # with its counter set to 1; otherwise only the counter is incremented.
                        if complex == 0: # if first ribosome in the simulation
                            if ribosome_list[len(ribosome_list)-1].position == -1: # if last ribosome also at -1
                                moves = np.random.random() <= probability
                                if moves:
                                    ribosome_list[complex].counter = 1
                                    ribosome_list[complex].position = 0
                                else: ribosome_list[complex].counter += 1
                            else: # if last ribosome not at -1 but somewhere on the mRNA
                                if abs(ribosome_list[complex].position-ribosome_list[len(ribosome_list)-1].position) > \
                                ribo_size-1: # if correct distance from the last ribosome
                                    moves = np.random.random() <= probability
                                    if moves:
                                        ribosome_list[complex].counter = 1
                                        ribosome_list[complex].position = 0
                                    else: ribosome_list[complex].counter += 1
                                else: ribosome_list[complex].counter += 1
                        else: # if not the first ribosome
                            if ribosome_list[complex-1].position == -1: # if ribosome "ahead" of it is also at -1
                                moves = np.random.random() <= probability
                                if moves:
                                    ribosome_list[complex].counter = 1
                                    ribosome_list[complex].position = 0
                                else: ribosome_list[complex].counter += 1
                            else: # if ribosome "ahead" of it is not at -1 but somewhere else on the mRNA
                                if abs(ribosome_list[complex].position-ribosome_list[complex-1].position) > \
                                ribo_size-1: # if correct distance fom other ribosome
                                    moves = np.random.random() <= probability
                                    if moves:
                                        ribosome_list[complex].counter = 1
                                        ribosome_list[complex].position = 0
                                    else: ribosome_list[complex].counter += 1
                                else: ribosome_list[complex].counter += 1
                    else: # if somewhere on gene not at beginning
                        if complex == 0: # if first ribosome in the simulation
                            if abs(ribosome_list[complex].position-ribosome_list[len(ribosome_list)-1].position) > \
                            ribo_size-1: # if correct distance apart
                                ribosome_list[complex] = move(ribosome_list[complex])
                                # NOTE(review): move() only advances while position < gene_length, so
                                # "position > gene_length" looks unreachable right after a move() call;
                                # the same test recurs in every recycling branch below — confirm the comparison.
                                if ribosome_list[complex].position > gene_length:
                                    ribosome_list[complex].position = -1
                                    ribosome_list[complex].counter = 0
                            else: ribosome_list[complex].counter += 1
                        else: # if not the first ribosome
                            if abs(ribosome_list[complex].position-ribosome_list[complex-1].position) > \
                            ribo_size-1:
                                ribosome_list[complex] = move(ribosome_list[complex])
                                if ribosome_list[complex].position > gene_length: # if the ribosome leaves the ribosome it is recycled
                                    ribosome_list[complex].position = -1
                                    ribosome_list[complex].counter = 0
                            elif ribosome_list[complex-1].position == -1:
                                    ribosome_list[complex] = move(ribosome_list[complex]) # attempt to move
                            else: ribosome_list[complex].counter += 1
                else: # if not an elongation timestep and only an initiation timestep
                    # Only ribosomes waiting at -1 may act here; all others just accumulate time.
                    if complex == 0: # if first ribosome in the simulation
                        if ribosome_list[complex].position == -1:
                            if ribosome_list[len(ribosome_list)-1].position == -1:
                                moves = np.random.random() <= probability
                                if moves:
                                    ribosome_list[complex].counter = 1
                                    ribosome_list[complex].position = 0
                                else: ribosome_list[complex].counter += 1
                            else: # if only first ribosome is at -1
                                if abs(ribosome_list[complex].position-ribosome_list[len(ribosome_list)-1].position) > \
                                ribo_size-1: # if correct distance apart
                                    moves = np.random.random() <= probability # attempt to move
                                    if moves:
                                        ribosome_list[complex].counter = 1
                                        ribosome_list[complex].position = 0
                                    else: ribosome_list[complex].counter += 1
                                else: ribosome_list[complex].counter += 1
                        else: ribosome_list[complex].counter += 1
                    else: # if complex > 0 and only an initiation timestep
                        if ribosome_list[complex].position == -1: # if ribosome was not initiated or is recycled
                            if ribosome_list[complex-1].position == -1: # if both ribosomes are at -1
                                moves = np.random.random() <= probability # attempt to move
                                if moves:
                                    ribosome_list[complex].counter = 1
                                    ribosome_list[complex].position = 0
                                else: ribosome_list[complex].counter += 1
                            else: # if only current ribosome is at -1
                                if abs(ribosome_list[complex].position-ribosome_list[complex-1].position) > \
                                ribo_size-1: # if it is the correct distance away from its neighbor to the "right"
                                    moves = np.random.random() <= probability
                                    if moves:
                                        ribosome_list[complex].counter = 1
                                        ribosome_list[complex].position = 0
                                    else: ribosome_list[complex].counter += 1
                                else: ribosome_list[complex].counter += 1
                        else: ribosome_list[complex].counter += 1
            else: # if not initiation step
                if ribosome_list[complex].position == 0: # if at the beginning of an mRNA
                    if step % kE == 0: # if an elongation step
                        if complex == 0: # if first ribosome in the simulation
                            if abs(ribosome_list[complex].position-ribosome_list[len(ribosome_list)-1].position) > \
                            ribo_size-1:
                                ribosome_list[complex] = move(ribosome_list[complex])
                                if ribosome_list[complex].position > gene_length:
                                    ribosome_list[complex].position = -1
                                    ribosome_list[complex].counter = 0
                            # complex is 0 here, so index complex-1 is -1, i.e. the last ribosome in the list.
                            elif ribosome_list[complex-1].position == -1:
                                ribosome_list[complex] = move(ribosome_list[complex]) # attempt to move
                                if ribosome_list[complex].position > gene_length: # if the ribosome leaves the ribosome it is recycled
                                    ribosome_list[complex].position = -1
                                    ribosome_list[complex].counter = 0
                            else:
                                ribosome_list[complex].counter += 1
                        else: # if not the first ribosome in the simulation
                            if abs(ribosome_list[complex].position-ribosome_list[complex-1].position) > \
                            ribo_size-1: # if correct distance away from other ribosomes
                                ribosome_list[complex] = move(ribosome_list[complex]) # attempt to move
                                if ribosome_list[complex].position > gene_length: # if the ribosome leaves the ribosome it is recycled
                                    ribosome_list[complex].position = -1
                                    ribosome_list[complex].counter = 0
                            elif ribosome_list[complex-1].position == -1:
                                ribosome_list[complex] = move(ribosome_list[complex]) # attempt to move
                                if ribosome_list[complex].position > gene_length: # if the ribosome leaves the ribosome it is recycled
                                    ribosome_list[complex].position = -1
                                    ribosome_list[complex].counter = 0
                            else:
                                ribosome_list[complex].counter += 1
                    else:
                        ribosome_list[complex].counter += 1 # if not an elongation step then increment amount of time spent in current state
                elif ribosome_list[complex].position > 0: # if away from the beginning of the mRNA
                    if step % kE == 0: # if elongation step
                        if complex == 0:
                            if abs(ribosome_list[complex].position-ribosome_list[len(ribosome_list)-1].position) > \
                            ribo_size-1:
                                ribosome_list[complex] = move(ribosome_list[complex]) # attempt to move
                                if ribosome_list[complex].position > gene_length: # if the ribosome leaves the ribosome it is recycled
                                    ribosome_list[complex].position = -1
                                    ribosome_list[complex].counter = 0
                            # complex is 0 here, so index complex-1 is -1, i.e. the last ribosome in the list.
                            elif ribosome_list[complex-1].position == -1:
                                ribosome_list[complex] = move(ribosome_list[complex]) # attempt to move
                                if ribosome_list[complex].position > gene_length: # if the ribosome leaves the ribosome it is recycled
                                    ribosome_list[complex].position = -1
                                    ribosome_list[complex].counter = 0
                            else:
                                ribosome_list[complex].counter += 1
                        else: # if not the first ribosome
                            if abs(ribosome_list[complex].position-ribosome_list[complex-1].position) > \
                            ribo_size-1:
                                ribosome_list[complex] = move(ribosome_list[complex])
                                if ribosome_list[complex].position > gene_length: # if the ribosome leaves the ribosome it is recycled
                                    ribosome_list[complex].position = -1
                                    ribosome_list[complex].counter = 0
                            elif ribosome_list[complex-1].position == -1:
                                ribosome_list[complex] = move(ribosome_list[complex]) # attempt to move
                                if ribosome_list[complex].position > gene_length: # if the ribosome leaves the ribosome it is recycled
                                    ribosome_list[complex].position = -1
                                    ribosome_list[complex].counter = 0
                            else:
                                ribosome_list[complex].counter += 1
                    else:
                        ribosome_list[complex].counter += 1 # if not an elongation step then increment amount of time spent in current state
                else: # if at the -1 position and not initiation
                    ribosome_list[complex].counter += 1 # if the ribosome was in state -1 then increment time spent in current state
    
    return ribosome_list

In [30]:
def concatenate(ribosomes,time,ribo_size,prob,kI,kE,n_mRNA):
    """Simulate n_mRNA mRNAs one after another and plot the pooled result.

    Calls simulate() once per mRNA with the same settings, collects the returned
    ribosome lists into a list of lists, and passes that to create_histogram().

    Inputs: ribosomes - the number of ribosomes to be run on a given mRNA
            time - the length of the simulation (sec.)
            ribo_size - the size of the ribosome object (nucleotides)
            prob - the probability that a ribosome will move forward from its current position
            kI - initiation rate
            kE - elongation rate
            n_mRNA - total number of mRNAs
    Return: None
    """
    per_mrna_ribosomes = [
        simulate(ribosomes, time, ribo_size, prob, kI, kE)
        for _ in range(n_mRNA)
    ]
    create_histogram(per_mrna_ribosomes)

In [24]:
# Sliders for the tunable parameters: kI, kE, the probability of moving to the
# next codon, ribosome size, simulation length, ribosomes per mRNA and number of
# mRNAs. interact_manual() adds a "Run Interact" button; concatenate() runs (and
# the histogram is redrawn) each time that button is pressed.

# simulate() uses kI and kE as modulo divisors (t_step % kI, step % kE), so the
# two sliders start at 1: a value of 0 would raise ZeroDivisionError.
initiation = widgets.IntSlider(min=1, max=10, value=1, step=1, description='kI - try/sec.')
elongation = widgets.IntSlider(min=1, max=20, value=1, step=1, description='kE - try/sec.')
prob_move = widgets.FloatSlider(min=0.00, max=1.00, value=0.50, step=0.01, description='P(forward)')
sizes = widgets.IntSlider(min=25, max=35, value=30, step=1, description='Ribo size (nt.)')
secs = widgets.IntSlider(min=0, max=10000, value=100, step=100, description='time (sec.)')
ribo = widgets.IntSlider(min=0, max=25, value=25, step=5, description='n(ribosomes)')
num_cules = widgets.IntSlider(min=0, max=5000, value=1, step=1, description='n(mRNA)')
# The trailing semicolon keeps the notebook from echoing interact_manual's return value.
interact_manual(concatenate, ribosomes=ribo, time=secs, ribo_size=sizes, prob=prob_move,
                kI=initiation, kE=elongation, n_mRNA=num_cules);