# gRNA Generator 
Problem: We want to generate all possible gRNA molecules in a gene with a specific insertion site. 

Solution: 
- First we locate all protospacer adjacent motifs (PAMs) in the gene (in both transcription directions)
- We then go 20 basepairs towards the 5' end 
- The complement of this 20 basepair region is our corresponding gRNA for the PAM
- Once the gRNAs are generated for all PAMS, we have to account for the distance between the gRNA/PAM and the insertion site
    - We sort the final dataframe by this parameter and return the top `num_rows` number of candidates. 

This allows for researchers to quickly generate a large list of viable gRNA molecules that can be used in CRISPR-Cas9 assays. Sometimes the closest gRNA molecule is not the best candidate for a particular insertion site. 

1. gRNA calculator

In [1]:
from Bio import SeqIO
from Bio import Seq
import csv
import numpy
import pandas as pd

def gRNA_gen(chromosome_file, gene_start, gene_end, insert, direction, num_rows):
    """
    The purpose of this function is to return n number (inputted) of optimal PAMs 
    within a gene given a certain insertion site. Inputs needed are a genbank
    file of the chromosome, the start and ending positions of the gene on the
    chromosome, the insertion site (with relation to the entire chromosome), and
    n number of optimal PAMs desired in the output. 
    """
    # load chromosome and deal with forward case 
    load = SeqIO.read(chromosome_file, 'genbank')
    chromosome = load.seq
    chromosome_length = len(chromosome)
    fwd_gene = chromosome[gene_start - 2:gene_end + 2]
    fwd_insert = insert
    
    # deal with reverse case
    rev_chromosome = chromosome.reverse_complement()
    rev_gene_start = chromosome_length - gene_start
    rev_gene_end = chromosome_length - gene_end
    rev_gene = rev_chromosome[rev_gene_end - 2:rev_gene_start + 2]
    rev_insert = chromosome_length - fwd_insert

    # make a table where PAMs will be entered
    pams = pd.DataFrame(columns = ['PAM', 'position', 'insert dist', 'gRNA',
    'gRNA start', 'gRNA end'])
    if (direction == "+"):
        # forward case 
        for x in range(len(fwd_gene)): 
            # define current position in relation to chromosome 
            current_chromosome_pos = x + (gene_start - 2)
            # calculate distance from current position to insert 
            current_distance = current_chromosome_pos - fwd_insert
            # define current search (3 nucleotides)
            current_search = fwd_gene[x:x+3] 
            # check to see if the current_search is a PAM 
            if((current_search == 'AGG') or (current_search == 'TGG') or (current_search == 'GGG') or (current_search == 'CGG')):
                # make gRNA molecule
                gRNA_start = current_chromosome_pos - 20
                gRNA_end = current_chromosome_pos
                gRNA = chromosome[gRNA_start:gRNA_end]
                # append PAM to dataframe
                pams = pams.append({'PAM': str(current_search), 'position': 
                    current_chromosome_pos, 'insert dist': current_distance,
                    'gRNA': str(gRNA), 'gRNA start': gRNA_start, 'gRNA end':
                    gRNA_end}, ignore_index=True)
    if(direction == "-"):
        # reverse case 
        for x in range (len(rev_gene)): 
            # define current position in relation to chromosme 
            current_chromosome_pos = x + (rev_gene_start - 2)
            # calculate distance from current position to insert 
            current_distance = current_chromosome_pos - rev_insert
            # define current search (3 nucleotides)
            current_search = rev_gene[x:x+3] 
            # check to see if the current_search is a PAM 
            if((current_search == 'CCA') or (current_search == 'CCT') or (current_search == 'CCG') or (current_search == 'CCC')):
                # make gRNA molecule
                gRNA_start = current_chromosome_pos - 20
                gRNA_end = current_chromosome_pos
                gRNA = chromosome[gRNA_start:gRNA_end]
                # append PAM to dataframe
                pams = pams.append({'PAM': str(current_search), 'position': 
                    current_chromosome_pos, 'insert dist': current_distance, 
                    'gRNA': str(gRNA), 'gRNA start': gRNA_start, 'gRNA end':
                    gRNA_end}, ignore_index=True) 
    

    # sort dataframe based on distance between insert and PAMs
    pams = pams.sort_values(by = 'insert dist') 

    # return num_rows of 'pams' dataframe 
    return pams.head(num_rows)

gene: ADE2 (-)

position: 565619

n: 10

In [4]:
file = 'chromosome 15.flat'          # chromosome file associated with ADE2
start = 564475                       # start of gene on chromosome
end = 566191                         # end of gene on chromosome
insert = 565619                      # insert site 
num_rows = 10                        # num_rows
direction = "-"                      # direction of transcribed gene

table = gRNA_gen(file, start, end, insert, direction, num_rows) 
print(table)


   PAM position insert dist                  gRNA gRNA start gRNA end
0  CCT   526927        1255  CCGTAAACGAATACGTAAGT     526907   526927
1  CCA   526931        1259  AAACGAATACGTAAGTACGA     526911   526931
2  CCA   526949        1277  GAAATTACAAAGCATGTCTG     526929   526949
3  CCA   526956        1284  CAAAGCATGTCTGATAAGTT     526936   526956
4  CCT   526970        1298  TAAGTTCACATCAACCTCCA     526950   526970
5  CCA   526976        1304  CACATCAACCTCCAACATTA     526956   526976
6  CCT   526981        1309  CAACCTCCAACATTAACTTG     526961   526981
7  CCT   527050        1378  CAAAGTGGTTATTTCCAATC     527030   527050
8  CCC   527080        1408  TTAAACCCGATATTGTTGTT     527060   527080
9  CCA   527081        1409  TAAACCCGATATTGTTGTTT     527061   527081


  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams = pams.append({'PAM': str(current_search), 'position':
  pams =