# Mutagenesis oligo generator

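This code takes the following inputs:
- A FASTA file with the gene of interest (only the first record is used)
- The position to mutate, given as a 1-based codon (residue) number
- The new residue at that position, given as a one-letter amino acid code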

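For each requested mutation it generates a forward oligonucleotide (the new codon, written in lowercase, flanked by the surrounding wild-type bases) and its reverse complement, which can be used to introduce the mutation.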

In [1]:
import csv
from collections import OrderedDict
from Bio import SeqIO
from Bio import Seq

## 1.- Load the sequence

In [2]:
target_gene = SeqIO.parse(<input_sequence>, 'fasta')
target_seq = str(target_gene.next().seq)

In [3]:
# Echo the loaded sequence (a notebook cell displays the value of a bare expression)
target_seq

'ATGTCGGCAAACGATAAGCAATACATCTCGTACAACAACGTACATCAACTATGTCAAGTATCCGCTGAGAGAATTAAGAATTTCAAGCCGGACTTAATCATTGCCATTGGTGGTGGTGGTTTCATTCCTGCTAGGATCCTACGTACGTTCCTAAAGGAGCCCGGCGTGCCAACCATCAGAATTTTTGCTATTATTTTGTCTTTGTACGAAGATTTGAACAGTGTAGGCTCAGAAGTTGAGGAAGTTGGTGTTAAGGTTAGCAGAACACAATGGATTGATTACGAGCAATGTAAATTAGATCTAGTCGGCAAGAACGTTCTTATCGTTGACGAAGTCGATGACACCCGTACCACACTTCATTACGCTTTGAGTGAATTGGAAAAGGATGCAGCTGAACAGGCAAAGGCTAAAGGTATCGATACTGAAAAGTCTCCAGAGATGAAAACAAACTTCGGGATTTTTGTTCTACACGATAAGCAAAAACCAAAGAAAGCAGATTTGCCTGCCGAAATGTTGAATGACAAGAACCGTTATTTTGCAGCTAAAACTGTTCCAGACAAGTGGTATGCATATCCATGGGAATCTACTGACATTGTTTTCCATACTAGAATGGCTATTGAACAGGGCAATGACATCTTTATTCCTGAGCAGGAACACAAGCAATGA'

## 2.- Split sequence in codons

In [4]:
# Split the sequence into consecutive in-frame triplets, starting at base 0.
# Floor division (//) keeps the codon count an integer on Python 3 as well as
# Python 2 (plain / yields a float on Python 3, which range() rejects).
# Any trailing one or two bases that do not form a full codon are dropped.
target_seq_codons = [target_seq[3 * i:3 * (i + 1)] for i in range(len(target_seq) // 3)]

In [5]:
# Echo the codon list (a notebook cell displays the value of a bare expression)
target_seq_codons

['ATG',
 'TCG',
 'GCA',
 'AAC',
 'GAT',
 'AAG',
 'CAA',
 'TAC',
 'ATC',
 'TCG',
 'TAC',
 'AAC',
 'AAC',
 'GTA',
 'CAT',
 'CAA',
 'CTA',
 'TGT',
 'CAA',
 'GTA',
 'TCC',
 'GCT',
 'GAG',
 'AGA',
 'ATT',
 'AAG',
 'AAT',
 'TTC',
 'AAG',
 'CCG',
 'GAC',
 'TTA',
 'ATC',
 'ATT',
 'GCC',
 'ATT',
 'GGT',
 'GGT',
 'GGT',
 'GGT',
 'TTC',
 'ATT',
 'CCT',
 'GCT',
 'AGG',
 'ATC',
 'CTA',
 'CGT',
 'ACG',
 'TTC',
 'CTA',
 'AAG',
 'GAG',
 'CCC',
 'GGC',
 'GTG',
 'CCA',
 'ACC',
 'ATC',
 'AGA',
 'ATT',
 'TTT',
 'GCT',
 'ATT',
 'ATT',
 'TTG',
 'TCT',
 'TTG',
 'TAC',
 'GAA',
 'GAT',
 'TTG',
 'AAC',
 'AGT',
 'GTA',
 'GGC',
 'TCA',
 'GAA',
 'GTT',
 'GAG',
 'GAA',
 'GTT',
 'GGT',
 'GTT',
 'AAG',
 'GTT',
 'AGC',
 'AGA',
 'ACA',
 'CAA',
 'TGG',
 'ATT',
 'GAT',
 'TAC',
 'GAG',
 'CAA',
 'TGT',
 'AAA',
 'TTA',
 'GAT',
 'CTA',
 'GTC',
 'GGC',
 'AAG',
 'AAC',
 'GTT',
 'CTT',
 'ATC',
 'GTT',
 'GAC',
 'GAA',
 'GTC',
 'GAT',
 'GAC',
 'ACC',
 'CGT',
 'ACC',
 'ACA',
 'CTT',
 'CAT',
 'TAC',
 'GCT',
 'TTG',
 'AGT',
 'GAA',


## 3.- Produce the oligonucleotides

In [16]:
# Lookup table: one-letter amino acid code -> most abundant codon for it.
# The codons listed here are for yeast; codon usage data for other organisms
# can be found at:
# https://www.genscript.com/tools/codon-frequency-table
most_abundant = dict(
    C='TGT', D='GAT', S='TCT', Q='CAA', K='AAA',
    I='ATT', P='CCA', T='ACT', F='TTT', N='AAT',
    G='GGT', H='CAT', L='TTG', R='AGA', W='TGG',
    A='GCT', V='GTT', E='GAA', Y='TAT', M='ATG'
)

In [17]:
# Load a table with mutations
# Should have three columns:
# 1.- Original residue
# 2.- Position
# 3.- Target residue
handle_mutations = open(<input_list_mutations>, 'r')
reader_mut = csv.reader(handle_mutations)
# Skip the first line
header = reader_mut.next()

In [18]:
# Prepare the output file
out_handle = open(<output_file>, 'w')
oligos_writer = csv.writer(out_handle, delimiter = '\t')

oligos_writer.writerow(['Original_res', 'Position', 'Target_res', 'OligoF', 'OligoR'])

for line in reader_mut:
    position = int(line[1])
    new_res = line[2]
    
    # Print the 17 previous bases, the new codon, the following 17 bases
    prev_bases = ''.join(target_seq_codons[position-7:position-1])[1:]
    next_bases = ''.join(target_seq_codons[position:position+6])[:-1]
    oligoF = Seq.Seq(prev_bases + most_abundant[new_res].lower() + next_bases)
    
    # Get the reverse complement
    oligoR = oligoF.reverse_complement()
    
    new_row = [line[0], line[1], line[2], str(oligoF), str(oligoR)]
    oligos_writer.writerow(new_row)
    
out_handle.close()