In [1]:
import coral as cr
from IPython.display import display

# Overview

Example case: swapping a new gene coding sequence into a yeast plasmid.

Forward design steps:
1. Acquire gene coding sequence of interest
2. Make a mockup of the plasmid you want to build
3. Design the Gibson primers necessary to build the plasmid

Validation of design ('reverse' process):
1. Simulate the PCR on the templates to be used, generates fragments.
2. Simulate the Gibson reaction on the fragments, generates plasmid.
3. Compare simulated constructed plasmid to mockup.

Aquarium submission process:
1. Log into rehearsal server
2. Locate templates for PCR (thing that already exists on Aquarium)
3. Submit primers to Aquarium (save the IDs!)
4. Submit fragments to Aquarium, referencing the template + primers from steps 2 and 3
5. Submit plasmid to Aquarium, then check your submissions

# Forward Design

### 1: Acquire gene coding sequence of interest

This code comes from yeastmine.yeastgenome.org's query builder. You can run it by changing just the yeast systematic name (starts with Y, ends with C or W). If you want to build a similar query yourself from scratch, use the Query Builder to set one or more constraints and to show one or more columns, then scroll down and click on the 'Python' link.

In [2]:
#!/usr/bin/env python

# This is an automatically generated script to run your query
# to use it you will require the intermine python client.
# To install the client, run the following command from a terminal:
#
#     sudo easy_install intermine
#
# For further documentation you can visit:
#     http://intermine.readthedocs.org/en/latest/web-services/

# The following two lines will be needed in every python script:
from intermine.webservice import Service
service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")

# Get a new query on the class (table) you will be querying:
query = service.new_query("Gene")

# The view specifies the output columns
query.add_view(
    "primaryIdentifier", "secondaryIdentifier", "organism.shortName", "symbol",
    "name", "sequence.residues"
)

# Uncomment and edit the line below (the default) to select a custom sort order:
# query.add_sort_order("Gene.primaryIdentifier", "ASC")

# You can edit the constraint values below
query.add_constraint("secondaryIdentifier", "=", "YJL014W", code = "A")

# Uncomment and edit the code below to specify your own custom logic:
# query.set_logic("A")

for row in query.rows():
    print row["primaryIdentifier"], row["secondaryIdentifier"], row["organism.shortName"], \
        row["symbol"], row["name"], row["sequence.residues"]

S000003551 YJL014W S. cerevisiae CCT3 Chaperonin Containing TCP-1 ATGCAAGCTCCAGTGGTATTTATGAATGCTTCTCAAGAGAGAACTACCGGTCGTCAGGCCCAAATCTCCAATATCACTGCTGCAAAGGCAGTTGCCGATGTCATTCGAACTTGCTTAGGTCCAAAAGCTATGTTAAAGATGTTATTGGATCCTATGGGCGGTCTTGTGTTGACTAACGATGGCCACGCTATTTTAAGGGAAATTGATGTTGCACATCCAGCAGCTAAATCAATGTTGGAGTTATCCCGTACTCAAGACGAAGAGGTTGGTGACGGGACTACAACAGTTATTATTCTAGCTGGTGAGATTTTAGCTCAGTGTGCACCTTATTTGATTGAAAAAAATATTCACCCTGTTATTATTATCCAAGCCTTGAAGAAAGCACTGACTGATGCACTAGAAGTTATCAAACAAGTAAGTAAACCTGTCGATGTGGAAAATGATGCCGCTATGAAAAAATTGATTCAAGCCTCTATCGGTACTAAATATGTCATACATTGGTCAGAGAAAATGTGTGAATTAGCTCTAGACGCTGTTAAGACTGTCCGTAAAGACCTGGGACAAACTGTGGAAGGTGAACCTAATTTTGAAATCGATATCAAAAGATATGTCCGTGTGGAGAAGATTCCTGGGGGGGATGTTCTAGATTCTCGCGTCTTAAAGGGTGTCTTATTGAACAAGGATGTTGTTCATCCAAAGATGTCCCGTCACATAGAGAATCCACGTGTTGTTCTTTTGGATTGTCCACTAGAATATAAGAAGGGTGAATCTCAAACCAACATAGAAATTGAAAAGGAGGAAGATTGGAATAGGATTTTACAAATCGAGGAAGAACAAGTTCAGCTAATGTGTGAACAAATTTTAGCCGTTAGGCCAACACTAGTCATCACTGAAAAAGGTGTTTCAGATCTAGCACAGCATTATTTACTAAAGG

In [3]:
# Take the first result row as a plain dictionary and wrap its sequence
# in a coral.DNA instance named after the systematic gene name.
my_gene = dict(next(query.rows()))
gene = cr.DNA(my_gene['Gene.sequence.residues'])
gene.name = my_gene['Gene.secondaryIdentifier']
# Annotate the whole sequence (0 to len(gene)) as a single CDS feature.
gene_feature = cr.Feature(gene.name, 0, len(gene), 'CDS')
gene.features.append(gene_feature)
gene

ATGCAAGCTCCAGTGGTATTTATGAATGCTTCTCAAGAGA ... GGTTGATGATATTGTTAGTGGTGTAAGAAAACAAGAATAA
TACGTTCGAGGTCACCATAAATACTTACGAAGAGTTCTCT ... CCAACTACTATAACAATCACCACATTCTTTTGTTCTTATT

### 2: Make a mockup of the plasmid you want to build

In [4]:
# Read the source plasmid from the .gb file sitting next to this notebook.
backbone_source = cr.seqio.read_dna('./pmod6-pgpd-mcherry.gb')
# Quick sanity check of what was loaded: object type and sequence length.
print(type(backbone_source))
print(len(backbone_source))
backbone_source.display()

<class 'coral.sequence._dna.DNA'>
5858


In [5]:
# Take the first feature returned for the name 'mCherry' and excise it from
# the source plasmid; what is left is used as the backbone in later cells.
mch_feature = backbone_source.select_features('mCherry')[0]
backbone = backbone_source.excise(mch_feature)
# Render the backbone for visual inspection.
backbone.display()

In [6]:
# Join the gene to the backbone and circularize the result: this is the
# mockup of the plasmid we want to build.
mockup = (gene + backbone).circularize()
mockup.display()

In [7]:
# List the features carried over onto the mockup (gene CDS + backbone features).
mockup.features

[YJL014W 'CDS' feature (0 to 1605) on strand 0,
 PP2 'primer_bind' feature (6051 to 6075) on strand 0,
 Actual GPD Promoter 'promoter' feature (6075 to 6730) on strand 0,
 TP 'misc_feature' feature (1605 to 1627) on strand 0,
 CYC1_terminator 'terminator' feature (1627 to 1867) on strand 0,
 pYESTrp_rev_primer 'misc_feature' feature (1636 to 1655) on strand 0,
 CYC1_primer 'misc_feature' feature (1636 to 1655) on strand 0,
 TS 'primer_bind' feature (1867 to 1889) on strand 0,
 URA3 3' UTR '3'UTR' feature (1889 to 2389) on strand 0,
 PmeI(1) 'misc_feature' feature (2389 to 2397) on strand 1,
 M13 Reverse primer 'promoter' feature (2401 to 2418) on strand 0,
 M13_pUC_rev_primer 'misc_feature' feature (2414 to 2437) on strand 0,
 mutant? 'misc_feature' feature (2456 to 2457) on strand 1,
 pBR322_origin 'rep_origin' feature (2788 to 3408) on strand 0,
 mut? 'misc_feature' feature (2816 to 2817) on strand 1,
 Ampicillin 'CDS' feature (3562 to 4423) on strand 1,
 AmpR_promoter 'promoter' fea

### 3: Design the Gibson primers necessary to build the plasmid

The `design.gibson_primers` function designs the two primers that would create a 'Gibson junction' (homology) on two separate fragments - i.e., it designs a reverse primer for fragment 1 and a forward primer for fragment 2.

The `overlap` keyword argument tells the design algorithm to put all of the overhang on just one primer (the one for the gene, in our case)

Note that the `gibson_primers` function creates `coral.Primer` objects, which are containers for the overhang sequence (shown in lowercase), the annealing sequence (shown in uppercase), and a specified Tm attribute.

In [8]:
# We choose to put all of the overlap on the gene primers, since backbone
# fragments already exist (we'll use those so as to minimize cost + time).
# Each call designs one junction and returns a pair: the reverse primer for
# the first fragment and the forward primer for the second fragment.
gene_rev, backbone_fwd = cr.design.gibson_primers(gene, backbone, overlap='right')
backbone_rev, gene_fwd = cr.design.gibson_primers(backbone, gene, overlap='left')

# Note that we don't need to use or order the backbone primers,
# because we already have primers that do that (and their fragments).
# Only the gene primers are kept, in (forward, reverse) order.
gene_primers = [gene_fwd, gene_rev]
gene_primers

[Primer: atcggattctagaactagtggatctacaaaATGCAAGCTCCAGTGGTATTTATG Tm: 65.15,
 Primer: actcgaggtcgacggtatcaTTATTCTTGTTTTCTTACACCACTAACAATATC Tm: 64.19]

# Validation of design ('reverse' process)

### 1: Simulate the PCR on the templates to be used, generates fragments.

In [9]:
# Our gene lives on yeast chromosome 10, so that chromosome is the PCR template.
chr10 = cr.seqio.read_dna('./chr10.gb')
# gene_primers is [forward, reverse], which is the order pcr() is given them in.
gene_amplicon = cr.reaction.pcr(chr10, *gene_primers)

### 2: Simulate the Gibson reaction on the fragments, generates plasmid.

In [10]:
# Simulate the Gibson reaction on the simulated PCR product plus the backbone.
gibsoned = cr.reaction.gibson([gene_amplicon, backbone])

### 3: Compare simulated constructed plasmid to mockup.

In [11]:
# Two comparisons against the mockup: plain equality, then a rotation check.
# Why is the first comparison False, and the second True?
print(gibsoned == mockup)
print(gibsoned.is_rotation(mockup))

False
True


# Aquarium submission process:

### 1. Log into rehearsal server

In [12]:
import os

import aquariumapi

# The :82 is important! This is where the rehearsal server works.
# NEVER test API-using code on the production server
AQUARIUM_REHEARSAL_URL = 'http://54.68.9.194:82/api'

# Credentials are read from the environment so that a real API key never has
# to be saved inside the notebook. The fallbacks are the placeholder values
# this cell originally hard-coded, so it still runs unchanged when the
# AQUARIUM_LOGIN / AQUARIUM_KEY variables are not set.
aquarium_login = os.environ.get('AQUARIUM_LOGIN', 'bolten')
aquarium_key = os.environ.get('AQUARIUM_KEY', 'yourkey')

api = aquariumapi.AquariumAPI(AQUARIUM_REHEARSAL_URL, aquarium_login, aquarium_key)

### 2. Locate template(s) for PCR (thing that already exists on Aquarium)

In [13]:
# The template is a yeast strain.
# We can use almost any strain in our lab, so we chose
# the yeast strain with sample ID 12103
# NOTE(review): the lookup cell that follows queries id 29, not 12103 -
# confirm which ID is meant for the rehearsal server.

# We want to work with database models in Aquarium.
# aquariumapi supplies a module for working more easily
# with those models
sample_defs = aquariumapi.models.get_sample_definitions(api)
# Show the available sample models; later cells index this by type name.
display(sample_defs)

{u'AssemblyOligo': AssemblyOligo SampleModel,
 u'AssemblyPool': AssemblyPool SampleModel,
 u'E coli strain': E coli strain SampleModel,
 u'Enzyme': Enzyme SampleModel,
 u'Enzyme Buffer': Enzyme Buffer SampleModel,
 u'Fragment': Fragment SampleModel,
 u'Inducer': Inducer SampleModel,
 u'Ladder': Ladder SampleModel,
 u'Plasmid': Plasmid SampleModel,
 u'Primer': Primer SampleModel,
 u'Primer Pool': Primer Pool SampleModel,
 u'Tetra': Tetra SampleModel,
 u'Transformed E coli Strain': Transformed E coli Strain SampleModel,
 u'Yeast Strain': Yeast Strain SampleModel}

In [17]:
# We want to work with a yeast strain
yeastmodel = sample_defs['Yeast Strain']

# Keep the lookup result under its own name. Rebinding `query` here would
# replace the YeastMine query object that the gene-retrieval cell calls
# `.rows()` on, so that cell would fail if re-run after this one.
yeast_lookup = yeastmodel.find({'id': 29})
display(yeast_lookup)

# The strain's name is what the fragment submission passes as its template.
yeastname = yeast_lookup['rows'][0]['name']

{u'result': u'ok',
 u'rows': [{u'created_at': u'2013-10-22T06:33:30-07:00',
   u'data': None,
   u'description': u'Cultures associated with this glycerol stock of W303a are contaminated, grow poorly, and have low transformation efficiency.',
   u'fields': {u'Integrant': u'-none-',
    u'Integrated Marker(s)': u'',
    u'Mating Type': u'a',
    u'Parent': u'-none-',
    u'Plasmid': u'-none-',
    u'Plasmid Marker(s)': u'',
    u'QC Primer1': u'TCYC1_F',
    u'QC Primer2': u'ColonyPCR_URA3_R(pMOD)'},
   u'id': 29,
   u'name': u'W303a faulty',
   u'project': u'Yeast General',
   u'sample_type_id': 5,
   u'updated_at': u'2015-09-01T04:46:11-07:00',
   u'user_id': 16}],

### 3. Submit primers to Aquarium (save the IDs!)

In [20]:
# What info is needed to submit primers?
# The model's `fields` attribute lists each field name with its type.
primermodel = sample_defs['Primer']
display(primermodel.fields)

[['Overhang Sequence', 'string'],
 ['Anneal Sequence', 'string'],
 ['T Anneal', 'number']]

In [21]:
# Name the primers in advance (and supply metadata).
# gene_primers is ordered [forward, reverse], so pair each with its suffix.
for primer, suffix in zip(gene_primers, ('_Fwd', '_Rev')):
    primer.name = gene.name + suffix
    primer.gene = gene.name

In [23]:
# Submit the primers
# IMPORTANT: only run this once if you can
submitted_primers = []
for primer in gene_primers:
    desc = 'Primer to amplify {} with pMOD overhangs.'.format(primer.gene)
    print type(primer.tm), primer.tm
    fields = {'Anneal Sequence': str(primer.anneal),
              'Overhang Sequence': str(primer.overhang)}
    submitted = primermodel.create(primer.name, 
                                   desc,
                                   fields,
                                   'LABW16')
    submitted_primers.append(submitted)
submitted_primers

<type 'float'> 65.1547148567
<type 'float'> 64.1877903446


[{u'result': u'ok',
  u'rows': [{u'created_at': u'2016-01-26T14:54:39-08:00',
    u'data': None,
    u'description': u'Primer to amplify YJL014W with pMOD overhangs.',
    u'field1': u'ATCGGATTCTAGAACTAGTGGATCTACAAA',
    u'field2': u'ATGCAAGCTCCAGTGGTATTTATG',
    u'field3': None,
    u'field4': None,
    u'field5': None,
    u'field6': None,
    u'field7': None,
    u'field8': None,
    u'id': 11799,
    u'name': u'YJL014W_Fwd',
    u'project': u'LABW16',
    u'properties': {u'': None,
     u'Anneal Sequence': u'ATGCAAGCTCCAGTGGTATTTATG',
     u'Overhang Sequence': u'ATCGGATTCTAGAACTAGTGGATCTACAAA',
     u'T Anneal': 0},
    u'sample_type_id': 1,
    u'updated_at': u'2016-01-26T14:54:39-08:00',
    u'user_id': 4}],
 {u'errors': [u'Could not create Sample: Name Samples: must have unique names.'],
  u'result': u'error'}]

### 4. Submit fragments to Aquarium using template + primers

In [27]:
# What info is needed to submit a fragment?
# The model's `fields` attribute lists each field name with its type.
fragment_model = sample_defs['Fragment']
fragment_model.fields

[['Sequence', 'url'],
 ['Length', 'number'],
 ['Template', 'Plasmid|E coli strain|Fragment|Yeast Strain'],
 ['Forward Primer', 'Primer'],
 ['Reverse Primer', 'Primer'],
 ['Restriction Enzyme(s)', 'string'],
 ['Yeast Marker', 'string']]

In [29]:
# The fragment is described by its simulated length plus the names of the
# template strain and of the two primers submitted earlier.
fields = {
    'Length': len(gene_amplicon),
    'Template': yeastname,
    'Forward Primer': gene_primers[0].name,
    'Reverse Primer': gene_primers[1].name,
}
fragment_description = '{} with pMOD overhangs'.format(gene.name)

# Arguments: sample name, description, field values, project.
fragment_model.create(gene.name, fragment_description, fields, 'LABW16')

{u'result': u'ok',
 u'rows': [{u'created_at': u'2016-01-26T14:56:47-08:00',
   u'data': None,
   u'description': u'YJL014W with pMOD overhangs',
   u'field1': None,
   u'field2': 1655,
   u'field3': u'W303a faulty',
   u'field4': u'YJL014W_Fwd',
   u'field5': u'YJL014W_Rev',
   u'field6': None,
   u'field7': None,
   u'field8': None,
   u'id': 11800,
   u'name': u'YJL014W',
   u'project': u'LABW16',
   u'properties': {u'': None,
    u'Forward Primer': {u'created_at': u'2016-01-26T14:54:39-08:00',
     u'data': None,
     u'description': u'Primer to amplify YJL014W with pMOD overhangs.',
     u'field1': u'ATCGGATTCTAGAACTAGTGGATCTACAAA',
     u'field2': u'ATGCAAGCTCCAGTGGTATTTATG',
     u'field3': None,
     u'field4': None,
     u'field5': None,
     u'field6': None,
     u'field7': None,
     u'field8': None,
     u'id': 11799,
     u'name': u'YJL014W_Fwd',
     u'project': u'LABW16',
     u'properties': {u'': None,
      u'Anneal Sequence': u'ATGCAAGCTCCAGTGGTATTTATG',
      u'Overha

### 5. Submit plasmid to Aquarium

In [30]:
# What info is needed to submit a plasmid?
# The model's `fields` attribute lists each field name with its type.
plasmidmodel = sample_defs['Plasmid']
plasmidmodel.fields

[['Sequence', 'url'],
 ['Sequence Verification', 'url'],
 ['Bacterial Marker', 'string'],
 ['Yeast Marker', 'string'],
 ['Length', 'number'],
 ['Sequencing_primer_ids', 'string']]

In [31]:
# Only the markers and the length are supplied; the length is taken from
# the mockup plasmid built earlier.
fields = {
    'Bacterial Marker': 'AmpR',
    'Yeast Marker': 'URA3',
    'Length': len(mockup),
}
plasmid_name = 'pMOD6G-{}'.format(gene.name)
plasmid_description = 'Overexpression plasmid for {}'.format(gene.name)

# Arguments: sample name, description, field values, project.
plasmidmodel.create(plasmid_name, plasmid_description, fields, 'LABW16')

{u'result': u'ok',
 u'rows': [{u'created_at': u'2016-01-26T14:58:16-08:00',
   u'data': None,
   u'description': u'Overexpression plasmid for YJL014W',
   u'field1': None,
   u'field2': None,
   u'field3': u'AmpR',
   u'field4': u'URA3',
   u'field5': 6758,
   u'field6': None,
   u'field7': None,
   u'field8': None,
   u'id': 11801,
   u'name': u'pMOD6G-YJL014W',
   u'project': u'LABW16',
   u'properties': {u'': None,
    u'Bacterial Marker': u'AmpR',
    u'Length': 6758,
    u'Sequence': None,
    u'Sequence Verification': None,
    u'Sequencing_primer_ids': None,
    u'Yeast Marker': u'URA3'},
   u'sample_type_id': 2,
   u'updated_at': u'2016-01-26T14:58:16-08:00',
   u'user_id': 4}],