In [12]:
import re
import json
import logging
import requests
import itertools
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

from io import StringIO
from pprint import pprint
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna
from IPython.display import display, Image, HTML, SVG

def uprint(astr):
    """Print ``astr`` followed, on the next line, by a dashed underline of equal length."""
    underline = "-" * len(astr)
    print("\n".join([astr, underline]))
def show_html(astr):
    """Format ``astr`` into a string and display it as HTML in the cell output.

    Returns whatever ``display`` returns.
    """
    markup = '{}'.format(astr)
    return display(HTML(markup))
def show_svg(astr, w=1000, h=1000):
    """Wrap an SVG fragment in a complete SVG document and display it.

    astr: SVG markup to place inside the ``<svg>`` element.
    w, h: width and height used for the ``viewBox`` ("0 0 w h").
    Returns whatever ``display`` returns.
    """
    prolog = '''<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">'''
    open_tag = '''<svg viewBox="0 0 {w:} {h:}" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink= "http://www.w3.org/1999/xlink">'''.format(w=w, h=h)
    document = prolog + open_tag + astr + '</svg>'
    return display(SVG(document))

def file_get_lines(filename):
    """Return all lines of ``filename`` as a list of strings (line endings kept)."""
    with open(filename) as handle:
        lines = handle.readlines()
    return lines

def table_print(rows, header=True):
    """Display a sequence of rows as an HTML table in the cell output.

    rows: sequence of rows, each an iterable of cells. Every cell is converted
        with ``str()``, so numbers and sequence objects can be passed directly.
        Cell text is inserted as-is (no HTML escaping).
    header: when exactly ``True`` the first row is rendered bold; otherwise it
        is rendered with normal weight. Remaining rows use a monospace font.

    An empty ``rows`` renders an empty table instead of raising IndexError.
    """
    def _cells(row):
        # Join the cells of one row with the closing/opening <td> pair
        return "</td><td>".join(str(cell) for cell in row)

    html = ["<table>"]
    if len(rows) > 0:
        weight = 'bold' if header is True else 'normal'
        html.append("<tr style='font-weight:{}'><td>{}</td></tr>".format(weight, _cells(rows[0])))
        for row in rows[1:]:
            html.append("<tr style='font-family:monospace;'><td>{:}</td></tr>".format(_cells(row)))
    html.append("</table>")
    show_html(''.join(html))

def clean_seq(dna):
    """Strip all whitespace from a DNA string and return it as a ``Seq``.

    dna: string of nucleotides, possibly spread over several lines.
    Returns ``Seq(dna, generic_dna)`` built from the whitespace-free string.
    Raises AssertionError if any remaining character is not one of A, C, G, T, N
    (upper case only).
    """
    # Raw string: "\s" in a normal string literal is an invalid escape sequence
    dna = re.sub(r"\s", "", dna)
    bad = sorted(set(dna) - set("ACGTN"))
    assert not bad, "Unexpected characters in DNA sequence: {}".format(bad)
    return Seq(dna, generic_dna)

def clean_aas(aas):
    """Strip all whitespace from an amino-acid string and validate its alphabet.

    aas: string of one-letter amino-acid codes, possibly spread over several lines.
    Returns the whitespace-free string.
    Raises AssertionError if any remaining character is not one of the 20
    one-letter codes in "ACDEFGHIKLMNPQRSTVWY" or "*" (upper case only).
    """
    # Raw string: "\s" in a normal string literal is an invalid escape sequence
    aas = re.sub(r"\s", "", aas)
    bad = sorted(set(aas) - set("ACDEFGHIKLMNPQRSTVWY*"))
    assert not bad, "Unexpected characters in protein sequence: {}".format(bad)
    return aas

def Images(images, header=None, width="100%"): # to match Image syntax
    """Display several images side by side in a one-row HTML table.

    images: iterable of values used as each ``<img>`` ``src`` attribute.
    header: optional iterable of column titles rendered as a ``<th>`` row.
    width: CSS width of the table; a plain int is interpreted as pixels.
    """
    if type(width) is int:
        width = "{}px".format(width)

    parts = ["<table style='width:{}'><tr>".format(width)]
    if header is not None:
        parts.extend(["<th>{}</th>".format(h) for h in header])
        parts.append("</tr><tr>")

    parts.extend(["<td><img src='{}' /></td>".format(image) for image in images])
    parts.append("</tr></table>")
    show_html(''.join(parts))

def new_section(title, color="#66aa33", padding="120px"):
    """Build a full-width, centred banner used as a section divider.

    title: text (or markup) placed inside the banner.
    color: CSS background colour of the banner.
    padding: CSS padding applied above and below the title.
    Returns an ``HTML`` object; it is displayed only if the caller displays it
    or leaves it as the last expression of a cell.
    """
    css = "".join([
        "text-align:center;background:{};padding:{} 10px {} 10px;".format(color, padding, padding),
        "color:#ffffff;font-size:2.55em;line-height:1.2em;",
    ])
    return HTML('<div style="{}">{}</div>'.format(css, title))

# Show or hide text
# A bare HTML(...) expression is rendered only when it is the last expression
# of a cell. More statements follow in this cell, so the stylesheet has to be
# passed to display() explicitly or it is silently discarded.
display(HTML("""
<style>
    .section { display:flex;align-items:center;justify-content:center;width:100%; height:400px; background:#6a3;color:#eee;font-size:275%; }
    .showhide_label { display:block; cursor:pointer; }
    .showhide { position: absolute; left: -999em; }
    .showhide + div { display: none; }
    .showhide:checked + div { display: block; }
    .shown_or_hidden { font-size:85%; }
</style>
"""))

# Plotting style
# Axes: larger title/label fonts, thin (.25) dark-grey (#444444) frame
plt.rc("axes", titlesize=20, labelsize=15, linewidth=.25, edgecolor='#444444')
# NOTE(review): seaborn's set_context also assigns axes title/label sizes and
# line widths, so it may override some of the plt.rc values set just above —
# confirm the intended order of these two calls.
sns.set_context("notebook", font_scale=1.2, rc={})
# Render matplotlib figures inline in the notebook, using the 'retina' figure format
%matplotlib inline
%config InlineBackend.figure_format = 'retina' # or 'svg'

In [2]:
import autoprotocol
import json
from autoprotocol import Unit
from autoprotocol.container import Container
from autoprotocol.protocol import Protocol
from autoprotocol.protocol import Ref # "Link a ref name (string) to a Container instance."
import requests
import logging

# Transcriptic authorization
org_name = 'hgbrian'
# Read the credentials inside a context manager so the file handle is closed
# right away; only the two auth header fields are kept from auth.json.
with open("auth.json") as _auth_file:
    tsc_headers = {k:v for k,v in json.load(_auth_file).items() if k in ["X_User_Email","X_User_Token"]}

# Transcriptic-specific dead volumes
# (container type name, dead volume in microliters)
_dead_volume = [("96-pcr",3), ("96-flat",25), ("96-flat-uv",25), ("96-deep",15),
                ("384-pcr",2), ("384-flat",5), ("384-echo",15),
                ("micro-1.5",15), ("micro-2.0",15)]
# Same table keyed by container type, with the volumes wrapped as Unit objects
dead_volume = {k:Unit(v,"microliter") for k,v in _dead_volume}


def init_inventory_well(well, headers=tsc_headers, org_name=org_name):
    """Initialize well (set volume etc) for Transcriptic

    Fetches the JSON description of ``well.container`` from
    secure.transcriptic.com and copies the aliquot at ``well.index`` onto the
    well: ``well.name``, ``well.properties`` and ``well.volume`` are overwritten.

    well: well object exposing ``container.id`` and ``index``.
    headers: HTTP headers sent with the request (authentication).
    org_name: organization name used in the request URL.

    Returns True on success.
    Raises ValueError if the well's properties contain an 'ERROR' key; the HTTP
    call raises via ``raise_for_status()`` on an error response.
    Logs a warning when the well holds less than 20 microliters.
    """
    container_url = 'https://secure.transcriptic.com/{}/samples/{}.json'.format(org_name, well.container.id)

    response = requests.get(container_url, headers=headers)
    response.raise_for_status()

    container = response.json()
    well_data = container['aliquots'][well.index]
    # Unnamed aliquots fall back to the container label alone
    if well_data['name'] is not None:
        well.name = "{}/{}".format(container["label"], well_data['name'])
    else:
        well.name = container["label"]
    well.properties = well_data['properties']
    well.volume = Unit(well_data['volume_ul'], 'microliter')

    if 'ERROR' in well.properties:
        raise ValueError("Well {} has ERROR property: {}".format(well, well.properties["ERROR"]))
    if well.volume < Unit(20, "microliter"):
        # logging.warn is a deprecated alias that newer Python versions no longer provide
        logging.warning("Low volume for well {} : {}".format(well.name, well.volume))

    return True

def touchdown(fromC, toC, durations, stepsize=2, meltC=98, extC=72):
    """Touchdown PCR protocol generator

    Builds one single-cycle group per annealing temperature, starting at
    ``fromC`` and decreasing by ``stepsize`` without ever going below ``toC``.

    fromC, toC: first and lowest allowed annealing temperature (celsius).
    durations: three integer durations in seconds, for the melt, anneal and
        extension steps respectively.
    stepsize: temperature decrement between consecutive cycles.
    meltC, extC: melt and extension temperatures (celsius).

    Returns a list of ``{"cycles": 1, "steps": [melt, anneal, extend]}`` dicts,
    each step being ``{"temperature": "<T>:celsius", "duration": "<s>:second"}``.
    Raises AssertionError unless ``0 < stepsize < toC < fromC``.
    """
    assert 0 < stepsize < toC < fromC
    def td(temp, dur): return {"temperature":"{:2g}:celsius".format(temp), "duration":"{:d}:second".format(dur)}

    # arange's stop is exclusive, so it is pushed one step past toC to include
    # toC itself. When (fromC - toC) is not a multiple of stepsize that would
    # also emit a temperature below toC, so such values are dropped. The small
    # tolerance keeps a float value that lands on toC up to rounding error.
    lowest = toC - stepsize * 1e-6
    anneal_temps = [C for C in np.arange(fromC, toC-stepsize, -stepsize) if C >= lowest]

    return [{"cycles": 1, "steps": [td(meltC, durations[0]), td(C, durations[1]), td(extC, durations[2])]}
            for C in anneal_temps]

def convert_ug_to_pmol(ug_dsDNA, num_nts):
    """Convert a mass of dsDNA in micrograms to picomoles.

    ug_dsDNA: mass in micrograms (anything ``float()`` accepts).
    num_nts: length of the DNA; the mass is divided by this value.
    Returns ``ug / num_nts * (1e6 / 660)`` as a float.
    """
    # 660: presumably the average molar mass of one base pair (g/mol) — TODO confirm
    pmol_per_ug_per_nt = 1e6 / 660.0
    mass_ug = float(ug_dsDNA)
    return mass_ug / num_nts * pmol_per_ug_per_nt

def expid(val):
    """Return ``val`` prefixed with the experiment name, as "<experiment_name>_<val>".

    ``experiment_name`` is read from module scope at call time.
    """
    template = "{}_{}"
    return template.format(experiment_name, val)

def µl(microliters):
    """Shorthand (with a unicode name) that wraps a number as a microliter ``Unit``."""
    volume = Unit(microliters, "microliter")
    return volume

In [3]:
# sfGFP DNA: a 12-nt leader (AGGAGGACAGCT; the Shine-Dalgarno part, per the
# variable name) followed by the coding sequence starting at ATG.
# clean_seq strips the line breaks and validates the alphabet.
sfGFP_plus_SD = clean_seq("""
AGGAGGACAGCTATGTCGAAAGGAGAAGAACTGTTTACCGGTGTGGTTCCGATTCTGGTAGAACTGGA
TGGGGACGTGAACGGCCATAAATTTAGCGTCCGTGGTGAGGGTGAAGGGGATGCCACAAATGGCAAAC
TTACCCTTAAATTCATTTGCACTACCGGCAAGCTGCCGGTCCCTTGGCCGACCTTGGTCACCACACTG
ACGTACGGGGTTCAGTGTTTTTCGCGTTATCCAGATCACATGAAACGCCATGACTTCTTCAAAAGCGC
CATGCCCGAGGGCTATGTGCAGGAACGTACGATTAGCTTTAAAGATGACGGGACCTACAAAACCCGGG
CAGAAGTGAAATTCGAGGGTGATACCCTGGTTAATCGCATTGAACTGAAGGGTATTGATTTCAAGGAA
GATGGTAACATTCTCGGTCACAAATTAGAATACAACTTTAACAGTCATAACGTTTATATCACCGCCGA
CAAACAGAAAAACGGTATCAAGGCGAATTTCAAAATCCGGCACAACGTGGAGGACGGGAGTGTACAAC
TGGCCGACCATTACCAGCAGAACACACCGATCGGCGACGGCCCGGTGCTGCTCCCGGATAATCACTAT
TTAAGCACCCAGTCAGTGCTGAGCAAAGATCCGAACGAAAAACGTGACCATATGGTGCTGCTGGAGTT
CGTGACCGCCGCGGGCATTACCCATGGAATGGATGAACTGTATAAA""")
print("Read in sfGFP plus Shine-Dalgarno: {} bases long".format(len(sfGFP_plus_SD)))

# Expected protein sequence that the DNA above is checked against
sfGFP_aas = clean_aas("""MSKGEELFTGVVPILVELDGDVNGHKFSVRGEGEGDATNGKLTLKFICTTGKLPVPWPTLVTTLTYG
VQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGTYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNFNSHNVYITADKQKN
GIKANFKIRHNVEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSVLSKDPNEKRDHMVLLEFVTAAGITHGMDELYK""")
# Skip the 12 leader bases so that translation starts at the ATG codon
assert sfGFP_plus_SD[12:].translate() == sfGFP_aas
print("Translation matches protein with accession 532528641")

AGGAGGACAGCTATGTCGAAAGGAGAAGAACTGTTTACCGGTGTGGTTCCGATTCTGGTAGAACTGGATGGGGACGTGAACGGCCATAAATTTAGCGTCCGTGGTGAGGGTGAAGGGGATGCCACAAATGGCAAACTTACCCTTAAATTCATTTGCACTACCGGCAAGCTGCCGGTCCCTTGGCCGACCTTGGTCACCACACTGACGTACGGGGTTCAGTGTTTTTCGCGTTATCCAGATCACATGAAACGCCATGACTTCTTCAAAAGCGCCATGCCCGAGGGCTATGTGCAGGAACGTACGATTAGCTTTAAAGATGACGGGACCTACAAAACCCGGGCAGAAGTGAAATTCGAGGGTGATACCCTGGTTAATCGCATTGAACTGAAGGGTATTGATTTCAAGGAAGATGGTAACATTCTCGGTCACAAATTAGAATACAACTTTAACAGTCATAACGTTTATATCACCGCCGACAAACAGAAAAACGGTATCAAGGCGAATTTCAAAATCCGGCACAACGTGGAGGACGGGAGTGTACAACTGGCCGACCATTACCAGCAGAACACACCGATCGGCGACGGCCCGGTGCTGCTCCCGGATAATCACTATTTAAGCACCCAGTCAGTGCTGAGCAAAGATCCGAACGAAAAACGTGACCATATGGTGCTGCTGGAGTTCGTGACCGCCGCGGGCATTACCCATGGAATGGATGAACTGTATAAA
Read in sfGFP plus Shine-Dalgarno: 726 bases long
Translation matches protein with accession 532528641


In [15]:
# Load pUC19 from a local file; the first line is skipped (presumably the
# FASTA header — verify against puc19fsa.txt) and the rest is joined and cleaned.
pUC19_fasta = file_get_lines('puc19fsa.txt')
pUC19_fwd = clean_seq(''.join(pUC19_fasta[1:]))
# Both strands are kept because later checks search each of them
pUC19_rev = pUC19_fwd.reverse_complement()
# Sanity check: the sequence read from the file must be exactly 2686 bases
assert len(pUC19_fwd) == 2686

# Multiple cloning site (polylinker); it must occur in the forward strand
pUC19_MCS = clean_seq("GAATTCGAGCTCGGTACCCGGGGATCCTCTAGAGTCGACCTGCAGGCATGCAAGCTT")
print("Read in pUC19: {} bases long".format(len(pUC19_fwd)))
assert pUC19_MCS in pUC19_fwd
print("Found MCS/polylinker")

# Check that each restriction site occurs exactly once in pUC19. A palindromic
# site reads the same on both strands, so its single occurrence is found on the
# forward and on the reverse strand, at mirrored positions.
REs = {"EcoRI":"GAATTC", "BamHI":"GGATCC"}  # BamHI cuts GGATCC (was mistyped as GGATTC)
for rename, res in REs.items():
    fwd_pos, rev_pos = pUC19_fwd.find(res), pUC19_rev.find(res)
    # A site that is absent would otherwise pass the uniqueness checks vacuously
    assert fwd_pos != -1, "{} site {} not found in pUC19".format(rename, res)
    # first hit == last hit on each strand  <=>  at most one hit per strand
    assert (fwd_pos == pUC19_fwd.rfind(res) and
            rev_pos == pUC19_rev.rfind(res)), "{} site {} occurs more than once".format(rename, res)
    # If the reverse strand also has a hit, it must be the very same site
    assert (rev_pos == -1 or
            fwd_pos == len(pUC19_fwd) - rev_pos - len(res)), "{} site {} found at two distinct positions".format(rename, res)
print("Asserted restriction enzyme sites present only once: {}".format(REs.keys()))

# lacZα is taken from the reverse strand; the slice bounds are hard-coded
# coordinates for this 2686-base pUC19 sequence.
lacZ = pUC19_rev[2217:2541]
# Reverse complement of the MCS, i.e. the MCS as it reads on the reverse strand
r_MCS = pUC19_MCS.reverse_complement()
print("lacZα sequence:\t{}".format(lacZ))
print("r_MCS sequence:\t{}".format(r_MCS))

lacZ_p = lacZ.translate()

# "*" is a stop: the protein must start with M and contain exactly one stop,
# at the very end.
assert lacZ_p[0] == "M" and "*" not in lacZ_p[:-1] and lacZ_p[-1] == "*"
assert r_MCS in lacZ
# first hit == last hit  <=>  the MCS occurs only once inside lacZ
assert lacZ.find(str(r_MCS)) == lacZ.rfind(str(r_MCS))
assert r_MCS == pUC19_rev[2234:2291]
print("Found MCS once in lacZ sequence")

Read in pUC19: 2686 bases long
Found MCS/polylinker
Asserted restriction enzyme sites present only once: dict_keys(['EcoRI', 'BamHI'])
lacZα sequence:	ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG
r_MCS sequence:	AAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTC
MTMITPSLHACRSTLEDPRVPSSNSLAVVLQRRDWENPGVTQLNRLAAHPPFASWRNSEEARTDRPSQQLRSLNGEWRLMRYFLLTHLCGISHRIWCTLSTICSDAA*
Found MCS once in lacZ sequence
