### Read native synIXR chromosome. Compile segments. Make scrambled chromosome from segment configuration.

In [1]:
from Bio import SeqIO
import re
import operator

In [2]:
# Relative path to the GenBank file that is parsed in the next cell.
gbfile = "../synIXR/sequence.gb"

In [3]:
# Parse the single GenBank entry stored in gbfile into a SeqRecord.
with open(gbfile, "r") as handle:
    record = SeqIO.read(handle, "genbank")

Find features corresponding to loxP sites

In [4]:
# Collect the location of every misc_feature whose first note mentions
# "loxPsym" (case-insensitive). The dictionary key is whatever follows the
# first space in that note.
loxPsites = {}
p = re.compile("loxPsym", re.IGNORECASE)
for feature in record.features:
    if feature.type != "misc_feature":
        continue
    # A misc_feature is not guaranteed to carry a /note qualifier; skip such
    # features instead of failing with a KeyError.
    notes = feature.qualifiers.get("note")
    if not notes:
        continue
    note = notes[0]
    if p.search(note):
        loxPsites[note.partition(" ")[2]] = feature.location

In [10]:
# Verify loxPsites: uncomment to print the sequence under each annotated site.
#for k in loxPsites.keys():
#    print record.seq[loxPsites[k].start.position:loxPsites[k].end.position]

The sequence needs to be rotated to get proper segments: find the last loxPsym site and rotate the sequence so that it starts at that site (i.e. the site becomes the first bases of the sequence).

In [6]:
# Rotate the sequence so that it begins at the loxPsym site with the highest
# start coordinate.
# NOTE: this overwrites record.seq (feature locations are left untouched), so
# re-running this cell rotates the already-rotated sequence again.
site_starts = [loxPsites[k].start.position for k in loxPsites.keys()]
lastInd, lastVal = max(enumerate(site_starts), key=operator.itemgetter(1))
tmp = record.seq
shift = len(tmp) - lastVal  # number of bases from the last site to the end
record.seq = tmp[lastVal:] + tmp[:lastVal]

In [31]:
# Cut the rotated sequence at every loxPsym site. Each non-empty piece is
# stored under its index in the split result; empty pieces are dropped.
loxPseq = "ATAACTTCGTATAATGTACATTATACGAAGTTAT"
pieces = record.seq[:].split(loxPseq)
segments = dict((idx, piece) for idx, piece in enumerate(pieces) if len(piece) > 0)

In [32]:
# Show the segments, keyed by their index in the split.
segments

{1: Seq('GCGGCCGCGGCCGGCCGCGATCGCTTTTTAAGCAAGGATTTTCTTAACTTCTTC...TTC', IUPACAmbiguousDNA()),
 2: Seq('CGCATTCAATAACCTTATGTATTTTTACGCGTCACTGCGAACGGATGAATATTG...TTG', IUPACAmbiguousDNA()),
 3: Seq('TGTTTATTTCGAATCTTTTTCTTGGGTAGAACTTTTAACGTTGTCAGACGGCAT...CTC', IUPACAmbiguousDNA()),
 4: Seq('CTATCAAAAATCAGAATCTGAGCCCAGTATGTCATCTAATTCTGCGTTCGATAA...AAT', IUPACAmbiguousDNA()),
 5: Seq('CTTATAAATAATATGTATGAATAAACTCTATAGTATGTATCAAGAGAGAAATAT...AGT', IUPACAmbiguousDNA()),
 6: Seq('TAGCAATCATCTCTTGTATATTAAAAATTCAAAAATTCACTTTTTTTTATTATC...ATA', IUPACAmbiguousDNA()),
 7: Seq('ATTTTTTCTTTCATCGCATATCTTATATTCATATAGCCTAGAAAAAAATAATCA...TAA', IUPACAmbiguousDNA()),
 8: Seq('TATGTTTATTTTATTTTTTTCATATCCGCTCGAATATTGCCTTCGTTTAAGGTT...ACA', IUPACAmbiguousDNA()),
 9: Seq('AAAATTGAACCAAAAGCTATAGTAGTCATATATATATATATACACCCTTTTTAT...CTG', IUPACAmbiguousDNA()),
 10: Seq('GCTTAACGCTCTATTCATAGCAGCACGTATATACCGAGAAGAGCTGCCATAGGG...AAT', IUPACAmbiguousDNA()),
 11: Seq('GGGTCAGAATCTGACAGTTCCGGTAAGTTCTTTGGGGATCCTTCTCTTA

In [21]:
def seg2seq(segments=None, segmentOrder=(1, 2, 3),
            loxPseq="ATAACTTCGTATAATGTACATTATACGAAGTTAT"):
    """Assemble a sequence from segments joined by loxPsym sites.

    The result starts with one loxPsym site, followed by the requested
    segments in the given order with one site between consecutive segments.
    No site is appended after the last segment.

    Parameters
    ----------
    segments : mapping, optional
        Maps segment id -> sequence. When omitted, the notebook-level
        ``segments`` variable is looked up at call time (not when this
        function is defined), so re-running the cell that builds
        ``segments`` is picked up without redefining this function.
    segmentOrder : list or tuple of segment ids
        Order in which the segments are concatenated.
    loxPseq : str
        Site sequence placed before the first segment and between segments.

    Returns
    -------
    The concatenated sequence. With an empty ``segmentOrder`` this is just
    ``loxPseq``.

    Raises
    ------
    NameError
        If ``segments`` is omitted and no global ``segments`` is defined.
    KeyError
        If ``segments`` is a dict and ``segmentOrder`` names a missing id.
    """
    if segments is None:
        # Resolve the default lazily; binding it in the signature would freeze
        # whatever object ``segments`` referred to when the def cell was run.
        try:
            segments = globals()["segments"]
        except KeyError:
            raise NameError("seg2seq: no 'segments' argument given and no "
                            "global 'segments' is defined")
    # assume a loxPsym site precedes the first base
    fseq = loxPseq
    last = len(segmentOrder) - 1
    for ind, val in enumerate(segmentOrder):
        fseq = fseq + segments[val]
        if ind != last:
            # separator between consecutive segments only
            fseq = fseq + loxPseq
    return fseq

In [34]:
# Build a test sequence from segments 3, 2 and 1 (in that order).
tmp = seg2seq(segmentOrder = [3,2,1])

In [35]:
# Round-trip check: split the rebuilt sequence at the loxPsym sites and index
# every piece (empty pieces included) by its position in the split.
segments_check = dict(enumerate(tmp[:].split(loxPseq)))

In [36]:
# Show the pieces recovered from the rebuilt sequence.
segments_check

{0: Seq('', IUPACAmbiguousDNA()),
 1: Seq('TGTTTATTTCGAATCTTTTTCTTGGGTAGAACTTTTAACGTTGTCAGACGGCAT...CTC', IUPACAmbiguousDNA()),
 2: Seq('CGCATTCAATAACCTTATGTATTTTTACGCGTCACTGCGAACGGATGAATATTG...TTG', IUPACAmbiguousDNA()),
 3: Seq('GCGGCCGCGGCCGGCCGCGATCGCTTTTTAAGCAAGGATTTTCTTAACTTCTTC...TTC', IUPACAmbiguousDNA())}

In [37]:
# Show the rebuilt sequence itself.
tmp

Seq('ATAACTTCGTATAATGTACATTATACGAAGTTATTGTTTATTTCGAATCTTTTT...TTC', IUPACAmbiguousDNA())