### Read the native synIXR chromosome, compile its segments, and build scrambled chromosomes from the segment configurations.

In [73]:
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
import re
import operator
import pandas as pd

Location of the synIXR sequence file

In [74]:
# Path (relative to the notebook's working directory) of the GenBank file with the synIXR sequence.
gbfile = "../synIXR/sequence.gb"

In [75]:
# Parse the GenBank file into `record`. SeqIO.read accepts a filename directly
# and expects the file to contain exactly one record.
record = SeqIO.read(gbfile, "genbank")

Location of the spreadsheet describing the scramble strains

In [76]:
# Path (relative to the notebook's working directory) of the Excel workbook listing the scramble strains.
sfile = "../synIXR/scramble_wpacbio.xlsx"

In [77]:
# Read the strain table straight from the path: an .xlsx workbook is a binary
# (zip) container, so it must not be handed to pandas as a text-mode handle.
# The sheet name is passed positionally because its keyword was renamed from
# `sheetname` to `sheet_name` between pandas releases.
scramble = pd.read_excel(sfile, "Sheet1")
# only keep synIXR chr
scramble = scramble.loc[scramble.loc[:,u'Synthetic chromosome'] == "9R",:]

Compile dict of fragment orders for all scramble strains

In [141]:
# scrambleSEQ maps each strain ID to {solution number (1-based): list of signed
# segment numbers}, parsed from the newline-separated 'solutions' column.
scrambleSEQ = {}
# Walk the two columns in lockstep instead of using an enumerate() counter as a
# .loc label: after the "9R" filter the index labels of `scramble` are not
# guaranteed to be 0..n-1, so .loc[counter, ...] could pick the wrong row or
# raise KeyError.
for strain_id, v1 in zip(scramble.loc[:,u'Strain ID'], scramble.loc[:,u'solutions']):
    scrambleSEQ[strain_id] = {}
    for i2, v2 in enumerate(v1.split("\n")):
        if not v2.strip():
            # blank line (e.g. trailing newline in the cell): nothing to parse,
            # but the solution numbering of the other lines is left untouched
            continue
        fields = v2.split(",")
        if len(fields) > 1:
            # explicit comma-separated order; entries '44' / '-44' are dropped
            scrambleSEQ[strain_id][i2+1] = [int(i) for i in fields if i != '44' and i != '-44']
        else:
            # "start-end" shorthand; the end value is exclusive, exactly as before
            # NOTE(review): confirm that excluding the end segment is intended
            bounds = v2.split("-")
            scrambleSEQ[strain_id][i2+1] = list(range(int(bounds[0]), int(bounds[1]), 1))

Find features corresponding to loxP sites

In [80]:
# loxPsites maps a site name to its feature location. The name is whatever
# follows the first space in the first /note of a misc_feature whose note
# mentions "loxPsym" (case-insensitive).
loxPsites = {}
p = re.compile("loxPsym", re.IGNORECASE)
for i in record.features:
    if i.type != "misc_feature":
        continue
    # A misc_feature is not required to carry a /note qualifier; skip such
    # features instead of failing with KeyError.
    notes = i.qualifiers.get("note")
    if not notes:
        continue
    if p.search(notes[0]):
        n = notes[0].partition(" ")[2]
        loxPsites[n] = i.location

In [81]:
# verify loxPsites
#for k in loxPsites.keys():
#    print record.seq[loxPsites[k].start.position:loxPsites[k].end.position]

The sequence needs to be shifted to get proper segments: find the last loxPsym site and rotate the sequence to its location, so that this site becomes the first bit of the sequence.

In [82]:
# Rotate the sequence (treated as circular) so that it begins at the loxPsym
# site with the largest start coordinate.
# int(...) replaces the `.position` attribute, which Biopython marks as an
# obsolete legacy accessor on its position objects.
# NOTE: this cell overwrites record.seq while the coordinates in loxPsites keep
# referring to the unrotated sequence, so re-running it without re-reading
# `record` rotates a second time.
lastInd, lastVal = max(enumerate([int(loxPsites[k].start) for k in loxPsites.keys()]),key=operator.itemgetter(1))
shift = len(record.seq)-lastVal
tmp = record.seq
# tmp[:lastVal] is the same slice as tmp[0:len(tmp)-shift]
record.seq = tmp[lastVal:]+tmp[:lastVal]

In [83]:
# loxPsym sequence used as the delimiter between segments.
loxPseq = "ATAACTTCGTATAATGTACATTATACGAAGTTAT"
# Cut the sequence at every occurrence of the delimiter and key each piece by
# its 0-based position in the split result; empty pieces are left out, so their
# numbers are simply missing from the dict.
segments = {}
for piece_no, piece in enumerate(record.seq[:].split(loxPseq)):
    if len(piece) > 0:
        segments[piece_no] = piece

In [91]:
def _revcomp(seq):
    """Return the reverse complement of ``seq``.

    Objects offering a ``reverse_complement()`` method (e.g. Biopython ``Seq``)
    are delegated to it; anything else is handled as a plain string of IUPAC
    nucleotide codes.
    """
    if hasattr(seq, "reverse_complement"):
        return seq.reverse_complement()
    bases = "ACGTRYKMBVDHSWN"
    comps = "TGCAYRMKVBHDSWN"
    table = dict(zip(bases + bases.lower(), comps + comps.lower()))
    # characters outside the table are passed through unchanged
    return "".join(table.get(b, b) for b in reversed(seq))


def seg2seq(segments = None, segmentOrder = [1,2,3], file = None, sname = "", sid = "", desc = ""):
    """Assemble a sequence from numbered segments joined by loxPsym sites.

    The result starts with one loxPsym site, followed by the requested
    segments with a loxPsym site between consecutive segments (none after the
    last one).

    Parameters
    ----------
    segments : mapping of int -> sequence, optional
        Segment number to segment sequence. When omitted, the notebook-level
        ``segments`` variable is looked up at call time, so re-running the cell
        that builds it is picked up without re-defining this function.
    segmentOrder : sequence of int
        Segment numbers in output order; a negative number requests that
        segment inverted, i.e. as its reverse complement. Not modified.
    file : str, optional
        If given, the assembled sequence is also written there in FASTA format.
    sname, sid, desc : str
        Name, id and description of the written record (used only with `file`).

    Returns
    -------
    The assembled sequence (same kind of object as produced by concatenating
    the segment values; a plain ``str`` if `segmentOrder` is empty).

    Raises
    ------
    NameError
        If `segments` is omitted and no global ``segments`` exists.
    KeyError
        If a requested segment number is not in `segments`.
    """
    if segments is None:
        try:
            segments = globals()["segments"]
        except KeyError:
            raise NameError("no `segments` argument given and no global `segments` defined")
    loxPseq = "ATAACTTCGTATAATGTACATTATACGAAGTTAT"
    # assume loxPseq site before first base
    fseq = loxPseq
    last = len(segmentOrder) - 1
    for ind, val in enumerate(segmentOrder):
        thisseq = segments[abs(val)]
        if val < 0:
            # inversion: an inverted double-stranded segment reads as the
            # reverse complement on the reference strand, not as a plain reversal
            thisseq = _revcomp(thisseq)
        fseq = fseq + thisseq
        if ind != last:
            fseq = fseq + loxPseq
    if file is not None:
        # the record is only needed for writing, so build it only here
        r = SeqIO.SeqRecord(fseq, id=sid, name=sname,
                     description=desc)
        with open(file, "w") as f:
            SeqIO.write(r, f, "fasta")
    return fseq

### Write FASTA (.fa) files

In [144]:
# Output directory for the FASTA files (hard-coded absolute path; adjust when
# running elsewhere).
dir = "/g/steinmetz/brooks/git/steinmetz-lab/yeast2_0/scramble/seq/"
# One file per (strain, solution) pair, named <strain>_<solution>.fa; the same
# <strain>_<solution> string is used as the record id.
for k, solutions in scrambleSEQ.items():
    for k2, order in solutions.items():
        n = k + "_" + str(k2)
        tmp = seg2seq(
            segmentOrder = order,
            file = dir + n + ".fa",
            sid = n,
            desc = "Sequence starts at loxPsym_3_3_YIL002C, left loxPsym site flanking Segment 1",
        )