In [12]:
from shoestring.design import Design

import warnings

from Bio import BiopythonParserWarning
# Install an 'ignore' filter for every BiopythonParserWarning so those warnings
# are not shown by later cells.
# NOTE(review): presumably raised while the GenBank test files are parsed -- confirm.
warnings.simplefilter('ignore', BiopythonParserWarning)

In [13]:
from shoestring.design import Design
from pyblast.utils import load_fasta_glob, load_genbank_glob, make_circular, make_linear
from shoestring.cost import SpanCost
from os.path import join

# Cost model, constructed with its default arguments; it is passed to
# Design(span_cost=...) in a later cell.
span_cost = SpanCost()


# Root of the test fixtures, relative to the current working directory.
here = 'tests'
# File name of the GenBank design that is loaded as the query below.
query = 'pins-01-hu6-sv40-nt1-optgrna.gb'
paths = {
    'primers': join(here, "data/test_data/primers/primers.fasta"),
    'templates': join(here, "data/test_data/genbank/templates/*.gb"),
    # Rooted at `here` like the other entries so that changing `here` moves
    # every path together.
    # NOTE(review): this entry is not read by any visible cell and names a
    # different design file than `query` -- confirm whether it is still needed.
    'queries': join(
        here, "data/test_data/genbank/designs/pmodkan-ho-pact1-z4-er-vpr.gb"
    ),
}
# NOTE(review): make_linear / make_circular presumably tag the loaded records
# with their topology -- confirm against pyblast.utils.
primers = make_linear(load_fasta_glob(paths["primers"]))
templates = load_genbank_glob(paths["templates"])

# The query actually used by this notebook is built from `query`, not from
# paths['queries'].
query_path = join(here, 'data/test_data/genbank/designs', query)
queries = make_circular(load_genbank_glob(query_path))

  self.cost_matrix = (m * CostParams.material + t * CostParams.time) * 1.0 / e


In [14]:
# Create the design around the span-cost model built in the previous cell.
design = Design(span_cost=span_cost)

# Register the primers, templates and query records loaded in the previous cell.
design.add_materials(primers=primers, templates=templates, queries=queries)

# NOTE(review): compile() appears to be where the makeblastdb/blastn commands
# echoed in this cell's output are run and where design.graphs is populated --
# confirm against shoestring.design.Design.
design.compile()

# Sanity checks: one graph per query, and exactly one query was loaded.
assert len(design.graphs) == len(queries)
assert len(design.graphs) == 1

# Mapping keyed by query key; the next cell indexes each value with [0] to get
# the path it reports.
paths_dict = design.optimize()

CMD: makeblastdb -dbtype nucl -title b2e0bc59-26c3-4960-98ca-b0f90b080150 -out /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmpjp7cx49r/b2e0bc59-26c3-4960-98ca-b0f90b080150 -in /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmpzv4lscrw.fasta
CMD: blastn -db /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmpjp7cx49r/b2e0bc59-26c3-4960-98ca-b0f90b080150 -out /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmpjp7cx49r/tmp7vkh_qj5 -query /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmpdg3rnk9w.fasta -outfmt "7 qacc sacc score evalue bitscore length nident gapopen gaps qlen qstart qend slen sstart send sstrand qseq sseq"
CMD: makeblastdb -dbtype nucl -title cbdff2b6-a605-46a7-8a65-481920c6a4e8 -out /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmp6uvvjbo4/cbdff2b6-a605-46a7-8a65-481920c6a4e8 -in /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmpqkfx8nxi.fasta
CMD: blastn -db /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmp6uvvjbo4/cbdff2b6-a605-46a7-8a65-481920c6a4e8 -

In [15]:
from more_itertools import pairwise
import pandas as pd
from pyblast.utils import Span, is_circular

def find(a, b, alignments):
    """Lazily yield the alignments whose query region has endpoints ``a`` and ``b``.

    Args:
        a: Value compared for equality with ``alignment.query_region.a``.
        b: Value compared for equality with ``alignment.query_region.b``.
        alignments: Iterable of objects exposing ``query_region.a`` and
            ``query_region.b``.

    Yields:
        Every matching alignment, in the order it appears in ``alignments``.
    """
    def _same_endpoints(candidate):
        region = candidate.query_region
        # `b` is only compared when `a` already matched.
        return a == region.a and b == region.b

    yield from filter(_same_endpoints, alignments)
            
# One dict per edge of the reported assembly path; turned into a DataFrame at
# the end of the cell.
rows = []

# The loop variable is deliberately not called `paths`: that name is the dict
# of input file locations built in an earlier cell, and rebinding it here would
# leave stale state behind for any cell that is re-run afterwards.
for qk, candidate_paths in paths_dict.items():
    G = design.graphs[qk]
    alignments = design.container_factory.alignments[qk]
    record = design.container_factory.seqdb[qk]
    # Only the first path stored for each query is reported.
    # NOTE(review): presumably the best-scoring one -- confirm against Design.optimize().
    path = candidate_paths[0]

    # Walk consecutive node pairs; each pair is one edge of the graph.
    for n1, n2 in pairwise(path):
        edata = G[n1][n2]
        cost = edata['weight']
        print(edata)
        if n1[-1] == 'A' and n2[-1] == 'B':
            # A -> B edge: the span is looked up among the alignments for this
            # query and reported from the matching subject record.
            A = n1[0]
            B = n2[0]
            # Take the first matching alignment without materialising the rest,
            # and fail with context instead of a bare IndexError when none match.
            align = next(find(A, B, alignments), None)
            if align is None:
                raise LookupError(
                    "no alignment with query region ({}, {}) for query {}".format(A, B, qk)
                )
            sk = align.subject_key
            subject_rec = design.container_factory.seqdb[sk]
            subject_seq = str(subject_rec[align.subject_region.a:align.subject_region.b].seq)

            rows.append({
                'query': qk,
                'query_name': record.name,
                'query_region': (align.query_region.a, align.query_region.b),
                'subject': sk,
                'subject_name': subject_rec.name,
                'subject_region': (align.subject_region.a, align.subject_region.b),
                'fragment_length': len(align.subject_region),
                'fragment_seq': subject_seq,
                'cost': cost,
                'type': edata['type']
            })
        else:
            # Any other edge is reported as a 'SYNTHESIS' row whose sequence is
            # read from the query record itself.
            B = n1[0]
            A = n2[0]
            span = Span(B, A, len(record), cyclic=is_circular(record), allow_wrap=True)
            # ranges() may return more than one (start, end) pair; the slices
            # are concatenated in the order returned.
            # NOTE(review): presumably >1 pair means the span wraps the origin -- confirm.
            ranges = span.ranges()
            frag_seq = record[ranges[0][0]:ranges[0][1]]
            for r in ranges[1:]:
                frag_seq += record[r[0]:r[1]]

            rows.append({
                'query': qk,
                'query_name': record.name,
                'query_region': (B, A),
                'subject': None,
                'subject_name': 'SYNTHESIS',
                'subject_region': None,
                'fragment_length': len(span),
                'fragment_seq': str(frag_seq.seq),
                'cost': cost,
                'type': edata['type']
            })
pd.DataFrame(rows)
            
    


{'weight': 0, 'name': '', 'span_length': 777, 'type': 'PRE-MADE DNA FRAGMENT'}
{'weight': 294.55555555555554, 'name': '', 'span_length': 248, 'type': 'JUNCTION_BY_SYNTHESIS'}
{'weight': 60, 'name': '', 'span_length': 3260, 'type': 'PCR_PRODUCT_WITH_LEFT_PRIMER'}
{'weight': 385.0, 'name': '', 'span_length': 494, 'type': 'JUNCTION_BY_SYNTHESIS'}


Unnamed: 0,cost,fragment_length,fragment_seq,query,query_name,query_region,subject,subject_name,subject_region,type
0,0.0,777,GGCCGCGTTGCTGGCGTTTTTCCATAGGCTCCGCCCCCCTGACGAG...,953cd16d-ca38-4fc4-80f2-e26388e475a4,pINS-01-hU6-SV40.NT1-op,"(494, 1271)",522a053b-2dd6-4a23-88fc-b435b31d7476,ColE1_origin,"(0, 777)",PRE-MADE DNA FRAGMENT
1,294.555556,248,TTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTA...,953cd16d-ca38-4fc4-80f2-e26388e475a4,pINS-01-hU6-SV40.NT1-op,"(1271, 1519)",,SYNTHESIS,,JUNCTION_BY_SYNTHESIS
2,60.0,3260,ATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTT...,953cd16d-ca38-4fc4-80f2-e26388e475a4,pINS-01-hU6-SV40.NT1-op,"(1519, 4779)",9244cfe7-e791-4526-9a46-d2b4e82dd705,pINS-0A1-pEF1a-citrine-,"(87, 3347)",PCR_PRODUCT_WITH_LEFT_PRIMER
3,385.0,494,GCCGATACGAAGGTTTTCTCCAGCGAAGGTCGGGCAGGAAGAGGGC...,953cd16d-ca38-4fc4-80f2-e26388e475a4,pINS-01-hU6-SV40.NT1-op,"(4779, 494)",,SYNTHESIS,,JUNCTION_BY_SYNTHESIS
