In [6]:
from shoestring.design import Design

import warnings

from Bio import BiopythonParserWarning
warnings.simplefilter('ignore', BiopythonParserWarning)

In [90]:
from shoestring.design import Design
from pyblast.utils import load_fasta_glob, load_genbank_glob, make_circular, make_linear
from shoestring.cost import SpanCost

span_cost = SpanCost()


here = 'tests'
query = 'pmodkan-ho-pact1-z4-er-vpr.gb'
paths = {
        'primers': join(here, "data/test_data/primers/primers.fasta"),
        'templates': join(here, "data/test_data/genbank/templates/*.gb"),
        'queries': join(
            'tests', "data/test_data/genbank/designs/pmodkan-ho-pact1-z4-er-vpr.gb"
        ),
    }
primers = make_linear(load_fasta_glob(paths["primers"]))
templates = load_genbank_glob(paths["templates"])

query_path = join(here, 'data/test_data/genbank/designs', query)
queries = make_circular(load_genbank_glob(query_path))

  self.cost_matrix = (m * CostParams.material + t * CostParams.time) * 1.0 / e


In [93]:
design = Design(span_cost=span_cost)

design.add_materials(primers=primers, templates=templates, queries=queries)

design.compile()

assert len(design.graphs) == len(queries)
assert len(design.graphs) == 1

paths_dict = design.optimize()

CMD: makeblastdb -dbtype nucl -title c64295f9-7dc5-4a4c-9804-68508dd31b37 -out /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmpg3sepaz4/c64295f9-7dc5-4a4c-9804-68508dd31b37 -in /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmpsxed8xuu.fasta
CMD: blastn -db /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmpg3sepaz4/c64295f9-7dc5-4a4c-9804-68508dd31b37 -out /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmpg3sepaz4/tmpd565jfnr -query /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmp_wj9n2iz.fasta -outfmt "7 qacc sacc score evalue bitscore length nident gapopen gaps qlen qstart qend slen sstart send sstrand qseq sseq"
CMD: makeblastdb -dbtype nucl -title 09f5495e-9fc4-4646-ac98-f27b6f53accb -out /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmpdajgf_m7/09f5495e-9fc4-4646-ac98-f27b6f53accb -in /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmpeh72j6k_.fasta
CMD: blastn -db /var/folders/fr/yc9x0z2s39lcj4f9d95wvrpr0000gn/T/tmpdajgf_m7/09f5495e-9fc4-4646-ac98-f27b6f53accb -

In [92]:
from more_itertools import pairwise
import pandas as pd
from pyblast.utils import Span, is_circular

def find(a, b, alignments):
    for align in alignments:
        if a == align.query_region.a and b == align.query_region.b:
            yield align
            
rows = []
            
for qk, paths in paths_dict.items():
    G = design.graphs[qk]
    alignments = design.container_factory.alignments[qk]
    record = design.container_factory.seqdb[qk]
    path = paths[0]
    
    for n1, n2 in pairwise(path):
        edata = G[n1][n2]
        cost = edata['weight']
        print(edata)
        if n1[-1] == 'A' and n2[-1] == 'B':
            A = n1[0]
            B = n2[0]
            align = list(find(A, B, alignments))[0]
            sk = align.subject_key
            subject_rec = design.container_factory.seqdb[sk]
            subject_seq = str(subject_rec[align.subject_region.a:align.subject_region.b].seq)
            
            rows.append({
                'query': qk,
                'query_name': record.name,
                'query_region': (align.query_region.a, align.query_region.b),
                'subject': sk,
                'subject_name': subject_rec.name,
                'subject_region': (align.subject_region.a, align.subject_region.b),
                'fragment_length': len(align.subject_region),
                'fragment_seq': subject_seq,
                'cost': cost,
                'type': edata['type']
            })
        else:
            B = n1[0]
            A = n2[0]
            span = Span(B, A, len(record), cyclic=is_circular(record), allow_wrap=True)
            ranges = span.ranges()
            frag_seq = record[ranges[0][0]:ranges[0][1]]
            for r in ranges[1:]:
                frag_seq += record[r[0]:r[1]]
            
            rows.append({
                'query': qk,
                'query_name': record.name,
                'query_region': (B, A),
                'subject': None,
                'subject_name': 'SYNTHESIS',
                'subject_region': None,
                'fragment_length': len(span),
                'fragment_seq': str(frag_seq.seq),
                'cost': cost,
                'type': edata['type']
            })
pd.DataFrame(rows)
            
    


{'weight': 197.33333333333331, 'name': '', 'span_length': 8, 'type': 'JUNCTION_BY_SYNTHESIS'}
{'weight': 90, 'name': '', 'span_length': 1693, 'type': 'PCR_PRODUCT'}
{'weight': 197.33333333333331, 'name': '', 'span_length': 27, 'type': 'JUNCTION_BY_SYNTHESIS'}
{'weight': 90, 'name': '', 'span_length': 7680, 'type': 'PCR_PRODUCT'}


Unnamed: 0,cost,fragment_length,fragment_seq,query,query_name,query_region,subject,subject_name,subject_region,type
0,197.333333,8,GTAGTGGA,c65f4862-7afe-4963-84ea-bca5ef3428ed,pMODKan-HO-pACT1-Z4-,"(4219, 4227)",,SYNTHESIS,,JUNCTION_BY_SYNTHESIS
1,90.0,1693,AGCAGGGCTGACCCCAAGAAGAAGAGGAAGGTGTCGCCAGGGATCC...,c65f4862-7afe-4963-84ea-bca5ef3428ed,pMODKan-HO-pACT1-Z4-,"(4227, 5920)",ee52de72-5b5d-4c23-8347-89626c2ad33c,hCas9-VPR,"(4851, 6544)",PCR_PRODUCT
2,197.333333,27,AATAGTGATACCGTCGACCTCGAGTCA,c65f4862-7afe-4963-84ea-bca5ef3428ed,pMODKan-HO-pACT1-Z4-,"(5920, 5947)",,SYNTHESIS,,JUNCTION_BY_SYNTHESIS
3,90.0,7680,,c65f4862-7afe-4963-84ea-bca5ef3428ed,pMODKan-HO-pACT1-Z4-,"(5947, 4219)",8fa0ff62-da58-4fe5-836b-ad95cc1afb93,pMODKan-HO-pACT1-ZEV4,"(4422, 4219)",PCR_PRODUCT


In [71]:
span.ranges()


[(0, 281)]

In [73]:
span.b

281