# Download genomes of interest from NCBI
 - Find your assembly accession on the NCBI website

In [None]:
from genome_handler import downloadNcbiGenomes

# Map an organism label to the NCBI assembly that should be downloaded.
# Add one entry per genome; several genomes can be fetched in a single call.
# The labels are the same ones used by the probe-design cells further down,
# so every step of the notebook refers to a genome by the same name
# (the previous labels 'pheobacter' / 'ecoli' did not match those cells).
accessions = {
    'phaeobacter_inhibens_DSM_17395': 'ASM15476v2',
    'ecoli_k12': 'ASM584v2',
    'fusarium': 'ASM14995v2',
}

downloadNcbiGenomes(
    accessions=accessions,
    # Specify where to store your genome info.
    genome_database_dir=r'C:\Users\Daniel\Documents\analysis\probe_designer\genomes',
    # False -> use GenBank names and locus_tags.
    is_refseq_format=True,
)

# Identify all specific probes per gene 
 - apply to single organisms or to groups of organisms

In [13]:
from probe_designer import findSpecificProbes


# Organism label -> assembly. Give a single organism, or several for cross-design.
accessions = {
    'phaeobacter_inhibens_DSM_17395': 'ASM15476v2',
    'ecoli_k12': 'ASM584v2',
    'fusarium': 'ASM14995v2',
}

# Gather the settings first so each one can carry its own explanation,
# then hand them to findSpecificProbes as keyword arguments.
specific_probe_settings = dict(
    # What the output directory of this run will be called.
    run_name='speedy_test',
    # Directory with all of the downloaded genomes.
    genome_database_dir=r'C:\Users\Daniel\Documents\analysis\probe_designer\genomes',
    probes_dir=r'C:\Users\Daniel\Documents\analysis\probe_designer\probes',
    accessions=accessions,
    # Probe design properties (these are the default values).
    probe_props={
        'probe_len': 30,
        'gc_min': 40,
        'gc_max': 65,
        'max_base_rep': 4,
    },
    # Controls specificity.
    max_nonspecific_match=18,
    # Allow cross-hybridization between duplicates?
    is_allow_gene_duplicates=True,
    # TODO: testing-only parameter -- remove when done with testing.
    max_genes_test_parm=10,
)

findSpecificProbes(**specific_probe_settings)

Designing probes...
 - generate naive probes
 - blasting naive probes
 - determining probe specificities


# Identify the maximum number of probes per gene

In [12]:
from probe_designer import findMaxProbesPerGene

# Organism label -> assembly for every genome handled by this step.
accessions = {
    'fusarium': 'ASM14995v2',
    'phaeobacter_inhibens_DSM_17395': 'ASM15476v2',
    'ecoli_k12': 'ASM584v2',
}

# Settings are collected up front and unpacked into the call below.
max_probe_settings = dict(
    run_name='speedy_test',
    accessions=accessions,
    genome_database_dir=r'C:\Users\Daniel\Documents\analysis\probe_designer\genomes',
    probes_dir=r'C:\Users\Daniel\Documents\analysis\probe_designer\probes',
    # Minimum distance between probes; negative numbers allow overlap.
    min_probe_distance=-5,
    # Either 'dp' or 'heuristic'.
    method='dp',
    # Only relevant for eukaryotes.
    is_exon_only=False,
)

findMaxProbesPerGene(**max_probe_settings)

Probe overlap enabled for: -5 bp
fusarium ASM14995v2
phaeobacter_inhibens_DSM_17395 ASM15476v2
ecoli_k12 ASM584v2


# Assemble probes

In [None]:
from probe_assembler import ProbeAssembler


# NOTE(review): this dict is not passed to ProbeAssembler anywhere in this cell;
# it is kept unchanged in case other cells rely on it -- confirm whether it is needed.
accessions = {
    # 'fusarium' : 'ASM14995v2',
    'phaeobacter_inhibens_DSM_17395': 'ASM15476v2',
    'ecoli_k12': 'ASM584v2',
}

# Gene or operon where probes for 3 channels will be generated.
positive_control_genes = {
    'dnaA-dnaN': {
        'genes': ['b3702', 'b3701'],
        'readouts': {'fluor_640nm': 4, 'fluor_488nm': 5, 'fluor_560nm': 9},
    },
}

# Gene or operon where probes for 3 channels will be generated (none configured).
# Example entry contents left by the author:
#   'genes': ['P1_gp003','P1_gp013','P1_gp078'], 'readouts' : {'A647': 4,'A488' : 5, 'A550' : 15}
negative_control_genes = {}

# All assembler options, unpacked into the constructor below.
assembler_settings = dict(
    run_dir=r'C:\Users\Daniel\Documents\analysis\probe_designer\probes\ecoli_and_phaeobacter_test',
    probe_per_gene='ecoli_k12.dp.probes_per_gene.txt',
    # The probes.
    primary_probes='ecoli_k12.dp.max_probes.txt',
    # A text file with a locus_tag column.
    selected_gene_list='ecoli_genes_to_select.txt',
    # True if using Twist.
    is_amplification=True,
    # Choose 2 or 4.
    num_of_flanking=2,
    max_probes_per_ch={'fluor_640nm': 16, 'fluor_560nm': 18, 'fluor_488nm': 30},
    # Problematic ROs to remove.
    probes_to_remove=['R6', 'R21', 'R36', 'R107'],
    reference_genes=[],
    # Genes to prioritize in fluor_640nm.
    genes_of_interest=[],
    positive_control_genes=positive_control_genes,
    negative_control_genes=negative_control_genes,
)

# Initiate the assembler, then run the design step.
probe_designer = ProbeAssembler(**assembler_settings)

probe_designer.design()
