# Download genomes of interest from NCBI
 - Find your assembly accession on the NCBI website
 - Update the `accessions` dictionary (multiple genomes can be processed together)
 - Run the cell to download and process the files

In [2]:
from genome_handler import downloadNcbiGenomes

# Directory in which the downloaded genomes are stored.
genome_database_dir = r'/home/labs/danielda/danielda/scripts/probe_designer/genomes'

# Genomes to fetch, written as organism_name -> accession.
accessions = {
    'pheobacter': 'ASM15476v2',
    'ecoli': 'ASM584v2',
    'fusarium': 'ASM14995v2',
    'ostreococcus_tauri': 'GCF_000214015.3',
    'os_virus_sOtV5': 'GCF_000872425.2',
}

downloadNcbiGenomes(
    accessions=accessions,
    genome_database_dir=genome_database_dir,  # where the genome info is stored
    is_refseq_format=True,  # False -> use GenBank names and locus_tags
)



downloading: pheobacter -> ASM15476v2
downloading: ecoli -> ASM584v2
downloading: fusarium -> ASM14995v2
downloading: ostreococcus_tauri -> GCF_000214015.3
downloading: os_virus_sOtV5 -> GCF_000872425.2


# For manually downloaded genomes
 - Make sure the name of the genome directory ends with the accession (e.g. `Rhizobium sp. 57MFTsu3.2_2228664006` for accession `2228664006`)



In [None]:
from genome_handler import processDownloadedGenome

# Parent directory that holds the manually downloaded genome folder.
genome_database_dir = r'C:\Users\Daniel\Documents\analysis\probe_designer\genomes'

# Full path of the genome folder; its name ends with the accession passed below.
downloaded_genome_path = genome_database_dir + '/Rhizobium sp. 57MFTsu3.2_2228664006'

processDownloadedGenome(
    downloaded_genome_path=downloaded_genome_path,
    accession='2228664006',
    is_img=True,  # presumably marks an IMG-sourced genome -- TODO confirm
)

# Identify all specific probes per gene 
 - Can be applied to a single organism or to a group of organisms

In [3]:
from probe_designer import findSpecificProbes


# Organism(s) to design probes for; enable several entries for a cross-design.
accessions = {
    # 'pheobacter': 'ASM15476v2',
    'ecoli': 'ASM584v2',
    # 'fusarium': 'ASM14995v2',
    # 'ostreococcus_tauri': 'GCF_000214015.3',
    # 'os_virus_sOtV5': 'GCF_000872425.2',
}

findSpecificProbes(
    run_name='ecoli_test',  # name given to the output directory
    # Directory containing all of the downloaded genomes.
    genome_database_dir=r'/home/labs/danielda/danielda/scripts/probe_designer/genomes',
    probes_dir=r'/home/labs/danielda/danielda/scripts/probe_designer/probes',
    accessions=accessions,
    # Probe design properties (the values shown are the defaults).
    probe_props={
        'probe_len': 30,
        'gc_min': 40,
        'gc_max': 65,
        'max_base_rep': 4,
    },
    max_nonspecific_match=18,  # controls specificity
    is_allow_gene_duplicates=False,  # allow cross-hybridization between duplicates?
    is_parallel=True,
    chunk_size=500000,
    lsf_scripts_dir=r'/home/labs/danielda/danielda/scripts/probe_designer/lsf_scripts',
    is_overwrite=False,
)

Designing probes (parallel mode)...
 - finding gene duplicates
 - generate naive probes
 - blasting naive probes
 - determining probe specificities


# Identify the maximum number of probes per gene

In [4]:
from probe_designer import findMaxProbesPerGene

# Organism(s) to process, written as organism_name -> accession.
# NOTE(review): the probe-finding step earlier in this notebook keys the same
# accession as 'ecoli' under the same run_name -- confirm whether the organism
# name has to match between the two steps.
accessions = {
    # 'fusarium': 'ASM14995v2',
    # 'phaeobacter_inhibens_DSM_17395': 'ASM15476v2',
    'ecoli_k12': 'ASM584v2',
    # 'ostreococcus_tauri': 'GCF_000214015.3',
    # 'os_virus_sOtV5': 'GCF_000872425.2',
    # 'rhizobium': '2228664006',
}

findMaxProbesPerGene(
    run_name='ecoli_test',
    accessions=accessions,
    genome_database_dir=r'/home/labs/danielda/danielda/scripts/probe_designer/genomes',
    probes_dir=r'/home/labs/danielda/danielda/scripts/probe_designer/probes',
    min_probe_distance=1,  # minimum distance between probes (negative values allow overlap)
    is_exon_only=True,
)

ecoli_k12 ASM584v2


# Assemble probes

In [None]:
from probe_assembler import ProbeAssembler


# Organism selection, written as organism_name -> accession.
# NOTE(review): this dict is not passed to ProbeAssembler below -- confirm it
# is still needed in this cell.
accessions = {
    # 'fusarium': 'ASM14995v2',
    # 'phaeobacter_inhibens_DSM_17395': 'ASM15476v2',
    # 'ecoli_k12': 'ASM584v2',
    # 'ostreococcus_tauri': 'GCF_000214015.3',
    # 'os_virus_sOtV5': 'GCF_000872425.2',
    'rhizobium': '2228664006',
}

# Set up the assembler.
probe_designer = ProbeAssembler(
    run_dir=r'C:\Users\Daniel\Documents\analysis\probe_designer\probes\rhizobium_IMG',
    probe_per_gene='rhizobium.probes_per_gene.txt',
    primary_probes='rhizobium.max_probes.txt',  # the probes
    selected_gene_list='selected_genes.xlsx',  # file with a locus_tag column
    is_amplification=False,  # True if using Twist
    num_of_flanking=2,  # choose 2 or 4
    max_probes_per_ch={
        'fluor_640nm': 25,
        'fluor_560nm': 25,
        'fluor_488nm': 30,
    },
    # Problematic readouts (ROs) to remove, or readouts already used in another set.
    probes_to_remove=['R6', 'R21', 'R36', 'R107'],
    reference_genes=[],
    genes_of_interest=[],  # genes to prioritize in fluor_640nm
    # Gene or operon for which probes for 3 channels will be generated.
    positive_control_genes={},
    # Gene or operon for which probes for 3 channels will be generated.
    # Example left by the original author:
    #   'genes': ['P1_gp003', 'P1_gp013', 'P1_gp078'],
    #   'readouts': {'A647': 4, 'A488': 5, 'A550': 15}
    negative_control_genes={},
)

probe_designer.design()
