In [3]:
import pandas as pd

# Import OligoMiner modules.
# from oligodesigner import blockParse

from oligodesigner import io
from oligodesigner import parse
from oligodesigner import sequence

ModuleNotFoundError: No module named 'mygene'

In [2]:
# Configuration
# Transcriptome database for each supported species, keyed by species name.
# The selected entry is what gets passed to sequence.run_blast as the database.
# For instructions on building such a database, see
# https://www.ncbi.nlm.nih.gov/books/NBK569841/
cdna_database = dict(
    human='/mnt/heintz-bambi3/WORK/tmurakami/oligo/sequence/human_transcriptome/human_transcriptome_db',
    mouse='/mnt/heintz-bambi3/WORK/tmurakami/oligo/sequence/mouse_transcriptome/mouse_transcriptome_db',
    rat='/mnt/heintz-bambi3/WORK/tmurakami/oligo/sequence/rat_transcriptome/rat_transcriptome_db',
)


# Sequence fragments for each available HCR initiator set, keyed by initiator name.
# Every entry holds four `splitseq_*` sequences (18 nt each) and four two-letter
# `anchorseq_*` sequences. The entry selected by mFISH3D_param['hcr_bnum'] is the
# one handed to sequence.add_hcr_seq_v3.
# NOTE(review): for B1-B5, splitseq_lr equals the last 12 nt of splitseq_ll followed
# by the first 6 nt of splitseq_rr, and splitseq_rl ends with the first 6 nt of
# splitseq_ll. B8 does not follow this pattern -- confirm its sequences against the
# initiator reference.
hcr_b_set = {
    'B1': dict(
        splitseq_ll='GAGGAGGGCAGCAAACGG',
        splitseq_rr='GAAGAGTCTTCCTTTACG',
        anchorseq_ll='aa',
        anchorseq_rr='at',
        splitseq_lr='GGCAGCAAACGGGAAGAG',
        splitseq_rl='GCATTCTTTCTTGAGGAG',
        anchorseq_lr='tt',
        anchorseq_rl='tt',
    ),
    'B2': dict(
        splitseq_ll='CCTCGTAAATCCTCATCA',
        splitseq_rr='ATCATCCAGTAAACCGCC',
        anchorseq_ll='aa',
        anchorseq_rr='aa',
        splitseq_lr='AAATCCTCATCAATCATC',
        splitseq_rl='AGCTCAGTCCATCCTCGT',
        anchorseq_lr='ta',
        anchorseq_rl='ta',
    ),
    'B3': dict(
        splitseq_ll='GTCCCTGCCTCTATATCT',
        splitseq_rr='CCACTCAACTTTAACCCG',
        anchorseq_ll='tt',
        anchorseq_rr='tt',
        splitseq_lr='GCCTCTATATCTCCACTC',
        splitseq_rl='AAAGTCTAATCCGTCCCT',
        anchorseq_lr='ta',
        anchorseq_rl='aa',
    ),
    'B4': dict(
        splitseq_ll='CCTCAACCTACCTCCAAC',
        splitseq_rr='TCTCACCATATTCGCTTC',
        anchorseq_ll='aa',
        anchorseq_rr='at',
        splitseq_lr='CCTACCTCCAACTCTCAC',
        splitseq_rl='CACATTTACAGACCTCAA',
        anchorseq_lr='tt',
        anchorseq_rl='at',
    ),
    'B5': dict(
        splitseq_ll='CTCACTCCCAATCTCTAT',
        splitseq_rr='CTACCCTACAAATCCAAT',
        anchorseq_ll='aa',
        anchorseq_rr='aa',
        splitseq_lr='CCCAATCTCTATCTACCC',
        splitseq_rl='CACTTCATATCACTCACT',
        anchorseq_lr='at',
        anchorseq_rl='aa',
    ),
    'B8': dict(
        splitseq_ll='CCTTGATTATCTCGTCTC',
        splitseq_rr='CATCACTCGCACTCTACC',
        anchorseq_ll='at',
        anchorseq_rr='aa',
        splitseq_lr='TCTACCGTCAAGTCAAAC',
        splitseq_rl='CGTCTCCATCACTCGCAC',
        anchorseq_lr='aa',
        anchorseq_rl='aa',
    ),
}

In [3]:
# Parameter settings
# Run-specific choices for this probe design; later cells read them by key.
mFISH3D_param = dict(
    # Target sequence file: passed to io.gen_file_path and to OligoMiner.
    input_file='/mnt/heintz-bambi3/WORK/tmurakami/oligo/sequence/slc17a7_hs/HCRv3/slc17a7_hs.fasta',
    # Key into hcr_b_set.
    hcr_bnum='B1',
    # Passed to sequence.fetch_mygene together with the species.
    genename='Slc17a7',
    # Key into cdna_database.
    species='human',
    # Passed to sequence.get_off_targeting_oligo.
    minimum_offtarget_gap=100,
    # Passed to sequence.calc_oligo_interval.
    oligo_number=48,
)

# Settings that parse.oligominer_param_parser turns into the arguments of
# blockParse.runSequenceCrawler.
# NOTE(review): the key names look like OligoMiner blockParse options, where a
# lower-case / upper-case pair (l/L, gcPercent/GCPercent, tm/TM) would be a
# minimum / maximum bound -- confirm against the parser before relying on this.
oligominer_param = {
    "l": 20,
    "L": 20,
    "gcPercent": 25,
    "GCPercent": 75,
    "tm": 0,
    "TM": 100,
    "X": "AAAAAA,TTTTTT,CCCCCC,GGGGGG",
    "sal": 390,    # default value of blockParse
    "form": 30,
    "sp": 1,
    "concA": 25,   # default value of blockParse
    "concB": 25,   # default value of blockParse
    "headerVal": None,
    "bedVal": False,
    "OverlapModeVal": False,
    "verbocity": False,
    "reportVal": True,
    "debugVal": False,
    "metaVal": False,
    "outNameVal": None,
    "nn_table": "DNA_NN3",
}

In [4]:
# Pull the run settings out of mFISH3D_param into the names used by later cells.
input_file = mFISH3D_param['input_file']
genename = mFISH3D_param['genename']
minimum_offtarget_gap = mFISH3D_param['minimum_offtarget_gap']
oligo_number = mFISH3D_param['oligo_number']

# Resolve the species and initiator choices against the configuration tables.
# (`spiecies` keeps its original spelling because a later cell reads it by that name.)
spiecies = mFISH3D_param['species']
transcriptomeDatabase = cdna_database[spiecies]
hcr_initiator_set = hcr_b_set[mFISH3D_param['hcr_bnum']]

# Derive the paths of the temporary files and of the result file from the input path.
oligominer_fastq, oligominer_fasta, blast_result, oligo_sets, param_file = io.gen_file_path(input_file)

# Record both parameter sets in the parameter file (csv).
io.record_param(mFISH3D_param, oligominer_param, param_file)

In [5]:
# Run OligoMiner (blockParse) on the input file to generate candidate probes.
# blockParse is imported in this cell because its import in the first cell is
# commented out; without it this cell raises NameError on a fresh kernel
# (Restart Kernel -> Run All).
from oligodesigner import blockParse

blockParse.runSequenceCrawler(input_file, *parse.oligominer_param_parser(oligominer_param))

0 of 2949
132 candidate probes identified in 2.94 kb yielding 44.96 candidates/kb


In [6]:
# Fetch information about the gene of interest for the selected species.
geneinfo = sequence.fetch_mygene(genename, spiecies)

# BLAST is run on a FASTA file, so convert the OligoMiner FASTQ first and add ids to it.
io.convert_fastq2fasta(oligominer_fastq, oligominer_fasta)
io.add_id(oligominer_fasta)

# The number of threads can be inserted as a blast parameter.
sequence.run_blast(oligominer_fasta, blast_result, transcriptomeDatabase)

# Load the BLAST result, then exclude hits that are homologous to the gene of interest itself.
df_blast = io.read_blast(blast_result)
df_blast = sequence.exclude_self(df_blast, geneinfo)

# Collect the oligos to discard as off-targeting, using the minimum gap setting.
# NOTE(review): the exact criterion (distance between sstart/send of paired hits?)
# lives in sequence.get_off_targeting_oligo -- confirm there.
oligo_to_be_removed = sequence.get_off_targeting_oligo(minimum_offtarget_gap, df_blast)

In [7]:
# Load the OligoMiner candidates and drop the ones flagged as off-targeting.
oligominer_df = io.dataframe_from_oligominer(oligominer_fastq)
selected_oligo_df = sequence.remove_oligo(oligominer_df, oligo_to_be_removed)

# Compute the interval columns for the requested oligo count and append them column-wise.
interval = sequence.calc_oligo_interval(selected_oligo_df, oligo_number)
selected_oligo_df = pd.concat((selected_oligo_df, interval), axis='columns')

# Build the final oligos by adding the HCR sequences, then write the table to the result file.
res = sequence.add_hcr_seq_v3(selected_oligo_df, hcr_initiator_set)
res.to_csv(oligo_sets, index=False, header=True, sep=',')

In [8]:
# Display the final oligo table (the same object that was written to `oligo_sets` in the previous cell).
res

Unnamed: 0,gene,oligo_id,seq,start,end,interval_after,mean_interval,hcr_seq
0,NM_020309.4,NM_020309.4_0,"(A, C, T, T, G, C, A, G, C, C, T, C, C, T, T, ...",1,20,34.0,,"(G, A, G, G, A, G, G, G, C, A, G, C, A, A, A, ..."
1,NM_020309.4,NM_020309.4_1,"(T, G, G, A, C, C, C, C, G, G, G, A, A, C, C, ...",54,73,59.0,,"(G, C, A, T, T, C, T, T, T, C, T, T, G, A, G, ..."
2,NM_020309.4,NM_020309.4_2,"(C, G, G, C, A, G, G, A, G, C, C, G, C, C, A, ...",132,151,2.0,,"(G, A, G, G, A, G, G, G, C, A, G, C, A, A, A, ..."
3,NM_020309.4,NM_020309.4_3,"(A, G, T, T, C, C, G, C, C, A, G, G, A, G, G, ...",153,172,2.0,,"(G, C, A, T, T, C, T, T, T, C, T, T, G, A, G, ..."
4,NM_020309.4,NM_020309.4_4,"(G, G, A, A, G, C, T, A, G, C, G, G, G, T, C, ...",174,193,2.0,,"(G, A, G, G, A, G, G, G, C, A, G, C, A, A, A, ..."
5,NM_020309.4,NM_020309.4_5,"(T, C, G, G, G, A, A, G, C, T, G, C, A, C, C, ...",195,214,2.0,,"(G, C, A, T, T, C, T, T, T, C, T, T, G, A, G, ..."
6,NM_020309.4,NM_020309.4_6,"(T, G, G, A, G, A, A, G, C, G, G, C, A, G, G, ...",216,235,2.0,,"(G, A, G, G, A, G, G, G, C, A, G, C, A, A, A, ..."
7,NM_020309.4,NM_020309.4_7,"(C, G, G, A, G, A, C, G, C, T, G, G, A, G, C, ...",237,256,4.0,,"(G, C, A, T, T, C, T, T, T, C, T, T, G, A, G, ..."
8,NM_020309.4,NM_020309.4_8,"(G, A, T, G, G, G, C, G, C, C, C, G, G, T, G, ...",260,279,10.0,,"(G, A, G, G, A, G, G, G, C, A, G, C, A, A, A, ..."
9,NM_020309.4,NM_020309.4_9,"(G, G, A, C, C, C, G, C, C, G, G, T, G, G, T, ...",289,308,2.0,,"(G, C, A, T, T, C, T, T, T, C, T, T, G, A, G, ..."
