In [1]:
from subprocess import run
from tempfile import TemporaryDirectory
from pathlib import Path
from Bio.SeqIO import parse,write,read
from Bio.SeqRecord import SeqRecord
from typing import List
def run_orfipy(infile:str,minlen:int=300,
           cpu:int=1,table:int=9,
           conda_env:str='/home/hugheslab1/zfdeng/miniconda3/envs/n4j/'
           )->List[SeqRecord]:
    '''
    table:
        check: orfipy.translation_tables.translation_tables_dict
        ref: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?chapter=cgencodes
    
    '''
    with TemporaryDirectory() as tempdir:
        o=run([
            'conda','run',
            '-p',conda_env,
            'orfipy',Path(infile).absolute(),
            '--pep','pep.fa',
            '--min', f'{minlen}',
            '--max', '10000',
            '--procs', f'{cpu}','--table',
            f'{table}','--outdir','orfs_out',
            '--ignore-case'
        ],capture_output=True,cwd=tempdir)
        return list(parse(f'{tempdir}/orfs_out/pep.fa','fasta'))
        



In [2]:
import re
import logging
def match_bec(bec:str):
    '''
    bes: begin/end/strand in the given string
      in shape of [b-e](s)
    '''
    pattern = re.compile(r'\[(\d+)-(\d+)\]\((.+)\)')
    match = pattern.match(bec)
    if match:
        b,e,s = match.groups()
        s='s' if s=='+' else 'a'
        return {'begin':int(b),'end':int(e),'strand':s}
    else:
        logging.error(f'invalid bec string: {bec}')
        return {'begin':-1,'end':-1,'strand':'o'} 

def parse_orfipy_description(description:str):
    o=dict()
    descriptions=description.split()
    o['orf_id']=descriptions[0].split('.')[1]
    o.update(match_bec(descriptions[1]))
    o.update({i.split(':')[0]:i.split(':')[1] for i in descriptions[2:]})
    transannot=str(int(o['frame'][-1])-1)+o['strand']
    o['transannot']=transannot
    o.pop('frame')
    o.pop('strand')
    return o


In [3]:
from typing import Dict,Tuple
import pandas as pd
import numpy as np
split_hits_line=lambda line:[i.strip() for i in [line[:4],line[4:34],line[34:40],
    line[40:48],line[48:56],line[56:63],line[63:69],line[69:74],line[74:85],line[85:94],line[94:]]]

def robust_full(length:int,char:str):
    if length>0:
        return np.full(length,char)
    elif length==0:
        return np.array([],dtype=type(char))
    else:
        logging.error(f'negative length: {length} {char}')
        # return np.array([],dtype=type(char))
        raise ValueError
    
# split_align_line=lambda line:[i.strip() for i in 
#     (line[:16],line[16:21],line[21:102],line[102:107],line[107:])]
def parse_hhr(hhr:str)->Tuple[Dict[str,str],pd.DataFrame,List[dict]]:
    '''
    for the hits_list:
    deletion: denoted with '-';
    insertion: denoted with lower case;
    consensus: =:<-1.5 ; -:-1.5~-0.5; .:-0.5~0.5; 
               +:0.5~1.5; |:>1.5; ' ':mismatch
    '''
    with open(hhr,'r') as f:
        line='holder'
        current_section='meta'
        meta:Dict[str,str]={}
        # parsed_hhr={'head':{},'summary':[],'alignment':[]}
        while line != '':
            line=f.readline()
            if not line.strip():
                continue
            
            if current_section=='meta' and line.startswith(' No Hit'):
                current_section='hits'
                hits_cols=split_hits_line(line)
                hits_list=[]
                continue
            elif current_section=='hits' and line.startswith('No 1'):
                current_section='aligns'
                hits=pd.DataFrame(hits_list,columns=hits_cols)
                # hits.set_index('No',inplace=True)
                aligns_list=[]
                current_hit=0
            
            if current_section=='meta':
                meta[line[:14].strip()]=line[14:].strip()
            elif current_section=='hits':
                hits_list.append(split_hits_line(line))
            elif current_section=='aligns':
                if line.startswith('No '):
                    #wrap last hit
                    if current_hit>0:
                        #
                        left,right=tb-1,model_length-te
                        q_align_array=np.array(list(q_align),dtype=str)
                        t_align_array=np.array(list(t_align),dtype=str)
                        ins=np.where(t_align_array=='-')[0]
                        if len(ins)>0:
                            q_align_array[ins]=np.vectorize(lambda x:x.lower())(q_align_array[ins])
                        q_align_array=''.join(np.hstack([robust_full(left,'-'),q_align_array,robust_full(right,'-')]))
                        consensus_array=''.join(np.hstack([robust_full(left,' '),
                            np.array(list(consensus),dtype=str),robust_full(right,' ')]))
                        #
                        align_meta['P-value']=hits['P-value'][current_hit-1]
                        #
                        aligns_list.append(dict(
                            align_meta=align_meta,
                            q_align=q_align_array,
                            consens=consensus_array,
                            query_begin=qb,query_end=qe,
                            target_begin=tb,target_end=te,
                            model_length=model_length
                        ))
                        # DON'T Delete it. It's the original parse result
                        # aligns_list.append(dict(
                        #     align_meta=align_meta,
                        #     q_align=q_align,
                        #     t_align=t_align,
                        #     consens=consensus,
                        #     qb=qb,qe=qe,tb=tb,te=te,
                        #     model_length=model_length
                        # ))

                    #init this hit
                    current_hit=int(line.strip().replace('No ',''))
                    align_meta={}
                    q_align=''
                    t_align=''
                    consensus=''
                    consensus_flag=0
                    qb=qe=tb=te=model_length=0
                    continue
                elif line.startswith('>'):
                    align_meta['annotation']=line.replace('>','').strip()
                
                elif line.startswith('Probab'):
                    align_meta.update({i.split('=')[0]:i.split('=')[1] 
                            for i in line.strip().split()})
                elif line.startswith('Q ') and not line.startswith('Q Consensus'):
                    s0,s1,s2,s3,s4,s5=line.split()
                    if qb==0:
                        qb=int(s2)
                    qe=int(s4)
                    q_align+=s3
                elif line.startswith('Q Consensus'):
                    seq_len=len(line.split()[3])
                    consensus_flag=1
                elif consensus_flag==1:
                    consensus+=line[22:22+seq_len]
                    consensus_flag=0
                elif line.startswith('T ') and not line.startswith('T Consensus'):
                    s0,s1,s2,s3,s4,s5=line.split()
                    if tb==0:
                        tb=int(s2)
                    te=int(s4)
                    t_align+=s3
                    if model_length==0:
                        model_length=int(s5[1:-1])
    return meta,hits,aligns_list

from typing import Union
def run_hhblits(infile:Union[Path,str,SeqRecord],cpu:int=4,
            blitsbin:str='/home/hugheslab1/zfdeng/pangengraph/hh-suite/build/src/hhblits',
            blitsdb:str='/home/hugheslab1/zfdeng/pangengraph_2/hhs-db/pfam'
            ):
    # infile='tmp/AAbV||GBBW01007738#2s#11629#11886.fasta'
    with TemporaryDirectory() as tempdir:
        if isinstance(infile,SeqRecord):
            infile_path=f'{tempdir}/infile.fasta'
            write(infile,infile_path,'fasta')
        else:
            infile_path=Path(infile).absolute()
        o=run([blitsbin,'-i',infile_path,'-d',blitsdb,'-cpu',str(cpu),'-o','out.hhr','-hide_pred','-hide_dssp'],
            capture_output=True,cwd=tempdir)
        return parse_hhr(f'{tempdir}/out.hhr')
        #,'-o','hahaha.hhr'

def orfindice_to_fastaindice(
    trans_begin:int,trans_end:int,strand:str,orf_begin:int,orf_end:int):
    if strand=='s':
        return trans_begin+orf_begin*3-3,trans_begin+orf_end*3
    elif strand=='a':
        return trans_end-orf_end*3,trans_end-orf_begin*3+3

def merge_align_info(align:dict,description_dict:dict):
    merged_dict={}
    merged_dict['hitannot']=align['align_meta'].pop('annotation')

    #query
    merged_dict['transannot']=description_dict['transannot']
    merged_dict['begin'],merged_dict['end']=orfindice_to_fastaindice(
        int(description_dict['begin']),int(description_dict['end']),
        description_dict['transannot'][-1],
        align['query_begin'],align['query_end']
    )
    merged_dict['align_seq']=align['q_align']
    merged_dict['align_consensus']=align['consens']

    #target info
    merged_dict['hit_begin']=align['target_begin']
    merged_dict['hit_end']=align['target_end']
    merged_dict['model_length']=align['model_length']
    merged_dict['template_neff']=align['align_meta']['Template_Neff']
    #hitscores
    for k in ['Probab','Identities','Similarity', 
            'E-value','P-value',
            'Aligned_cols','Score','Sum_probs']:
        v=align['align_meta'][k]
        v=v.replace('%','') if k == 'Identities' else v
        v=int(v) if k=='Aligned_cols' else float(v)
        v=v/100 if k in ['Probab','Identities','Sum_probs'] else v
        merged_dict[k.lower()]=v
    return merged_dict

def hhblits_annotation(infasta:str,cpu:int=4):
    stem=Path(infasta).stem.replace(':genome','')
    orfs=run_orfipy(infasta,cpu=cpu)
    o=[]
    for orf in orfs:
        orf.id=orf.name=stem+orf.id
        description_dict=parse_orfipy_description(orf.description)
        orf.description=';'.join([f'{k}:{v}' for k,v in description_dict.items()])
        aligns=run_hhblits(orf,cpu=cpu)[2]
        # return description_dict,aligns
        for align in aligns:
            o.append(merge_align_info(align,description_dict))
    return pd.DataFrame(o)
    

In [4]:
infasta='/home/hugheslab1/zfdeng/pangengraph_2/_data/genome_fasta/EBOV||AF086833:genome.fasta'
# annotations=hhblits_annotation(infasta)
orfs=run_orfipy(infasta,cpu=4)

In [5]:
orfs

[SeqRecord(seq=Seq('ISKGMEWQLTCHLQLKDGASGPVSHQRWSIMKLVNGLKTATILKSKNLTGVSVY...LHF'), id='_ORF.1', name='_ORF.1', description='_ORF.1 [6243-6588](+) type:complete length:345 frame:1 start:ATC stop:TGA', dbxrefs=[]),
 SeqRecord(seq=Seq('MEASYERGRPRAARQHSRDGHDHHVRARSSSRENYRGEYRQSRSASQVRVPTVF...GTP'), id='_ORF.2', name='_ORF.2', description='_ORF.2 [8508-9372](+) type:complete length:864 frame:1 start:ATG stop:TAA', dbxrefs=[]),
 SeqRecord(seq=Seq('MAKATGRYNLISPKKDLEKGVVLSDLCNFLVSQTIQGWKVYWAGIEFDVTHKGM...LAI'), id='_ORF.3', name='_ORF.3', description='_ORF.3 [10344-11097](+) type:complete length:753 frame:1 start:ATG stop:TAA', dbxrefs=[]),
 SeqRecord(seq=Seq('MATQHTQYPDARLSSPIVLDQCDLVTRACGLYSSYSLNPQLRNCKLPKHIYRLK...RFD'), id='_ORF.4', name='_ORF.4', description='_ORF.4 [11580-18216](+) type:complete length:6636 frame:1 start:ATG stop:TGA', dbxrefs=[]),
 SeqRecord(seq=Seq('LEIVKRQIARNHKIPSMDSRPQKIWMAPSLTESDMDYHKILTAGLSVQQGIVRQ...HHQ'), id='_ORF.5', name='_ORF.5', description='_ORF.5 [421-26

In [71]:
annotations

Unnamed: 0,hitannot,transannot,begin,end,align_seq,align_consensus,hit_begin,hit_end,model_length,template_neff,probab,identities,similarity,e-value,p-value,aligned_cols,score,sum_probs
0,PF10783.12 ; DUF2599 ; Protein of unknown func...,0s,6249,6291,----------------------------------------------...,...,54,67,99,8.600,0.4397,0.36,0.740,0.89,0.00024,14,23.13,0.118
1,PF11488.11 ; Lge1 ; Transcriptional regulatory...,0s,6447,6582,-----------------MCTKYQERDRVPETLPSIKRVLSSCMIDL...,.-.|+.|.+.+-..+-........+.+.....,18,62,71,8.600,0.2553,0.22,0.266,2.80,0.00075,45,19.62,0.257
2,PF17923.4 ; TetR_C_18 ; Tetracyclin repressor-...,0s,6474,6564,--------------------------------------RVPETLPS...,...+.-.....,39,68,113,4.300,0.0683,0.17,0.214,26.00,0.00530,30,20.24,0.223
3,"PF14764.9 ; SPG48 ; AP-5 complex subunit, vesi...",0s,6462,6531,------------------QERDRVPETLPSIKRVLSSCMID-----...,++.+-++..+|.|||+.....-| ...,19,41,118,6.900,0.0623,0.17,0.130,25.00,0.00600,23,18.54,0.177
4,PF10441.12 ; Urb2 ; Urb2/Npa2 family,0s,6489,6579,----------------------------------------------...,...,155,184,209,8.800,0.0610,0.23,0.335,23.00,0.00610,30,18.88,0.210
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197,PF07849.14 ; DUF1641 ; Protein of unknown func...,1a,682,730,-----------VISLVRMMKTKHEKET-------------,+.+++++|+...-+..,12,27,40,7.600,0.0399,0.13,0.106,42.00,0.01000,16,14.61,0.079
198,PF17012.8 ; DUF5091 ; Domain of unknown functi...,1a,556,880,----------------------HGSKCSLN-VFSTRYCWQQFLKAL...,..|--.|| ++-.-++.++|.+.|...,23,137,145,4.600,0.0384,0.16,0.219,53.00,0.01100,102,20.46,0.552
199,PF15464.9 ; DUF4633 ; Domain of unknown functi...,1a,793,847,----------------------------------------------...,...,91,108,112,3.200,0.0313,0.39,0.726,74.00,0.01400,18,19.66,0.154
200,PF05617.14 ; Prolamin_like ; Prolamin-like,1a,526,562,--------------------------NGQTCCQDLVVI--------...,-+..||+.++-+ ...,27,38,65,9.200,0.0310,0.42,1.024,52.00,0.01400,12,14.38,0.091


In [63]:
orfindice_to_fastaindice(
    int(description_dict['begin']),int(description_dict['end']),
    description_dict['transannot'][-1],
    align['query_begin'],align['query_end']
)

(6249, 6291)

In [66]:
# merged_dict={}
# align=aligns[0]


        


In [67]:
merged_dict

{'hitannot': 'PF10783.12 ; DUF2599 ; Protein of unknown function (DUF2599)',
 'transannot': '0s',
 'begin': 6249,
 'end': 6291,
 'align_seq': '-----------------------------------------------------KGMEWQLTCHLQLK--------------------------------',
 'align_consensus': '                                                     ..|.+|+.||+...                                ',
 'hit_begin': 54,
 'hit_end': 67,
 'model_length': 99,
 'template_neff': '8.600',
 'probab': 0.4397,
 'identities': 0.36,
 'similarity': 0.74,
 'e-value': 0.89,
 'p-value': 0.00024,
 'aligned_cols': 14,
 'score': 23.13,
 'sum_probs': 0.11800000000000001}

In [59]:
aligns[0]

{'align_meta': {'annotation': 'PF10783.12 ; DUF2599 ; Protein of unknown function (DUF2599)',
  'Probab': '43.97',
  'E-value': '0.89',
  'Score': '23.13',
  'Aligned_cols': '14',
  'Identities': '36%',
  'Similarity': '0.740',
  'Sum_probs': '11.8',
  'Template_Neff': '8.600',
  'P-value': '0.00024'},
 'q_align': '-----------------------------------------------------KGMEWQLTCHLQLK--------------------------------',
 'consens': '                                                     ..|.+|+.||+...                                ',
 'query_begin': 3,
 'query_end': 16,
 'target_begin': 54,
 'target_end': 67,
 'model_length': 99}

In [55]:
description_dict

{'orf_id': '1',
 'begin': 6243,
 'end': 6588,
 'type': 'complete',
 'length': '345',
 'start': 'ATC',
 'stop': 'TGA',
 'transannot': '0s'}

In [23]:

stem=Path(infasta).stem.replace(':genome','')
o=run_orfipy(infasta)
for i in o:
    i.id=i.name=stem+i.id
    i.description=';'.join([f'{k}:{v}' for k,v in parse_orfipy_description(i.description).items()])
    

In [24]:
o

[SeqRecord(seq=Seq('ISKGMEWQLTCHLQLKDGASGPVSHQRWSIMKLVNGLKTATILKSKNLTGVSVY...LHF'), id='EBOV||AF086833_ORF.1', name='EBOV||AF086833_ORF.1', description='orf_id:1;begin:6243;end:6588;type:complete;length:345;start:ATC;stop:TGA;transannot:0s', dbxrefs=[]),
 SeqRecord(seq=Seq('MEASYERGRPRAARQHSRDGHDHHVRARSSSRENYRGEYRQSRSASQVRVPTVF...GTP'), id='EBOV||AF086833_ORF.2', name='EBOV||AF086833_ORF.2', description='orf_id:2;begin:8508;end:9372;type:complete;length:864;start:ATG;stop:TAA;transannot:0s', dbxrefs=[]),
 SeqRecord(seq=Seq('MAKATGRYNLISPKKDLEKGVVLSDLCNFLVSQTIQGWKVYWAGIEFDVTHKGM...LAI'), id='EBOV||AF086833_ORF.3', name='EBOV||AF086833_ORF.3', description='orf_id:3;begin:10344;end:11097;type:complete;length:753;start:ATG;stop:TAA;transannot:0s', dbxrefs=[]),
 SeqRecord(seq=Seq('MATQHTQYPDARLSSPIVLDQCDLVTRACGLYSSYSLNPQLRNCKLPKHIYRLK...RFD'), id='EBOV||AF086833_ORF.4', name='EBOV||AF086833_ORF.4', description='orf_id:4;begin:11580;end:18216;type:complete;length:6636;start:ATG;stop:TGA;tran

In [27]:
write(o[-1],'ebv_16.fasta','fasta')

1

In [28]:
scop_out=run_hhblits('ebv_16.fasta',blitsdb='/home/hugheslab1/zfdeng/pangengraph_2/scop90/scop90')
#pool, no use

In [35]:
fasta_obj:SeqRecord=read(infasta,'fasta')
trans_begin=379
trans_end=916
annot_begin=103
annot_end=112
strand='a'
fasta_obj[trans_end-annot_end*3:trans_end-annot_begin*3+3].reverse_complement().translate()

SeqRecord(seq=Seq('VYLIHWDDSL'), id='<unknown id>', name='<unknown name>', description='<unknown description>', dbxrefs=[])

In [26]:
fasta_obj:SeqRecord=read(infasta,'fasta')
trans_begin=421
annot_begin=103
annot_end=112
strand='s'
fasta_obj[trans_begin+annot_begin*3-3:trans_begin+annot_end*3].translate()

SeqRecord(seq=Seq('NLEEICQLIIQAFEAGVDFQESADSF'), id='<unknown id>', name='<unknown name>', description='<unknown description>', dbxrefs=[])

In [45]:
scop_out[1]['P-value'][1]

'0.012'

In [19]:
_=run_hhblits('ebv_05.fasta')

In [20]:
_

({'Query': 'EBOV||AF086833_ORF.5 orf_id:5;begin:421;end:2686;type:complete;length:2265;start:TTG;stop:TGA;transannot:2s',
  'Match_columns': '755',
  'No_of_seqs': '104 out of 114',
  'Neff': '3.09153',
  'Searched_HMMs': '578',
  'Date': 'Thu Apr 11 15:09:31 2024',
  'Command': '/home/hugheslab1/zfdeng/pangengraph/hh-suite/build/src/hhblits -i /home/hugheslab1/zfdeng/pangengraph_2/ebv_05.fasta -d /home/hugheslab1/zfdeng/pangengraph_2/hhs-db/pfam -cpu 4 -o out.hhr -hide_pred -hide_dssp'},
    No                             Hit   Prob  E-value  P-value   Score   SS  \
 0   1   PF05505.15 ; Ebola_NP ; Ebola  100.0   1E-243   2E-247  1940.5  0.0   
 1   2  PF06407.14 ; BDV_P40 ; Borna d   99.9  3.6E-27  7.6E-31   241.5  0.0   
 2   3  PF00973.22 ; Paramyxo_ncap ; P   96.5    4E-06  8.1E-10    86.8  0.0   
 3   4  PF03216.16 ; Rhabdo_ncap_2 ; R   88.2    0.013    3E-06    53.0  0.0   
 4   5  PF03246.16 ; Pneumo_ncap ; Pne   62.8     0.41  7.1E-05    45.6  0.0   
 5   6  PF16402.8 ; DUF501

In [None]:
'sada'.islower

In [81]:
len('PVKMNGDDIVFRSSPGMADRWMESVSLAGLTLSRGKTMVDKSYFTLNSRMFISGFSKVH')

59

In [55]:
# for i in open('o1.hhr','r').readline():
#     break
# i
with open('xx','r') as f:
    i='holder'
    while i:
        i=f.readline()
        print(i)
        

asdasd

sad

as



In [49]:
open('o1.hhr','r').readline()

'Query         _ORF.4 orf_id:4;begin:59;end:2159;strand:+;type:complete;length:2100;frame:3;start:ATG;stop:TAA\n'

In [41]:
list(parse('haha.fasta','fasta'))

[SeqRecord(seq=Seq('LGSRFVICGIVRRCICLAGAGCPSRCHSFFSVGCYLRGSMTWSNMQGGWVKSLR...SIE'), id='_ORF.1', name='_ORF.1', description='_ORF.1 orf_id:1;begin:234;end:687;strand:+;type:complete;length:453;frame:1;start:TTG;stop:TAG', dbxrefs=[]),
 SeqRecord(seq=Seq('MIENGIGRLSLSKMPPRGISLDSSDAGISGGVLVSWVSRHAMPFVFWSLVLRRN...HCL'), id='_ORF.2', name='_ORF.2', description='_ORF.2 orf_id:2;begin:1698;end:2241;strand:+;type:complete;length:543;frame:1;start:ATG;stop:TAA', dbxrefs=[]),
 SeqRecord(seq=Seq('MRQSNCFLRFEGGPGEPQGTIAVVLRGLLWSPSFSFEKQVVEGESRSYRVFETL...KIE'), id='_ORF.3', name='_ORF.3', description='_ORF.3 orf_id:3;begin:1282;end:1624;strand:+;type:complete;length:342;frame:2;start:ATG;stop:TAA', dbxrefs=[]),
 SeqRecord(seq=Seq('MTSIKKSLGAVGCPVMGRLKEFVGAMGEPYGLSLPLPDLQAVPFQDRLSLLKKF...PMF'), id='_ORF.4', name='_ORF.4', description='_ORF.4 orf_id:4;begin:59;end:2159;strand:+;type:complete;length:2100;frame:3;start:ATG;stop:TAA', dbxrefs=[]),
 SeqRecord(seq=Seq('MGSLETTTAADTNPSGLRLPFGKTSKCTNYRHVHT

In [23]:
o[0].description.split()

['_ORF.1',
 '[234-687](+)',
 'type:complete',
 'length:453',
 'frame:1',
 'start:TTG',
 'stop:TAG']

In [None]:
# o
# from orfipy.findorfs import revcomp,oc
# def worker_single(seqs,minlen,maxlen,strand,starts,stops,table,include_stop,partial3,partial5,bw_stops,ignore_case,file_streams,tmp):
#     """
#     Compute ORFs using single thread

#     Parameters
#     ----------
#     seqs : Fastx object
#         A Fastx object.
#     minlen : int
#         min len of ORFs.
#     maxlen : int
#         Max len of ORFs.
#     strand : char
#         Strand to use (b)oth, (f)wd or (r)ev.
#     starts : list
#         List of start codons.
#     stops : list
#         List of stop codons.
#     table : dict
#         translation tab.
#     include_stop : bool
#         Include stop in results.
#     partial3 : bool
#         report ORF without stop codon.
#     partial5 : bool
#         report ORF without start codon.
#     bw_stops : bool
#         Orfs defined as between stops.
#     ignore_case : bool
#         Convert lower case chars to upper
#     file_streams : list
#         List of file streams for outputting the results.
#     tmpdir : str
#         Out directory.

#     Returns
#     -------
#     None.

#     """
   
#     # ut.print_notification('orfipy single-mode')
#     #outputs
#     outputs=[]
#     for f in file_streams:
#         if f:
#             outputs.append(True)
#         else:
#             outputs.append(False)
#     #results=[]
#     for name, seq, *rest in seqs:
#         global _total_seqs
#         _total_seqs+=1
#         #print(name, seq)
#         thisname=name
#         thisseq=seq
#         if ignore_case: thisseq=thisseq.upper()
#         #ignore if seq is < minlen
#         if len(thisseq)<minlen:
#             continue
#         thisseq_rc=None
#         if strand == 'b' or strand =='r':
#             thisseq_rc=revcomp(thisseq)
        
#         res=oc.start_search(thisseq,thisseq_rc,thisname,minlen,maxlen,strand,starts,stops,table, include_stop, partial3, partial5, bw_stops,outputs)
#     return res
        # write_results_single(res, file_streams)