In [25]:
import os
from Bio import SeqIO
import io
from Bio.Seq import Seq
import subprocess

In [27]:
def make_raw_files_for_alignment(gene_sequence,annotated_genome_location,annotated_species_name,error_exon,query_fasta_sequence,query_length):
    for offset in range(3):
        translated_sequence = str(gene_sequence[offset:].translate()).split("*")
        for i in range(len(translated_sequence)):
            if len(translated_sequence[i])> 0.8*query_length:
                sequence_set = f">set{i+1}_frame{offset}\n{translated_sequence[i]}\n\n"
                # print(i+1, offset)
                
                with open(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/{error_exon}/for_alignment/{error_exon}_translated_genomic_sequence_{i+1}_frame{offset}.fa",'w') as out_file:
                    output = f"{query_fasta_sequence}\n\n{sequence_set}"
                    out_file.write(output)

In [28]:
def run_mafft(annotated_genome_location,annotated_species_name,error_exon):
    location = f'{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/{error_exon}/for_alignment'
    list_of_files_to_run_mafft_on = os.listdir(location)
    for file in list_of_files_to_run_mafft_on:
        if file.endswith(".fa"):
            command = f'"/home/saurav/miniconda3/envs/ncbi_datasets/bin/mafft" --localpair --maxiterate 16 --reorder --distout "{location}/{file}" > "{location}/alignment_{file}.txt"'
            # print(command)
            subprocess.run(f'{command}', shell=True, stderr = subprocess.DEVNULL) 
            # os.system(f'{command}')
            command = f'"/home/saurav/miniconda3/envs/ncbi_datasets/bin/mafft" --localpair --clustalout --maxiterate 16 --reorder "{location}/{file}" > "{location}/alignment_clustal_{file}.txt"'
            subprocess.run(f'{command}', shell=True, stderr = subprocess.DEVNULL) 
            
            # os.system(f'{command}')
    return(location)

In [96]:
def process_mafft_output(mafft_run_folder):
    list_of_files_in_mafft_run_folder = os.listdir(mafft_run_folder)
    score_output = []
    score = 99
    min_score_sequence = ''
    alignment_file = ''
    for file in list_of_files_in_mafft_run_folder:
        if file.endswith(".fa.hat2"):
            with io.open(f"{mafft_run_folder}/{file}", 'r') as dist_matrix_file:
                dist_matrix_list = dist_matrix_file.readlines()
            
            sequence_name = dist_matrix_list[-2].rstrip().split("=")[1]
            distance_score = float(dist_matrix_list[-1].rstrip())
            if len(score_output) < 5:
                score_output.append( [sequence_name,distance_score])
            else:
                for i in range(len(score_output)):
                    score_at_this_index = score_output[i][1]
                    if distance_score < score_at_this_index:
                        score_output[i] = [sequence_name,distance_score]
                        break
            if distance_score < score:
                score = distance_score
                min_score_sequence = sequence_name
                alignment_file = f'alignment_{file.replace(".hat2",".txt")}'
    # print(score_output)
    print(f"min = {min_score_sequence}, {score}" )
    # score_out_merged = '\n'.join(score_output)
    print(f"5 top scores:\n{score_output}")
    
    alignment_file = SeqIO.parse(f"{mafft_run_folder}/{alignment_file}", 'fasta')
    # print (records.id)
    fasta_start_position = 0
    fasta_end_position = 0
    start_switch = 0
    end_switch = 0
    for records in alignment_file:
        
        if start_switch == 1 and end_switch == 1:
            print(fasta_start_position, fasta_end_position)
            gene_sequence = records.seq[fasta_start_position:fasta_end_position]
            print(f"{records.id}\n{gene_sequence}")
        if error_exon in records.id:
            for current_position in range(len(records.seq)):
                # print(f"current_position = {current_position}, {len(records.seq)}")
                # print(records.seq[current_position])
                if "-" not in records.seq[current_position] and start_switch == 0:
                    fasta_start_position = current_position
                    start_switch = 1
                if end_switch == 1 and "-" not in records.seq[current_position]:
                    end_switch = 0
                if current_position == len(records.seq)-1:
                    end_switch = 1
                    start_switch = 1
                    
                if start_switch == 1 and records.seq[current_position] == "-" and end_switch == 0:
                    fasta_end_position = current_position
                    end_switch = 1
    return(gene_sequence)

In [97]:
def get_genome_file(genome_location,species):
    list_of_files_in_genome_folder = os.listdir(f"{genome_location}/{species}")
    for file in list_of_files_in_genome_folder:
        if file.endswith("_genomic.fna"):
            genome_file = file
    return(genome_file)  

def get_annotated_genome_name(annotated_genome_location, species):
    list_of_annotated_genomes = os.listdir(annotated_genome_location)
  
    # print(annotated_species)
    for annotated_species in list_of_annotated_genomes:
        if annotated_species.endswith(species):
            return(annotated_species)
             
    if annotated_species_name == '':
        print(f"Error with annotated species name")
        assert False

def check_and_make_folders(annotated_genome_location,annotated_species_name,error_exon):
    list_of_files_inside_annotated_species_folder = os.listdir(f"{annotated_genome_location}/{annotated_species_name}")

    if "Period_gene_genomic_sequence_individual_exon" not in list_of_files_inside_annotated_species_folder:
        os.mkdir(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon")
    
    list_of_exon_directories = os.listdir(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/")
    
    if error_exon not in list_of_exon_directories:
        os.mkdir(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/{error_exon}")
        os.mkdir(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/{error_exon}/for_alignment")
    elif error_exon in list_of_exon_directories:
        list_of_folders_1 = os.listdir(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/{error_exon}")
        if "for_alignment" not in list_of_folders_1:
            os.mkdir(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/{error_exon}/for_alignment")
        list_of_files = os.listdir(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/{error_exon}/for_alignment")
        for file in list_of_files:
            os.remove(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/{error_exon}/for_alignment/{file}")


In [102]:
def get_gene_sequence(genome_location, species, genome_file, scaffold,gene_start,gene_end,complement,annotated_genome_location,annotated_species_name):
    from Bio import SeqIO
    print("Getting Gene")
    list_of_files_inside_annotated_species_folder = os.listdir(f"{annotated_genome_location}/{annotated_species_name}")

    if "Period_gene_genomic_sequence_individual_exon" not in list_of_files_inside_annotated_species_folder:
        os.mkdir(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon")

    list_of_files_inside_indiv_exon_folder = os.listdir(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon")
    if "temp" not in list_of_files_inside_indiv_exon_folder:
         os.mkdir(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/temp")

    # os.system(f'samtools faidx "{genome_location}/{species}/{genome_file}"')
    # subprocess.run("pwd")
    subprocess.run(f'samtools faidx "{genome_location}/{species}/{genome_file}"', shell = True, stderr = subprocess.DEVNULL)
    # os.system(f'samtools faidx "{genome_location}/{species}/{genome_file}" {scaffold}:{gene_start}-{gene_end} > "{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/temp/temp_genome.fa"')
    subprocess.run(f'samtools faidx "{genome_location}/{species}/{genome_file}" {scaffold}:{gene_start}-{gene_end} > "{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/temp/temp_genome.fa"', shell = True, stderr = subprocess.DEVNULL)
    genome = SeqIO.parse(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/temp/temp_genome.fa", "fasta")
    for entries in genome:
        gene_sequence = entries.seq
        if complement == "1":
            gene_sequence = gene_sequence.reverse_complement()
        break
    # print(gene_sequence)
    return (gene_sequence)

In [99]:
def mafft_process(previous_exon_coordinates,
                  next_exon_coordinates,
                  current_exon_coordinates,
                  query_species,
                  query_transcript,
                  query_location,
                  annotated_genome_location,
                 genome_location):
    query_exon = current_exon_coordinates[6].split("query")[-1][1:]
    error_exon = query_exon
    # print("AA")
    genome_file = get_genome_file(genome_location,species)
    
    upstream_exon_line, downstream_exon_line = previous_exon_coordinates.split(","),next_exon_coordinates.split(",")
    complement,scaffold = upstream_exon_line[4], upstream_exon_line[1]

    if complement == "0":
        gene_start = max(int(upstream_exon_line[2]),int(upstream_exon_line[3]))
        gene_end = min(int(downstream_exon_line[2]),int(downstream_exon_line[3]))
    
    if complement == "1":
        gene_start = max(int(downstream_exon_line[2]),int(downstream_exon_line[3]))
        gene_end = min(int(upstream_exon_line[2]),int(upstream_exon_line[3]))
    # print("BB")

    # print(gene_sequence)
    # gene_sequence = Seq("GGTAAggacattataatttttaatgcatTCCCTCGAAACTTGAAATCTTCTAATCCACTAgagtttctataaaaaaaaatatggcataTTATGCCGTCATGCCAATCCATTATGGCggatatccatactaatattatgctTGTGAAATTGTttctgtttgtttctttgtttgttacctatgaaCAGCTAAACCGTTgaatcgattttcatgaaatttgacACACACGTAGTTTGCATCTTTAGTACAGTGAtttcattataacatttatattggcTGTAATATAAAAAGCGTTAACCATCTGATTTGTGAGATGGTGGAGCTAAGGCTAAAAATTCAGTATGTAACtattttaagttaaatgtaTAGCATCATTTGCATCTGAAGCCGTTTCATCATATAGATTGATTagcgttttttattatatttggtcGGAGTAATACTAATAATCTATTAGCGTGAGCAACATTAATTAATCTGTTTGacagattttattaaaatttactttcatGATATTGAACTATAGAAAGAGTAAAGGTGAAATAGAGAAGTTAGTAATACAAAGTGAATTCACAAAAccgttttattttacaaaaaattgaatgaaattataatttattttcatatttacagGATCGCAAAACCTTTGCATCCCAAATCACTAGCGGGCTTTTAGCTCCGAAATCAACTAGCGGTGAACAACCTCAAGGTAATTTCGATTGATTTAATGATCCATGCTTCCAAtagatttaatttgaaaatttgtcgTTTGTATTAGCTTCAATACATCACATTCTCAAACATGACTTCATACAACCAAACTTATCCgctacaattttataattgagATAGTGAAAAAGTTAATAGCTTCAAGATACAAATTTTACAGGGTTCTTTCATACAAACGTGTAATTAATTCTAAATGTTAtaactaagtataaaaataatgtttcctTTAAAGTGAACTGTCTTCTTGCTGATTCTAGTACTAGTATCaataatttcaattaataagTACCTCAATAAGATAAACCGCCATATTTTATTGCCGGCTGTTTCATgtaaaattatcttatttaaaaacatttaaaagacAAATTTACAAAATGTATTAAGCTTCTGTGAGAATTATGTGCACTATGTTTCATACATAGCAAAAGTTGTATTTCATTATTTGagatgaaattgaaaatttaatgttaataatttttgttttataatgcaTTTAAAGTAATCATCAACACATAACCTAACTTTTGGGATAGTTAAAATTTGTGTCGCAACTTCAATAAGAGCTTGTCCAC")

    with open(f"{query_location}/{query_species}/{query_transcript}/query_{query_exon}.fa", 'r') as query_file:
        query_file_list = query_file.readlines()
        query_fasta_sequence = "".join(query_file_list)
        left_overhang = query_file_list[0].split("Frame")[1][1]
        right_overhang = query_file_list[0].split("rightoh")[1][1]
        original_query_name = query_file_list[0]
        original_query = query_file_list[1]
        
    query_length = len(query_fasta_sequence.split("\n")[1])
    
    annotated_species_name = get_annotated_genome_name(annotated_genome_location, species)
    # print(annotated_species_name)

    # print(annotated_genome_location)
    gene_sequence = get_gene_sequence(genome_location, species, genome_file, scaffold,gene_start,gene_end,complement,annotated_genome_location,annotated_species_name)
    # assert False
    check_and_make_folders(annotated_genome_location,annotated_species_name,error_exon)
    
    
    make_raw_files_for_alignment(gene_sequence,annotated_genome_location,annotated_species_name,error_exon,query_fasta_sequence,query_length)
    
    return(annotated_species_name, error_exon,species, genome_file,gene_sequence,left_overhang,right_overhang,scaffold, original_query_name,original_query )


In [108]:
blast_output_location = "/mnt/h/My Drive/Circadian Rhythm Genes Project/6.Period Exon Analysis/2.Blast/1.Blast_output"
species = "Maniola_hyperantus"

annotated_genome_location = "/mnt/h/My Drive/Circadian Rhythm Genes Project/6.Period Exon Analysis/1.Annotated Species"
query_location = "/mnt/h/My Drive/Circadian Rhythm Genes Project/6.Period Exon Analysis/2.Blast/0.Query"
query_species = "5.Bicyclus_anynana"
query_transcript = "XM_024088150.2"

genome_location = "/mnt/f/Genomes_2023-12-26"



output_coordinate_file = "Species," + "Scaffold," + "Start," + "Stop," + "Complement," + "Error," + "Gene,"+ "Query_start," + "Query_stop,"+ "Query_Length," +  "AG_GT," + "Spliceator_prediction\n"
exons_to_check_list = ["Exon_3","Exon_5","Exon_7","Exon_15","Exon_16","Exon_17","Exon_19","Exon_20","Exon_21","Exon_22","Exon_23","Exon_24","Exon_26"]
list_of_files_in_species_folder = os.listdir(f"{blast_output_location}/{species}")
coordinate_file_name = ''
for file_names in list_of_files_in_species_folder:
    if file_names.endswith("_coordinates.csv"):
        coordinate_file_name = file_names
if coordinate_file_name =='':
    print(f"Coordinate file error")
    assert False

with io.open(f"{blast_output_location}/{species}/{coordinate_file_name}", 'r') as temp_file_open:
    coordinate_file_lines = temp_file_open.readlines()
# print(coordinate_file_lines[0])
for exons_to_check in exons_to_check_list:
    for i in range(1,len(coordinate_file_lines)):
        coordinate_file_lines_split = coordinate_file_lines[i].split(",")
        current_exon_coordinates = coordinate_file_lines[i].split(",")
        # if coordinate_file_lines_split[5] == "Y":
        # print(coordinate_file_lines_split[6])
        if  coordinate_file_lines_split[6].endswith(exons_to_check):
            if i == 1:
                print(f'First Exon has errors\n{coordinate_file_lines[i]}')
                assert False
            else:
                print(coordinate_file_lines[i])
                process_current_exon = input("Error Found! Process?")
                
                if process_current_exon.lower()[0] == "y":
                    previous_exon_number = i-1
                    next_exon_number = i+1
                    previous_exon_coordinates = ''
                    next_exon_coordinates = ''
                    while True:
                        print(previous_exon_number)
                        if coordinate_file_lines[previous_exon_number].split(",")[5] == "N":
                            previous_exon_coordinates = coordinate_file_lines[previous_exon_number]
                            break
                        else:
                            proceed_prompt = input(f"Previous exon coordinate\n{coordinate_file_lines[previous_exon_number]}\nProceed?")
                            if proceed_prompt.lower()[0] == "y":
                                previous_exon_coordinates = coordinate_file_lines[previous_exon_number]
                                break
                        previous_exon_number -= 1
                    while True:
                        print(next_exon_number)
                        if coordinate_file_lines[next_exon_number].split(",")[5] == "N":
                            next_exon_coordinates = coordinate_file_lines[next_exon_number]
                            break
                        else:
                            proceed_prompt = input(f"Next exon coordinate\n{coordinate_file_lines[next_exon_number]}\nProceed?")
                            if proceed_prompt.lower()[0] == "y":
                                next_exon_coordinates = coordinate_file_lines[next_exon_number]
                                break
                        
                        next_exon_number += 1
            
                    
                    print(previous_exon_coordinates)
                    print(next_exon_coordinates)
                    annotated_species_name, error_exon, species, genome_file, gene_sequence,left_overhang,right_overhang,scaffold,original_query_name, original_query = mafft_process(previous_exon_coordinates,
                                                                                                                      next_exon_coordinates,
                                                                                                                      current_exon_coordinates,
                                                                                                                      query_species,
                                                                                                                      query_transcript,
                                                                                                                      query_location,
                                                                                                                      annotated_genome_location,
                                                                                                                         genome_location)
            
                    mafft_run_folder = run_mafft(annotated_genome_location,annotated_species_name,error_exon)
            
                    mafft_run_folder = f'{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/{error_exon}/for_alignment'
                    possible_gene_sequence = process_mafft_output(mafft_run_folder)
                    
                    while True:
                        print(f"Original query = {original_query}")
                        print(f"New query = {possible_gene_sequence}")
                        check_gene_sequence = input("is the query sequence ok?")
                        if check_gene_sequence.lower()[0] == "n":
                            possible_gene_sequence = input("Enter the desired query sequence :")
                            break
                        if check_gene_sequence.lower()[0] == "y":
                            break
            
                    list_of_folders_inside_exon_folder = os.listdir(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/{error_exon}")
                    if "for_blast" not in list_of_folders_inside_exon_folder:
                        os.mkdir(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/{error_exon}/for_blast")
                    else:
                        list_of_files = os.listdir(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/{error_exon}/for_blast")
                        for file in list_of_files:
                            os.remove(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/{error_exon}/for_blast/{file}")
            
                    
                    with open(f"{annotated_genome_location}/{annotated_species_name}/Period_gene_genomic_sequence_individual_exon/{error_exon}/for_blast/new_query.txt",'w') as query_file:
                        query = f">Query_{error_exon}\n{possible_gene_sequence}"
                        query_file.write(query)
        #         else:
        #             output_coordinate_file += coordinate_file_lines[i].rstrip()+",NA,NA\n"
        # else:
        #         output_coordinate_file +=coordinate_file_lines[i].rstrip()+",NA,NA\n"

Maniola_hyperantus,NC_048564.1,6317034,6317209,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_3,1,58,58



Error Found! Process? y


2


Previous exon coordinate
Maniola_hyperantus,NC_048564.1,0,-18,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_2,1,29,23

Proceed? n


1
4
Maniola_hyperantus,NC_048564.1,6314503,6314591,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_1,1,29,29

Maniola_hyperantus,NC_048564.1,6318417,6318505,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_4,7,28,28

Getting Gene
min = set56_frame2, 1.109
5 top scores:
[['set56_frame2', 1.109], ['set19_frame0', 1.809], ['set58_frame0', 1.815], ['set27_frame1', 1.782], ['set19_frame2', 1.87]]
37 95
set56_frame2
SNNQGQPAEKRSKDKDAKKKKPLQSLQTDLQVTEVINETECVRELAPILESSKEEISD
Original query = NNNQTQSPEKRPKEKELKKKKTQLTTQNDVQVVEAKIEIVCDVTMKPVFEPLPEEISD
New query = SNNQGQPAEKRSKDKDAKKKKPLQSLQTDLQVTEVINETECVRELAPILESSKEEISD


is the query sequence ok? n
Enter the desired query sequence : NNQGQPAEKRSKDKDAKKKKPLQSLQTDLQVTEVINETECVRELAPILESSKEEISD


Maniola_hyperantus,NC_048564.1,-18,-15,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_5,7,28,23



Error Found! Process? y


4
6
Maniola_hyperantus,NC_048564.1,6318417,6318505,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_4,7,28,28

Maniola_hyperantus,NC_048564.1,6319028,6319159,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_6,1,44,44

Getting Gene
min = set7_frame0, 1.097
5 top scores:
[['set4_frame2', 1.56], ['set5_frame2', 1.688], ['set7_frame0', 1.097], ['set8_frame0', 1.727], ['set3_frame1', 1.799]]
0 23
set7_frame0
PDVCNTPVGTPLALVTSHPNSIQ
Original query = PSPSTTPTPLTFVTSKPNSNQIS
New query = PDVCNTPVGTPLALVTSHPNSIQ


is the query sequence ok? n
Enter the desired query sequence : DVCNTPVGTPLALVTSHPNSIQ


Maniola_hyperantus,NC_048564.1,6320071,6320146,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_7,1,29,29



Error Found! Process? y


6
8
Maniola_hyperantus,NC_048564.1,6319028,6319159,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_6,1,44,44

Maniola_hyperantus,NC_048564.1,6321581,6321784,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_8,3,67,67

Getting Gene
min = set15_frame0, 0.975
5 top scores:
[['set15_frame0', 0.975], ['set35_frame0', 1.704], ['set7_frame0', 1.713], ['set62_frame1', 1.725], ['set14_frame0', 1.628]]
5 34
set15_frame0
DRDTFASQITSGL---AAPK-TVNGTQTK
Original query = DRNTFASQISSGLVVVAAPKQQVDGTKGK
New query = DRDTFASQITSGL---AAPK-TVNGTQTK


is the query sequence ok? y


Maniola_hyperantus,NC_048564.1,6330528,6330686,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_15,1,52,52



Error Found! Process? y


14
16
Maniola_hyperantus,NC_048564.1,6329634,6329738,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_14,1,34,34

Maniola_hyperantus,NC_048564.1,6330800,6330968,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_16,1,51,55

Getting Gene
min = set20_frame2, 0.799
5 top scores:
[['set11_frame0', 1.885], ['set11_frame1', 1.831], ['set12_frame1', 1.87], ['set18_frame1', 1.721], ['set20_frame2', 0.799]]
29 81
set20_frame2
FSKCVSSMLNNPGEFDERASTSDPSTVADASTSHTGTSNKYPVLRLTESLLN
Original query = FSKNFLSMVNSSGEFDEMASTSDSSTAAVASSNNACSSNGFQALRLTESLLN
New query = FSKCVSSMLNNPGEFDERASTSDPSTVADASTSHTGTSNKYPVLRLTESLLN


is the query sequence ok? y


Maniola_hyperantus,NC_048564.1,6330800,6330968,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_16,1,51,55



Error Found! Process? y


15
17
Maniola_hyperantus,NC_048564.1,6330528,6330686,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_15,1,52,52

Maniola_hyperantus,NC_048564.1,6331104,6331239,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_17,2,45,45

Getting Gene
min = set2_frame1, 0.492
5 top scores:
[['set2_frame1', 0.492]]
23 121
set2_frame1
HNIDMEKKLLKQHRETRSSSKSDREKASNESRQKKKEHLARCNALFQPTTAGLSPNQVFSFYFVEWEKISTDSCLLSVSDTHGLKQHPYKKNRQITEF
Original query = HNVEMEKELLKLHRETRSSKSDREKASNESRLKKKEHLARCNAFFMPTSAASEFK
New query = HNIDMEKKLLKQHRETRSSSKSDREKASNESRQKKKEHLARCNALFQPTTAGLSPNQVFSFYFVEWEKISTDSCLLSVSDTHGLKQHPYKKNRQITEF


is the query sequence ok? n
Enter the desired query sequence : HNIDMEKKLLKQHRETRSSSKSDREKASNESRQKKKEHLARCNALFQPTTAG


Maniola_hyperantus,NC_048564.1,6331104,6331239,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_17,2,45,45



Error Found! Process? y


16
18
Maniola_hyperantus,NC_048564.1,6330800,6330968,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_16,1,51,55

Maniola_hyperantus,NC_048564.1,6331623,6331727,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_18,1,31,34

Getting Gene
min = set1_frame1, 1.096
5 top scores:
[['set9_frame0', 1.757], ['set1_frame1', 1.096], ['set5_frame1', 1.806], ['set3_frame2', 1.839], ['set6_frame2', 1.9]]
0 90
set1_frame1
QVFSFYFVEWEKISTDSCLLSVSDTHGLKQHPYKKNRQITEFCLQHHGVKRASKQAEEGTTHKHRCSSPRNRRKLSQKVINNAPLSAATN
Original query = PQGVKRSSKNTDDWGTNKHRCSSARTTRRRFTEPPNNPPLSATYN
New query = QVFSFYFVEWEKISTDSCLLSVSDTHGLKQHPYKKNRQITEFCLQHHGVKRASKQAEEGTTHKHRCSSPRNRRKLSQKVINNAPLSAATN


is the query sequence ok? n
Enter the desired query sequence : HGVKRASKQAEEGTTHKHRCSSPRNRRKLSQKVINNAPLSAATN


Maniola_hyperantus,NC_048564.1,0,6,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_19,1,31,33



Error Found! Process? y


18
20


Next exon coordinate
Maniola_hyperantus,NC_048561.1,910017,910138,1,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_20,9,39,40

Proceed? n


21


Next exon coordinate
Maniola_hyperantus,NC_048558.1,3090742,3090903,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_21,19,53,54

Proceed? n


22


Next exon coordinate
Maniola_hyperantus,NC_048540.1,13943498,13943638,1,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_22,2,34,47

Proceed? n


23


Next exon coordinate
Maniola_hyperantus,NC_048564.1,6338236,6338415,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_23,4,54,63

Proceed? y


Maniola_hyperantus,NC_048564.1,6331623,6331727,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_18,1,31,34

Maniola_hyperantus,NC_048564.1,6338236,6338415,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_23,4,54,63

Getting Gene
min = set24_frame2, 1.526
5 top scores:
[['set24_frame2', 1.526], ['set64_frame1', 1.628], ['set101_frame2', 1.681], ['set17_frame1', 1.73], ['set98_frame1', 1.732]]
4 36
set24_frame2
VLPLYYTPTSNQPQPSGFNTNTNFPYAHYHRY
Original query = PVPMYYIPAAPYQMAPKSEAGPSASQAQYQRHS
New query = VLPLYYTPTSNQPQPSGFNTNTNFPYAHYHRY


is the query sequence ok? y


Maniola_hyperantus,NC_048561.1,910017,910138,1,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_20,9,39,40



Error Found! Process? y


19


Previous exon coordinate
Maniola_hyperantus,NC_048564.1,0,6,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_19,1,31,33

Proceed? n


18
21


Next exon coordinate
Maniola_hyperantus,NC_048558.1,3090742,3090903,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_21,19,53,54

Proceed? n


22


Next exon coordinate
Maniola_hyperantus,NC_048540.1,13943498,13943638,1,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_22,2,34,47

Proceed? n


23


Next exon coordinate
Maniola_hyperantus,NC_048564.1,6338236,6338415,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_23,4,54,63

Proceed? n


24


Next exon coordinate
Maniola_hyperantus,NC_048564.1,6338986,6339093,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_24,18,36,36

Proceed? n


25
Maniola_hyperantus,NC_048564.1,6331623,6331727,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_18,1,31,34

Maniola_hyperantus,NC_048564.1,6339971,6340072,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_25,1,34,34

Getting Gene
min = set7_frame0, 1.68
5 top scores:
[['set7_frame0', 1.68], ['set97_frame1', 1.716], ['set160_frame2', 1.72], ['set111_frame2', 1.735], ['set117_frame2', 1.735]]
0 42
set7_frame0
VTFYRGKSRSSRGIFKNLNPRVRSCGYQLVSNKNIVNPKYHK
Original query = MNGFSIPYMGHQQNMNPAGRSSRTETANANMNMQPNVYSP
New query = VTFYRGKSRSSRGIFKNLNPRVRSCGYQLVSNKNIVNPKYHK


is the query sequence ok? GKSRSSRGIFKNLNPRVRSCGYQLVSNKNIVNPKYHK


Original query = MNGFSIPYMGHQQNMNPAGRSSRTETANANMNMQPNVYSP
New query = VTFYRGKSRSSRGIFKNLNPRVRSCGYQLVSNKNIVNPKYHK


is the query sequence ok? n
Enter the desired query sequence : GKSRSSRGIFKNLNPRVRSCGYQLVSNKNIVNPKYHK


Maniola_hyperantus,NC_048558.1,3090742,3090903,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_21,19,53,54



Error Found! Process? y


20


Previous exon coordinate
Maniola_hyperantus,NC_048561.1,910017,910138,1,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_20,9,39,40

Proceed? n


19


Previous exon coordinate
Maniola_hyperantus,NC_048564.1,0,6,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_19,1,31,33

Proceed? n


18
22


Next exon coordinate
Maniola_hyperantus,NC_048540.1,13943498,13943638,1,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_22,2,34,47

Proceed? n


23


Next exon coordinate
Maniola_hyperantus,NC_048564.1,6338236,6338415,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_23,4,54,63

Proceed? y


Maniola_hyperantus,NC_048564.1,6331623,6331727,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_18,1,31,34

Maniola_hyperantus,NC_048564.1,6338236,6338415,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_23,4,54,63

Getting Gene
min = set64_frame1, 1.118
5 top scores:
[['set64_frame1', 1.118], ['set98_frame1', 1.806], ['set39_frame2', 1.822], ['set101_frame2', 1.836], ['set110_frame2', 1.849]]
6 60
set64_frame1
QCMMLGQAVYGSPYMYSAVNQQMPYAVQQRFIPQQTPHTQ---PLELSSSNYEE
Original query = QCMMFGPPVFASPFMYPQYDSPMAYTIPQNFGQHQIPNTQTMGTLGLSSNNYEE
New query = QCMMLGQAVYGSPYMYSAVNQQMPYAVQQRFIPQQTPHTQ---PLELSSSNYEE


is the query sequence ok? y


Maniola_hyperantus,NC_048540.1,13943498,13943638,1,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_22,2,34,47



Error Found! Process? y


21


Previous exon coordinate
Maniola_hyperantus,NC_048558.1,3090742,3090903,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_21,19,53,54

Proceed? n


20


Previous exon coordinate
Maniola_hyperantus,NC_048561.1,910017,910138,1,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_20,9,39,40

Proceed? n


19


Previous exon coordinate
Maniola_hyperantus,NC_048564.1,0,6,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_19,1,31,33

Proceed? n


18
23


Next exon coordinate
Maniola_hyperantus,NC_048564.1,6338236,6338415,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_23,4,54,63

Proceed? y


Maniola_hyperantus,NC_048564.1,6331623,6331727,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_18,1,31,34

Maniola_hyperantus,NC_048564.1,6338236,6338415,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_23,4,54,63

Getting Gene
min = set101_frame2, 1.719
5 top scores:
[['set101_frame2', 1.719], ['set18_frame0', 1.769], ['set42_frame2', 1.789], ['set132_frame2', 1.801], ['set40_frame0', 1.774]]
8 55
set101_frame2
TCVVDVSPTSFEPIRGPFSRESVPSSRVSEGQRALQAAARAPHSQNR
Original query = ACKLTIPMKYVQSCNGHGRREKNVECRANNGVHGRDISSSFAGASNR
New query = TCVVDVSPTSFEPIRGPFSRESVPSSRVSEGQRALQAAARAPHSQNR


is the query sequence ok? y


Maniola_hyperantus,NC_048564.1,6338236,6338415,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_23,4,54,63



Error Found! Process? y


22


Previous exon coordinate
Maniola_hyperantus,NC_048540.1,13943498,13943638,1,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_22,2,34,47

Proceed? n


21


Previous exon coordinate
Maniola_hyperantus,NC_048558.1,3090742,3090903,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_21,19,53,54

Proceed? n


20


Previous exon coordinate
Maniola_hyperantus,NC_048561.1,910017,910138,1,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_20,9,39,40

Proceed? n


19


Previous exon coordinate
Maniola_hyperantus,NC_048564.1,0,6,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_19,1,31,33

Proceed? n


18
24


Next exon coordinate
Maniola_hyperantus,NC_048564.1,6338986,6339093,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_24,18,36,36

Proceed? y


Maniola_hyperantus,NC_048564.1,6331623,6331727,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_18,1,31,34

Maniola_hyperantus,NC_048564.1,6338986,6339093,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_24,18,36,36

Getting Gene
min = set132_frame2, 1.027
5 top scores:
[['set132_frame2', 1.027], ['set101_frame2', 1.818], ['set42_frame1', 1.856], ['set40_frame0', 1.824], ['set60_frame0', 1.802]]
0 107
set132_frame2
NSSLYVYLEACNVKRVPGSVFESVKYLGRASIIFYNRGFIYYRKIMSVFQQSTDTVTDERMR---RLGNSDSANDKTDGESSYSSFYSSFFKTDSGSGSDSRQQTKD
Original query = NNDANDERNKRKLRFDNSDGTNEKTDGESSYSSFYSSFFKTDSGSNEESDSKSRPVKEGTKAS
New query = NSSLYVYLEACNVKRVPGSVFESVKYLGRASIIFYNRGFIYYRKIMSVFQQSTDTVTDERMR---RLGNSDSANDKTDGESSYSSFYSSFFKTDSGSGSDSRQQTKD


is the query sequence ok? n
Enter the desired query sequence : DTVTDERMR---RLGNSDSANDKTDGESSYSSFYSSFFKTDSGSGSDSRQQTKDYINTSVSCNEN


Maniola_hyperantus,NC_048564.1,6338986,6339093,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_24,18,36,36



Error Found! Process? y


23


Previous exon coordinate
Maniola_hyperantus,NC_048564.1,6338236,6338415,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_23,4,54,63

Proceed? y


25
Maniola_hyperantus,NC_048564.1,6338236,6338415,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_23,4,54,63

Maniola_hyperantus,NC_048564.1,6339971,6340072,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_25,1,34,34

Getting Gene
min = set16_frame1, 1.155
5 top scores:
[['set26_frame1', 1.729], ['set32_frame2', 1.739], ['set20_frame0', 1.787], ['set29_frame0', 1.825], ['set16_frame1', 1.155]]
0 57
set16_frame1
EECSAMQLTLSYKLLQYRHRTSSTNGTFRQNSDVNKPNVARARKMPRRKMEPPWMEQ
Original query = NKYKILSSKTPTIPSTYVPNDRKVARRKMEPPWMEQ
New query = EECSAMQLTLSYKLLQYRHRTSSTNGTFRQNSDVNKPNVARARKMPRRKMEPPWMEQ


is the query sequence ok? n
Enter the desired query sequence : SYKLLQYRHRTSSTNGTFRQNSDVNKPNVARARKMPRRKMEPPWMEQ


Maniola_hyperantus,NC_048564.1,6340547,6340696,0,Y,5.Bicyclus_anynana_XM_024088150.2_query_Exon_26,3,26,50



Error Found! Process? y


25
27
Maniola_hyperantus,NC_048564.1,6339971,6340072,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_25,1,34,34

Maniola_hyperantus,NC_048564.1,6341479,6341580,0,N,5.Bicyclus_anynana_XM_024088150.2_query_Exon_27,2,30,32

Getting Gene
min = set10_frame1, 0.733
5 top scores:
[['set26_frame2', 1.821], ['set23_frame2', 1.853], ['set18_frame0', 1.858], ['set10_frame1', 0.733], ['set4_frame2', 1.798]]
0 81
set10_frame1
KRYILLALLVSAAAGMVSSLKCPLTFGIYFSVTFQPSLVNEQLSQLYLDLQLEGVAARLTLEEGITSSSSSGEESTAKSPK
Original query = SPLVCEQLSQLYLDLQLQGVAARLTLEDGITSSSSSGEDTPNHPKPSTSK
New query = KRYILLALLVSAAAGMVSSLKCPLTFGIYFSVTFQPSLVNEQLSQLYLDLQLEGVAARLTLEEGITSSSSSGEESTAKSPK


is the query sequence ok? n
Enter the desired query sequence : SLVNEQLSQLYLDLQLEGVAARLTLEEGITSSSSSGEESTAKSPK
