In [16]:
import re
import sys
import os
from Bio import Entrez
Entrez.email = 'saubaralwork@gmail.com'

def get_ncbi_tax(taxon):
    '''Get the NCBI Taxonomy lineage of a taxon.

    Args:
        taxon: Either a taxon name (e.g. "Brenthis daphne") or an NCBI
            taxonomy ID. Non-string values are converted with ``str``.

    Returns:
        The ``'LineageEx'`` entry of the first record returned by
        ``Entrez.efetch`` for the taxon.

    Raises:
        SystemExit: If a name search in the taxonomy database returns no IDs.
    '''
    # code from https://harryincupboard.blog/3
    taxon = str(taxon)

    # Only a purely numeric string is a taxonomy ID. ``re.match`` anchors at
    # the start only, so it would also accept names that merely begin with a
    # digit; ``fullmatch`` requires the whole string to be digits.
    if re.fullmatch(r'\d+', taxon):
        tax_id = taxon
    else:
        # Resolve the name to taxonomy ID(s); the name is quoted so it is
        # searched as a single phrase.
        handle = Entrez.esearch(
            db='taxonomy', term='"' + taxon + '"',
            rettype='gb', retmode='text')
        try:
            record = Entrez.read(handle, validate=False)
        finally:
            # Close the handle even when parsing fails.
            handle.close()
        # If there's no result
        if not record['IdList']:
            sys.exit(
                '[ERROR] The taxon "{}" you provided is invalid. '
                'Please check NCBI Taxonomy'.format(taxon))
        tax_id = record['IdList']

    # Now connect to NCBI again using the tax_id to fetch the full record.
    handle2 = Entrez.efetch(db='taxonomy', id=tax_id, retmode='xml')
    try:
        record2 = Entrez.read(handle2, validate=False)
    finally:
        handle2.close()

    return record2[0]['LineageEx']

# Build a tab-separated table (Superfamily, Family, Species) with one row per
# entry of the genomes directory, looking each species up in NCBI Taxonomy.
#
# Example of a single lookup:
# taxonomic_list = get_ncbi_tax("Brenthis daphne")
# print(taxonomic_list)
list_of_genomes = os.listdir("J:/Genomes_2023-12-26")
rows = ['Superfamily\tFamily\tSpecies\n']
for genomes in list_of_genomes:
    # Directory entries use underscores where the species name has spaces.
    species = genomes.replace("_", " ")
    print(species)

    # Reset for every species so that a lineage lacking one of the ranks (or
    # a failed lookup) never inherits the values of the previous species.
    super_family = "Missing"
    family = "Missing"
    try:
        taxonomic_list = get_ncbi_tax(species)
    except (Exception, SystemExit) as err:
        # get_ncbi_tax calls sys.exit() for unknown taxa, hence SystemExit.
        # KeyboardInterrupt is deliberately not caught so the run can still
        # be cancelled.
        print(f"[WARN] lookup failed for {species}: {err}", file=sys.stderr)
    else:
        for taxonomic_unit in taxonomic_list:
            if taxonomic_unit["Rank"] == "superfamily":
                super_family = taxonomic_unit["ScientificName"]
            if taxonomic_unit["Rank"] == "family":
                family = taxonomic_unit["ScientificName"]

    # Rows end with a bare newline; a trailing space here would be written at
    # the start of the following row.
    rows.append(f"{super_family}\t{family}\t{species}\n")
    print(f"{genomes}, {family}, {super_family}")

output = "".join(rows)

with open("H:/My Drive/Circadian Rhythm Genes Project/Documentation/List of Genomes/taxonomic_list.csv", 'w', encoding="utf-8") as out_file:
    out_file.write(output)


Abrostola tripartita
Abrostola_tripartita, Noctuidae, Noctuoidea
Abrostola triplasia
Abrostola_triplasia, Noctuidae, Noctuoidea
Acentria ephemerella
Acentria_ephemerella, Crambidae, Pyraloidea
Achlya flavicornis
Achlya_flavicornis, Drepanidae, Drepanoidea
Acleris cristana
Acleris_cristana, Tortricidae, Tortricoidea
Acleris emargana
Acleris_emargana, Tortricidae, Tortricoidea
Acleris holmiana
Acleris_holmiana, Tortricidae, Tortricoidea
Acleris literana
Acleris_literana, Tortricidae, Tortricoidea
Acleris sparsana
Acleris_sparsana, Tortricidae, Tortricoidea
Acrobasis consociella
Acrobasis_consociella, Pyralidae, Pyraloidea
Acrobasis repandana
Acrobasis_repandana, Pyralidae, Pyraloidea
Acrobasis suavella
Acrobasis_suavella, Pyralidae, Pyraloidea
Acronicta aceris
Acronicta_aceris, Noctuidae, Noctuoidea
Acronicta leporina
Acronicta_leporina, Noctuidae, Noctuoidea
Acronicta psi
Acronicta_psi, Noctuidae, Noctuoidea
Adoxophyes honmai
Adoxophyes_honmai, Tortricidae, Tortricoidea
Agonopterix aren

Choristoneura_fumiferana, Tortricidae, Tortricoidea
Chrysodeixis includens
Chrysodeixis_includens, Noctuidae, Noctuoidea
Chrysoteuchia culmella
Chrysoteuchia_culmella, Crambidae, Pyraloidea
Clepsis dumicolana
Clepsis_dumicolana, Tortricidae, Tortricoidea
Clostera curtula
Clostera_curtula, Notodontidae, Noctuoidea
Cnaphalocrocis medinalis
Cnaphalocrocis_medinalis, Crambidae, Pyraloidea
Coenonympha glycerion
Coenonympha_glycerion, Nymphalidae, Papilionoidea
Coleophora deauratella
Coleophora_deauratella, Coleophoridae, Gelechioidea
Coleophora flavipennella
Coleophora_flavipennella, Coleophoridae, Gelechioidea
Colias behrii
Colias_behrii, Pieridae, Papilionoidea
Colias croceus
Colias_croceus, Pieridae, Papilionoidea
Colias eurytheme
Colias_eurytheme, Pieridae, Papilionoidea
Colias nastes
Colias_nastes, Pieridae, Papilionoidea
Colostygia pectinataria
Colostygia_pectinataria, Geometridae, Geometroidea
Conistra vaccinii
Conistra_vaccinii, Noctuidae, Noctuoidea
Conogethes punctiferalis
Conoget

Glaucopsyche_alexis, Lycaenidae, Papilionoidea
Globia sparganii
Globia_sparganii, Noctuidae, Noctuoidea
Gortyna flavago
Gortyna_flavago, Noctuidae, Noctuoidea
Grapholita molesta
Grapholita_molesta, Tortricidae, Tortricoidea
Griposia aprilina
Griposia_aprilina, Noctuidae, Noctuoidea
Gymnoscelis rufifasciata
Gymnoscelis_rufifasciata, Geometridae, Geometroidea
Habrosyne pyritoides
Habrosyne_pyritoides, Drepanidae, Drepanoidea
Hebomoia glaucippe
Hebomoia_glaucippe, Pieridae, Papilionoidea
Hecatera dysodea
Hecatera_dysodea, Noctuidae, Noctuoidea
Hedya salicella
Hedya_salicella, Tortricidae, Tortricoidea
Heliconius charithonia
Heliconius_charithonia, Nymphalidae, Papilionoidea
Heliconius nattereri
Heliconius_nattereri, Nymphalidae, Papilionoidea
Heliconius sara
Heliconius_sara, Nymphalidae, Papilionoidea
Helicoverpa armigera
Helicoverpa_armigera, Noctuidae, Noctuoidea
Helicoverpa assulta
Helicoverpa_assulta, Noctuidae, Noctuoidea
Helicoverpa zea
Helicoverpa_zea, Noctuidae, Noctuoidea
Helioth

Notocelia_uddmanniana, Tortricidae, Tortricoidea
Notodonta dromedarius
Notodonta_dromedarius, Notodontidae, Noctuoidea
Notodonta ziczac
Notodonta_ziczac, Notodontidae, Noctuoidea
Nudaria mundana
Nudaria_mundana, Erebidae, Noctuoidea
Nycteola revayana
Nycteola_revayana, Nolidae, Noctuoidea
Nymphalis c-album
Nymphalis_c-album, Nymphalidae, Papilionoidea
Nymphalis io
Nymphalis_io, Nymphalidae, Papilionoidea
Nymphalis polychloros
Nymphalis_polychloros, Nymphalidae, Papilionoidea
Nymphalis urticae
Nymphalis_urticae, Nymphalidae, Papilionoidea
Nymphula nitidulata
Nymphula_nitidulata, Crambidae, Pyraloidea
Ochlodes sylvanus
Ochlodes_sylvanus, Hesperiidae, Hesperioidea
Ochropacha duplaris
Ochropacha_duplaris, Drepanidae, Drepanoidea
Ochropleura leucogaster
Ochropleura_leucogaster, Noctuidae, Noctuoidea
Ochropleura plecta
Ochropleura_plecta, Noctuidae, Noctuoidea
Oegoconia quadripuncta
Oegoconia_quadripuncta, Autostichidae, Gelechioidea
Oeneis ivallda
Oeneis_ivallda, Nymphalidae, Papilionoidea


Thymelicus_acteon, Hesperiidae, Hesperioidea
Thymelicus sylvestris
Thymelicus_sylvestris, Hesperiidae, Hesperioidea
Tiliacea aurago
Tiliacea_aurago, Noctuidae, Noctuoidea
Timandra comae
Timandra_comae, Geometridae, Geometroidea
Tinea pellionella
Tinea_pellionella, Tineidae, Tineoidea
Tinea semifulvella
Tinea_semifulvella, Tineidae, Tineoidea
Tinea trinotella
Tinea_trinotella, Tineidae, Tineoidea
Tortricodes alternella
Tortricodes_alternella, Tortricidae, Tortricoidea
Tortrix viridana
Tortrix_viridana, Tortricidae, Tortricoidea
Trichoplusia ni
Trichoplusia_ni, Noctuidae, Noctuoidea
Trilocha varians
Trilocha_varians, Bombycidae, Bombycoidea
Trisateles emortualis
Trisateles_emortualis, Noctuidae, Noctuoidea
Troides aeacus
Troides_aeacus, Papilionidae, Papilionoidea
Troides oblongomaculatus
Troides_oblongomaculatus, Papilionidae, Papilionoidea
Tuta absoluta
Tuta_absoluta, Gelechiidae, Gelechioidea
Tyria jacobaeae
Tyria_jacobaeae, Erebidae, Noctuoidea
Udea ferrugalis
Udea_ferrugalis, Crambi