In [1]:
from ete3 import Tree

In [2]:
# Parse the full phylogeny from its Newick file into an ete3 Tree.
# format=1 is handed straight to ete3's Newick reader.
file_tree = 'RAxML_bipartitions.result_FIN4_raw_rooted_wBoots_4098mam1out_OK.newick'
t = Tree(newick=file_tree, format=1)

In [4]:
# Build latin = {ortholog ID (column 0): [species name (column 1)]}, where the
# species name has its spaces replaced by underscores. Lines starting with '#'
# are skipped.
latin = {}
with open('Metadata') as file1:
    for line in file1:
        if line.startswith('#'):
            continue
        dat = line.strip().split('\t')
        species_name = dat[1].replace(' ', '_')
        latin[dat[0]] = [species_name]

# Distinct species names of the collection, stored as dict keys
# (the values are empty-string placeholders).
species = dict.fromkeys((record[0] for record in latin.values()), '')
print(len(species))

520


In [6]:
# Index tree nodes by the first two '_'-separated tokens of their name,
# mapping that short key to the full node name. Only nodes whose name is
# longer than 20 characters are indexed; if two nodes share a key, the one
# visited later in the postorder traversal wins.
intree = {}
for node in t.traverse("postorder"):
    full_name = node.name
    if len(full_name) <= 20:
        continue
    binomial = '_'.join(full_name.split('_')[:2])
    intree[binomial] = full_name

In [7]:
# Manually curated dictionary connecting species of our dataset (key) with the
# correct identifier in the tree, or with the closest relative available in the
# tree (value).
#
# Every key must appear exactly once: Python keeps only the LAST value when a
# key is repeated inside a dict literal, so a duplicated entry silently hides
# the earlier mapping.
close_rel = {'Orientallactaga_bullata':'Allactaga_bullata_DIPODIDAE_RODENTIA',
             'Carlito_syrichta':'Tarsius_wallacei_TARSIIDAE_PRIMATES',
             'Cephalopachus_bancanus':'Tarsius_dentatus_TARSIIDAE_PRIMATES',
             'Hexaprotodon_liberiensis':'Choeropsis_liberiensis_HIPPOPOTAMIDAE_CETARTIODACTYLA',
             'Plecturocebus_donacophilus':'Callicebus_brunneus_PITHECIIDAE_PRIMATES',
             'Notamacropus_eugenii':'Onychogalea_fraenata_MACROPODIDAE_DIPROTODONTIA',
             'Dicerorhinus_sumatrensis_harrissoni':'Dicerorhinus_sumatrensis_RHINOCEROTIDAE_PERISSODACTYLA',
             'Bubalus_carabanensis':'Bubalus_depressicornis_BOVIDAE_CETARTIODACTYLA',
             'Alexandromys_oeconomus':'Microtus_middendorffii_CRICETIDAE_RODENTIA',
             'Acomys_kempi':'Acomys_spinosissimus_MURIDAE_RODENTIA',
             'Microtus_richardsoni_arvicoloides':'Microtus_richardsoni_CRICETIDAE_RODENTIA',
             'Ellobius_lutescens':'Ellobius_talpinus_CRICETIDAE_RODENTIA',
             'Neodon_shergylaensis':'Neodon_sikimensis_CRICETIDAE_RODENTIA',
             'Peromyscus_maniculatus_sonoriensis':'Peromyscus_maniculatus_CRICETIDAE_RODENTIA',
             'Peromyscus_maniculatus_bairdii':'Peromyscus_maniculatus_CRICETIDAE_RODENTIA',
             'Peromyscus_polionotus_subgriseus':'Peromyscus_polionotus_CRICETIDAE_RODENTIA',
             'Ceratotherium_simum_cottoni':'Ceratotherium_simum_RHINOCEROTIDAE_PERISSODACTYLA',
             'Peromyscus_californicus_insignis':'Peromyscus_californicus_CRICETIDAE_RODENTIA',
             'Cricetomys_ansorgei':'Cricetomys_emini_NESOMYIDAE_RODENTIA',
             'Mus_musculus_musculus':'Mus_musculus_MURIDAE_RODENTIA',
             'Mus_musculus_domesticus':'Mus_musculus_MURIDAE_RODENTIA',
             'Orycteropus_afer_afer':'Orycteropus_afer_ORYCTEROPODIDAE_TUBULIDENTATA',
             'Lama_glama_chaku':'Lama_glama_CAMELIDAE_CETARTIODACTYLA',
             'Vicugna_pacos_huacaya':'Vicugna_pacos_CAMELIDAE_CETARTIODACTYLA',
             'Lama_guanicoe_cacsilensis':'Lama_guanicoe_CAMELIDAE_CETARTIODACTYLA',
             'Vicugna_vicugna_mensalis':'Vicugna_vicugna_CAMELIDAE_CETARTIODACTYLA',
             'Marmota_marmota_marmota':'Marmota_marmota_SCIURIDAE_RODENTIA',
             'Elephas_maximus_indicus':'Elephas_maximus_ELEPHANTIDAE_PROBOSCIDEA',
             'Nanger_dama_ruficollis':'Nanger_dama_BOVIDAE_CETARTIODACTYLA',
             'Kobus_leche_leche':'Kobus_leche_BOVIDAE_CETARTIODACTYLA',
             'Mustela_putorius_furo':'Mustela_putorius_MUSTELIDAE_CARNIVORA',
             'Platanista_minor':'Platanista_gangetica_PLATANISTIDAE_CETARTIODACTYLA',
             'Hippotragus_niger_niger':'Hippotragus_niger_BOVIDAE_CETARTIODACTYLA',
             'Sus_scrofa_domesticus':'Sus_scrofa_SUIDAE_CETARTIODACTYLA',
             'Sus_scrofa_scrofa':'Sus_scrofa_SUIDAE_CETARTIODACTYLA',
             'Muntiacus_gongshanensis':'Muntiacus_muntjak_CERVIDAE_CETARTIODACTYLA',
             'Trichechus_manatus_latirostris':'Trichechus_manatus_TRICHECHIDAE_SIRENIA',
             'Odocoileus_hemionus_hemionus':'Odocoileus_hemionus_CERVIDAE_CETARTIODACTYLA',
             'Odocoileus_virginianus_texanus':'Odocoileus_virginianus_CERVIDAE_CETARTIODACTYLA',
             'Ovis_ammon_polii':'Ovis_ammon_BOVIDAE_CETARTIODACTYLA',
             'Ovis_nivicola_lydekkeri':'Ovis_nivicola_BOVIDAE_CETARTIODACTYLA',
             'Hipposideros_pendleburyi':'Hipposideros_pendelburyi_HIPPOSIDERIDAE_CHIROPTERA',
             'Enhydra_lutris_nereis':'Enhydra_lutris_MUSTELIDAE_CARNIVORA',
             'Giraffa_tippelskirchi':'Giraffa_camelopardalis_GIRAFFIDAE_CETARTIODACTYLA',
             'Bos_indicus_x_Bos_taurus':'Bos_indicus_BOVIDAE_CETARTIODACTYLA',
             'Bos_grunniens_x_Bos_taurus':'Bos_grunniens_BOVIDAE_CETARTIODACTYLA',
             'Enhydra_lutris_kenyoni':'Enhydra_lutris_MUSTELIDAE_CARNIVORA',
             'Giraffa_camelopardalis_rothschildi':'Giraffa_camelopardalis_GIRAFFIDAE_CETARTIODACTYLA',
             'Giraffa_camelopardalis_antiquorum':'Giraffa_camelopardalis_GIRAFFIDAE_CETARTIODACTYLA',
             'Bos_taurus_x_Bison_bison':'Bos_taurus_BOVIDAE_CETARTIODACTYLA',
             'Cervus_canadensis':'Cervus_elaphus_CERVIDAE_CETARTIODACTYLA',
             'Cervus_hanglu_yarkandensis':'Cervus_elaphus_CERVIDAE_CETARTIODACTYLA',
             'Bos_gaurus_x_Bos_taurus':'Bos_gaurus_BOVIDAE_CETARTIODACTYLA',
             'Microcebus_sp._3_GT-2019':'Microcebus_murinus_CHEIROGALEIDAE_PRIMATES',
             'Rangifer_tarandus_platyrhyncus':'Rangifer_tarandus_CERVIDAE_CETARTIODACTYLA',
             'Rangifer_tarandus_tarandus':'Rangifer_tarandus_CERVIDAE_CETARTIODACTYLA',
             'Spilogale_interrupta':'Spilogale_putorius_MEPHITIDAE_CARNIVORA',
             'Rangifer_tarandus_granti':'Rangifer_tarandus_CERVIDAE_CETARTIODACTYLA',
             'Gulo_gulo_luscus':'Gulo_gulo_MUSTELIDAE_CARNIVORA',
             'Neophocaena_asiaeorientalis_sunameri':'Neophocaena_asiaeorientalis_PHOCOENIDAE_CETARTIODACTYLA',
             'Neophocaena_asiaeorientalis_asiaeorientalis':'Neophocaena_asiaeorientalis_PHOCOENIDAE_CETARTIODACTYLA',
             'Balaenoptera_acutorostrata_scammoni':'Balaenoptera_acutorostrata_BALAENOPTERIDAE_CETARTIODACTYLA',
             'Balaenoptera_ricei':'Balaenoptera_edeni_BALAENOPTERIDAE_CETARTIODACTYLA',
             'Trachypithecus_phayrei_crepuscula':'Trachypithecus_phayrei_CERCOPITHECIDAE_PRIMATES',
             'Hippopotamus_amphibius_kiboko':'Hippopotamus_amphibius_HIPPOPOTAMIDAE_CETARTIODACTYLA',
             'Gorilla_gorilla_gorilla':'Gorilla_gorilla_HOMINIDAE_PRIMATES',
             'Ursus_thibetanus_thibetanus':'Ursus_thibetanus_URSIDAE_CARNIVORA',
             'Macaca_fuscata_fuscata':'Macaca_fuscata_CERCOPITHECIDAE_PRIMATES',
             'Macaca_thibetana_thibetana':'Macaca_thibetana_CERCOPITHECIDAE_PRIMATES',
             'Cercopithecus_albogularis':'Cercopithecus_diana_CERCOPITHECIDAE_PRIMATES',
             'Dicerorhinus_sumatrensis_sumatrensis':'Dicerorhinus_sumatrensis_RHINOCEROTIDAE_PERISSODACTYLA',
             'Odobenus_rosmarus_divergens':'Odobenus_rosmarus_ODOBENIDAE_CARNIVORA',
             'Equus_quagga_burchellii':'Equus_quagga_EQUIDAE_PERISSODACTYLA',
             'Proteles_cristata_cristata':'Proteles_cristata_HYAENIDAE_CARNIVORA',
             'Pusa_hispida_saimensis':'Pusa_hispida_PHOCIDAE_CARNIVORA',
             'Saimiri_boliviensis_boliviensis':'Saimiri_boliviensis_CEBIDAE_PRIMATES',
             'Ceratotherium_simum_simum':'Ceratotherium_simum_RHINOCEROTIDAE_PERISSODACTYLA',
             'Diceros_bicornis_minor':'Diceros_bicornis_RHINOCEROTIDAE_PERISSODACTYLA',
             'Colobus_angolensis_palliatus':'Colobus_angolensis_CERCOPITHECIDAE_PRIMATES',
             'Panthera_tigris_jacksoni':'Panthera_tigris_FELIDAE_CARNIVORA',
             'Panthera_tigris_tigris':'Panthera_tigris_FELIDAE_CARNIVORA',
             'Puma_yagouaroundi':'Puma_concolor_FELIDAE_CARNIVORA',
             'Ailurus_styani':'Ailurus_fulgens_AILURIDAE_CARNIVORA',
             'Molossus_nigricans':'Molossus_molossus_MOLOSSIDAE_CHIROPTERA',
             # NOTE(review): this key used to be listed twice (Tupaia_glis, then
             # Tupaia_belangeri); the later value was the one in effect and is kept.
             'Tupaia_chinensis':'Tupaia_belangeri_TUPAIIDAE_SCANDENTIA',
             'Solenodon_paradoxus_woodi':'Solenodon_paradoxus_SOLENODONTIDAE_EULIPOTYPHLA',
             'Otocyon_megalotis_megalotis':'Otocyon_megalotis_CANIDAE_CARNIVORA',
             'Canis_lupus_dingo':'Canis_lupus_CANIDAE_CARNIVORA',
             'Canis_lupus_familiaris':'Canis_lupus_CANIDAE_CARNIVORA',
             # NOTE(review): this key used to be listed twice (Leopardus_colocolo,
             # then Leopardus_tigrinus); the later value was the one in effect and is kept.
             'Leopardus_geoffroyi':'Leopardus_tigrinus_FELIDAE_CARNIVORA',
             'Pteropus_pselaphon':'Pteropus_conspicillatus_PTEROPODIDAE_CHIROPTERA',
             'Mus_musculus_castaneus':'Mus_musculus_MURIDAE_RODENTIA',
             'Mus_musculus_molossinus':'Mus_musculus_MURIDAE_RODENTIA',
             'Oryctolagus_cuniculus_cuniculus':'Oryctolagus_cuniculus_LEPORIDAE_LAGOMORPHA',
             'Rhabdomys_dilectus':'Rhabdomys_pumilio_MURIDAE_RODENTIA',
             'Ovis_ammon_polii_x_Ovis_aries':'Ovis_ammon_BOVIDAE_CETARTIODACTYLA',
             'Pteronotus_parnellii_mesoamericanus':'Pteronotus_parnellii_MORMOOPIDAE_CHIROPTERA',
             'Petaurus_breviceps_papuanus':'Petaurus_breviceps_PETAURIDAE_DIPROTODONTIA',
             'Bettongia_penicillata_ogilbyi':'Bettongia_penicillata_POTOROIDAE_DIPROTODONTIA',
             'Bubalus_bubalis':'Bubalus_arnee_BOVIDAE_CETARTIODACTYLA',
             'Physeter_catodon':'Physeter_macrocephalus_PHYSETERIDAE_CETARTIODACTYLA',
             'Neogale_vison':'Neovison_vison_MUSTELIDAE_CARNIVORA',
             'Equus_asinus':'Equus_africanus_EQUIDAE_PERISSODACTYLA',

             'Cebus_imitator':'Cebus_capucinus_CEBIDAE_PRIMATES',
             'Piliocolobus_tephrosceles':'Procolobus_badius_CERCOPITHECIDAE_PRIMATES',

             'Cricetulus_griseus':'Cricetulus_kamensis_CRICETIDAE_RODENTIA',
             'Grammomys_surdaster':'Grammomys_dolichurus_MURIDAE_RODENTIA',
             'Nannospalax_galili':'Spalax_ehrenbergi_SPALACIDAE_RODENTIA',
             'Mus_musculus':'Mus_musculus_MURIDAE_RODENTIA',
             'Homo_sapiens':'Homo_sapiens_HOMINIDAE_PRIMATES'}

In [8]:
# Extend every record with its tree identifier, so that
#   latin = {orthologID: [species name, treeID]}
# A name found directly in `intree` takes precedence; otherwise the curated
# `close_rel` mapping is consulted. Species found in neither are printed and
# their record is left without a tree ID.
for ortholog_id, record in latin.items():
    species_name = record[0]
    tree_id = intree.get(species_name, close_rel.get(species_name))
    if tree_id is None:
        print(ortholog_id, species_name)
    else:
        record.append(tree_id)

In [9]:
# Make a list of the tree identifiers for successful connections.
# Records that never received a tree ID hold only the species name, so they
# are filtered out here instead of raising IndexError on record[1].
# The set removes duplicates (several dataset entries can map to the same
# tree leaf); sorting makes the list order reproducible between runs.
to_prune = sorted({record[1] for record in latin.values() if len(record) > 1})
print(len(to_prune))

477


In [12]:
# Prune the initial tree (in place) to only the species in our dataset.
# preserve_branch_length=True makes ete3 add the length of every removed
# internal branch to the branch that remains, so pairwise distances between
# the kept leaves are the same as in the full tree. With the default (False)
# those lengths are dropped and distances measured on the pruned tree shrink.
t.prune(to_prune, preserve_branch_length=True)

In [57]:
# Show the current (pruned) tree as an ASCII drawing for a visual sanity check.
print(t)


                                                /-Dinomys_branickii_DINOMYIDAE_RODENTIA
                                             /-|
                                            |   \-Chinchilla_lanigera_CHINCHILLIDAE_RODENTIA
                                            |
                                          /-|      /-Octodon_degus_OCTODONTIDAE_RODENTIA
                                         |  |   /-|
                                         |  |  |   \-Ctenomys_sociabilis_CTENOMYIDAE_RODENTIA
                                         |   \-|
                                       /-|     |   /-Myocastor_coypus_MYOCASTORIDAE_RODENTIA
                                      |  |      \-|
                                      |  |         \-Capromys_pilorides_CAPROMYIDAE_RODENTIA
                                      |  |
                                      |  |   /-Erethizon_dorsatum_ERETHIZONTIDAE_RODENTIA
                                      |   \-|
                      

In [15]:
# Save the pruned all-species tree as a Newick file (format=1).
pruned_tree_path = "./PhyloTree/all_species_tree_pruned.nw"
t.write(outfile=pruned_tree_path, format=1)

Make a tree of archetypes only

In [5]:
# Start again from the pruned all-species tree stored on disk.
t = Tree(newick='./PhyloTree/all_species_tree_pruned.nw', format=1)

In [3]:
# Identifiers (first Metadata column) of the entries used as archetypes.
archetypes = [
    'GCF_015852505.1_mTacAcu1.pri',
    'GCF_016433145.1_AgileGrace',
    'GCA_016432865.2_AdamAnt_v2',
    'GCF_000313985.2_ASM31398v2',
    'GCA_000152225.2_Pcap_2.0',
    'GCF_014898055.1_MPIMG_talOcc4',
    'GCF_000296755.1_EriEur2.0',
    'GCA_029834395.2_SorCin_2.1',
    'Human_NEAT1',
    'GCA_013371645.1_OryCun3.0',
    'GCF_014633375.1_OchPri4.0',
    'GCF_020740685.1_mJacJac1.mat.Y.cur',
    'GCA_023101885.1_KIZ_TCIN_v1.0',
    'Mouse_NEAT1',
    'GCF_019054845.1_ASM1905484v1',
    'GCA_004027875.1_AplRuf_v1_BIUU',
]

In [4]:
# Collect, for every archetype, the tree identifier stored in column 15 of
# the metadata table. Archetypes whose column 15 is empty or missing are
# printed so they can be fixed by hand.
archetype_ids = set(archetypes)  # set gives O(1) membership tests
tax = []
with open('./Key_Files/Metadata') as file1:
    for line in file1:
        if line.startswith('#'):
            continue
        # Strip only the line terminator. A plain strip() also removes
        # trailing tabs, so a row whose last columns are empty would lose
        # those fields and dat[15] would raise IndexError instead of being
        # reported as missing.
        dat = line.rstrip('\r\n').split('\t')
        assembly_id = dat[0].strip()
        if assembly_id not in archetype_ids:
            continue
        tree_id = dat[15].strip() if len(dat) > 15 else ''
        if tree_id:
            tax.append(tree_id)
        else:
            print(assembly_id)

In [6]:
# Reduce the tree (in place) to the archetype leaves, display it and save it.
# preserve_branch_length=True makes ete3 add the length of each removed
# internal branch to the branch that remains, so the distances between the
# kept leaves stay the same; the default (False) would drop those lengths.
t.prune(tax, preserve_branch_length=True)
print(t)
t.write(format=1, outfile="./PhyloTree/archetypes.nw")


                        /-Dipodomys_spectabilis_HETEROMYIDAE_RODENTIA
                       |
                     /-|      /-Typhlomys_cinereus_PLATACANTHOMYIDAE_RODENTIA
                    |  |   /-|
                    |   \-|   \-Mus_musculus_MURIDAE_RODENTIA
                  /-|     |
                 |  |      \-Jaculus_jaculus_DIPODIDAE_RODENTIA
                 |  |
               /-|   \-Aplodontia_rufa_APLODONTIIDAE_RODENTIA
              |  |
              |  |   /-Ochotona_princeps_OCHOTONIDAE_LAGOMORPHA
            /-|   \-|
           |  |      \-Oryctolagus_cuniculus_LEPORIDAE_LAGOMORPHA
           |  |
         /-|   \-Homo_sapiens_HOMINIDAE_PRIMATES
        |  |
        |  |      /-Erinaceus_europaeus_ERINACEIDAE_EULIPOTYPHLA
        |  |   /-|
      /-|   \-|   \-Sorex_cinereus_SORICIDAE_EULIPOTYPHLA
     |  |     |
     |  |      \-Talpa_occidentalis_TALPIDAE_EULIPOTYPHLA
     |  |
   /-|  |   /-Procavia_capensis_PROCAVIIDAE_HYRACOIDEA
  |  |   \-|
  |  |      \-

Retrieving the phylogenetic distance from each species to the echidna, as measured on the tree

In [5]:
# Build three lookups keyed by ortholog ID (column 0) from the metadata table.
# Only rows whose column 3 equals '1' are used (the meaning of that flag is
# not visible in this notebook).
#   latin     : ortholog ID -> species name (column 1)
#   tax_class : ortholog ID -> column 19, upper-cased
#   conn      : ortholog ID -> tree ID (column 15), only when column 14 == '1'
N_COLUMNS_USED = 20  # highest column index read below is 19

tax_class = {}
latin = {}
#Dictionary connecting orthologsID with tree IDs
conn = {}
with open('Metadata') as file1:
    for line in file1:
        if line.startswith('#'):
            continue
        # Strip only the line terminator: a plain strip() also removes
        # trailing tabs, which drops empty trailing columns and makes the
        # fixed-position lookups below raise IndexError.
        dat = line.rstrip('\r\n').split('\t')
        # Pad short rows (including blank lines) so missing columns read as ''.
        dat += [''] * (N_COLUMNS_USED - len(dat))
        if dat[3] != '1':
            continue
        ortholog_id = dat[0].strip()
        latin[ortholog_id] = dat[1]
        tax_class[ortholog_id] = dat[19].strip().upper()
        if dat[14] == '1':
            conn[ortholog_id] = dat[15]


In [5]:
# Write one tab-separated line per species to 'phylodist_to_echidna':
#   echidna ID, species ortholog ID, species name, taxonomic class, distance
# where distance is the tree distance between the echidna leaf and the
# species' leaf.

#Identifier of echidna
item = 'GCF_015852505.1_mTacAcu1.pri'

# `t` is pruned in place down to the archetypes by an earlier cell, so on a
# top-to-bottom run it no longer contains every species. Reload the pruned
# all-species tree under its own name to measure distances for all of them.
species_tree = Tree('./PhyloTree/all_species_tree_pruned.nw', format=1)

if item not in conn:
    raise KeyError('echidna identifier %s has no tree ID in conn' % item)
echidna_leaf = conn[item]

# Species whose distance could not be computed are collected and reported
# instead of being dropped silently.
skipped = []
with open('phylodist_to_echidna', 'w') as out:
    for item2 in conn:
        if item2 == item:
            continue
        try:
            d = species_tree.get_distance(echidna_leaf, conn[item2])
        except Exception as err:  # e.g. the tree ID is absent from the tree
            skipped.append((item2, conn[item2], err))
            continue
        out.write('\t'.join([item, item2, latin[item2], tax_class[item2], str(d)]) + '\n')

for ortholog_id, tree_id, err in skipped:
    print('skipped', ortholog_id, tree_id, repr(err))