## Nordicmicroalgae NOMAC 2017.

Prepares synonyms and links to external species lists.

In [1]:
import datetime
from pathlib import Path

import numpy as np
import pandas as pd

#### Import NOMAC 2017.

In [2]:
# Load the NOMAC 2017 table (tab-separated, cp1252-encoded).
# The file name embeds an ISO date. Prefer the file stamped with today's date,
# but fall back to the most recent dated version so the notebook can also be
# re-run on a later day than the one on which the input file was generated.
date_iso = datetime.datetime.now().date().isoformat()
nomac_dir = Path('./out_data/')
nomac_path = nomac_dir / ('NOMAC_2017_version_' + date_iso + '.txt')
if not nomac_path.exists():
    # ISO dates (YYYY-MM-DD) sort chronologically as plain strings,
    # so the last path after sorting is the newest version.
    dated_versions = sorted(nomac_dir.glob('NOMAC_2017_version_????-??-??.txt'))
    if not dated_versions:
        raise FileNotFoundError(
            'No NOMAC_2017_version_<date>.txt file found in ' + str(nomac_dir))
    nomac_path = dated_versions[-1]
    print('NOMAC file for ' + date_iso + ' not found; using ' + nomac_path.name)

# Only the literal string 'nan' is treated as missing; empty fields stay
# as empty strings because the default NA markers are switched off.
df_nomac2017 = pd.read_csv(nomac_path,
                    sep='\t', encoding='cp1252',
                    na_values=['nan'], keep_default_na=False, )

In [3]:
# Display the imported NOMAC table for a visual check.
df_nomac2017

Unnamed: 0,scientific_name,rank,author,parent,classification,source
0,Biota,Top,,,,NOMAC_2017_top_taxa
1,Eukaryota,Empire,Chatton,Biota,,NOMAC_2017_top_taxa
2,Prokaryota,Empire,Allsopp,Biota,,NOMAC_2017_top_taxa
3,Bacteria,Kingdom,Cavalier-Smith,Prokaryota,,NOMAC_2017_top_taxa
4,Chromista,Kingdom,Cavalier-Smith,Eukaryota,,NOMAC_2017_top_taxa
5,Plantae,Kingdom,Haeckel,Eukaryota,,NOMAC_2017_top_taxa
6,Protozoa,Kingdom,R.Owen,Eukaryota,,NOMAC_2017_top_taxa
7,Fungi,Kingdom,T.L.Jahn & F.F.Jahn ex R.T.Moore,Eukaryota,,NOMAC_2017_top_taxa
8,Eukaryota unassigned,Kingdom,,Eukaryota,,NOMAC_2017_top_taxa
9,Cyanobacteria,Phylum,"Stanier ex Cavalier-Smith, 2002",Bacteria,Bacteria - Cyanobacteria,Artsnavnebase 20170303


#### Import DynTaxa.

In [4]:
# Column names as they appear in the DynTaxa export, and the names used
# for them in the rest of this notebook (same order in both lists).
old_header = ['TaxonId', 'Vetenskapligt namn', 'URL till taxoninformation']
new_header = ['dyntaxa_id', 'dyntaxa_scientific_name', 'dyntaxa_url']

# Read only the three columns listed above. As with the NOMAC import, only
# the literal string 'nan' is treated as a missing value.
dyntaxa_raw = pd.read_excel(
    './in_data/' + 'Biota_Dyntaxa_20170303.xlsx',
    usecols=old_header,
    na_values=['nan'],
    keep_default_na=False,
)

df_dyntaxa = dyntaxa_raw.rename(columns=dict(zip(old_header, new_header)))

In [5]:
# Display the renamed DynTaxa columns for a visual check.
df_dyntaxa

Unnamed: 0,dyntaxa_id,dyntaxa_scientific_name,dyntaxa_url
0,0,Biota,https://www.dyntaxa.se/taxon/info/0
1,6000006,Saprophytes,https://www.dyntaxa.se/taxon/info/6000006
2,6000005,Parasymbionts,https://www.dyntaxa.se/taxon/info/6000005
3,6000002,Crustose lichenes,https://www.dyntaxa.se/taxon/info/6000002
4,6000004,Foliose lichenes,https://www.dyntaxa.se/taxon/info/6000004
5,6000003,Fruticose lichenes,https://www.dyntaxa.se/taxon/info/6000003
6,6001047,Algae,https://www.dyntaxa.se/taxon/info/6001047
7,5000001,Animalia,https://www.dyntaxa.se/taxon/info/5000001
8,5000012,Acanthocephala,https://www.dyntaxa.se/taxon/info/5000012
9,4000035,Archiacanthocephala,https://www.dyntaxa.se/taxon/info/4000035


In [6]:
# Attach the DynTaxa id, name and URL to each NOMAC row by exact match on the
# scientific name. A left join keeps every NOMAC row; rows without a DynTaxa
# match get missing values in the three dyntaxa_* columns.
# NOTE(review): if a scientific name occurs more than once in DynTaxa, the
# matching NOMAC row is repeated once per match - confirm names are unique.
df_nomac2017_2 = df_nomac2017.merge(
    df_dyntaxa,
    how='left',
    left_on='scientific_name',
    right_on='dyntaxa_scientific_name',
)

In [7]:
# Rewrite the matched DynTaxa ids as integer strings (e.g. '5000052').
# Only non-missing ids are converted; on assignment pandas aligns on the row
# index, so rows without a DynTaxa match keep a missing value.
# Presumably needed because the left join turns the id column into floats
# once it contains missing values - TODO confirm.
matched_ids = df_nomac2017_2['dyntaxa_id'].dropna()
df_nomac2017_2['dyntaxa_id'] = matched_ids.map(lambda taxon_id: str(int(taxon_id)))

df_nomac2017_2

Unnamed: 0,scientific_name,rank,author,parent,classification,source,dyntaxa_id,dyntaxa_scientific_name,dyntaxa_url
0,Biota,Top,,,,NOMAC_2017_top_taxa,0,Biota,https://www.dyntaxa.se/taxon/info/0
1,Eukaryota,Empire,Chatton,Biota,,NOMAC_2017_top_taxa,,,
2,Prokaryota,Empire,Allsopp,Biota,,NOMAC_2017_top_taxa,,,
3,Bacteria,Kingdom,Cavalier-Smith,Prokaryota,,NOMAC_2017_top_taxa,5000052,Bacteria,https://www.dyntaxa.se/taxon/info/5000052
4,Chromista,Kingdom,Cavalier-Smith,Eukaryota,,NOMAC_2017_top_taxa,5000055,Chromista,https://www.dyntaxa.se/taxon/info/5000055
5,Plantae,Kingdom,Haeckel,Eukaryota,,NOMAC_2017_top_taxa,5000045,Plantae,https://www.dyntaxa.se/taxon/info/5000045
6,Protozoa,Kingdom,R.Owen,Eukaryota,,NOMAC_2017_top_taxa,5000060,Protozoa,https://www.dyntaxa.se/taxon/info/5000060
7,Fungi,Kingdom,T.L.Jahn & F.F.Jahn ex R.T.Moore,Eukaryota,,NOMAC_2017_top_taxa,5000039,Fungi,https://www.dyntaxa.se/taxon/info/5000039
8,Eukaryota unassigned,Kingdom,,Eukaryota,,NOMAC_2017_top_taxa,6011679,Eukaryota unassigned,https://www.dyntaxa.se/taxon/info/6011679
9,Cyanobacteria,Phylum,"Stanier ex Cavalier-Smith, 2002",Bacteria,Bacteria - Cyanobacteria,Artsnavnebase 20170303,5000053,Cyanobacteria,https://www.dyntaxa.se/taxon/info/5000053


In [9]:
# Save the NOMAC table with the added DynTaxa columns as tab-separated,
# cp1252-encoded text, leaving out the DataFrame row index.
extended_path = './out_data/' + 'NOMAC_2017_extended_links_synonyms.txt'
df_nomac2017_2.to_csv(extended_path, sep='\t', index=False, encoding='cp1252')