# NSL export to CoLDP: algae

In [1]:
import pandas as pd
import numpy as np

namefile = 'data/algae/AANI-names-2023-06-24-3420.csv'
taxonfile = 'data/algae/AAL-taxon-2023-06-24-3606.csv'

### Names

In [2]:
df_names = pd.read_csv(namefile)

# Remove excess columns
columns = [
    'scientificNameID', 
    'nameAccordingToID', 
    'taxonRank', 
    'scientificName', 
    'genericName', 
    'specificEpithet', 
    'infraspecificEpithet', 
    'scientificNameAuthorship', 
    'nomenclaturalStatus',
    'originalNameUsageID', 
    'originalNameUsage'
]

df1 = df_names[columns]

# Instance ID is in nameAccordingToID for reasons unfathomable; rename to taxonID
df1 = df1.rename(columns={'nameAccordingToID': 'taxonID'})

# Filter for names with originalNameUsage
# Replace instance IDs in originalNameUsageID with scientificNameIDs; we will not need taxonID anymore after that
df2 = df1[~df1['originalNameUsageID'].isna()].merge(df1, how='left', left_on='originalNameUsageID', right_on='taxonID')[[
    'scientificNameID_x', 
    'taxonRank_x', 
    'scientificName_x', 
    'genericName_x', 
    'specificEpithet_x', 
    'infraspecificEpithet_x', 
    'scientificNameAuthorship_x', 
    'nomenclaturalStatus_x', 
    'scientificNameID_y', 
    'scientificName_y'
]]

column_mappings = {
    'scientificNameID_x': 'scientificNameID',
    'scientificName_x': 'scientificName',
    'taxonRank_x': 'taxonRank',
    'genericName_x': 'genericName',
    'specificEpithet_x': 'specificEpithet',
    'infraspecificEpithet_x': 'infraspecificEpithet',
    'scientificNameAuthorship_x': 'scientificNameAuthorship',
    'nomenclaturalStatus_x': 'nomenclaturalStatus',
    'scientificNameID_y': 'originalNameUsageID',
    'scientificName_y': 'originalNameUsage'
}

df2 = df2.rename(columns=column_mappings)

# Filter for names without originalNameUsage
df3 = df_names[df_names['originalNameUsage'].isna()][columns]
df3.drop(columns=['nameAccordingToID'], inplace=True)

# Merge dataframes
df5 = pd.concat([df2, df3])
df5 = df5.sort_values(by='scientificName')

# Make taxonRank lowercase
df5['taxonRank'] = df5['taxonRank'].str.lower()

# Map Latin rank names to less wankerish English ones
ranks = {
    'genus': 'genus',
    'species': 'species',
    'familia': 'family',
    'subfamilia': 'subfamily',
    'ordo': 'order',
    'subordo': 'suborder',
    'superordo': 'superorder',
    'subspecies': 'subspecies',
    'classis': 'class',
    'subclassis': 'subclass',
    'subdivision': 'subphylum',
    'subbdivision': 'subphylum',
    'varietas': 'variety',
    'subvarietas': 'subvariety',
    'subgenus': 'subgenus',
    'superspecies': 'superspecies',
    'forma': 'form',
    'division': 'phylum',
    'regnum': 'kingdom',
    'special form': 'special form',
    'sectio': 'section',
    'regio': 'domain',
    '[unknown]': '[unknown]',
    '[unranked]': '[unranked]'
}

taxon_ranks = []
uninomials = []
generic_names = []
for index, row in df5.iterrows():
    taxon_ranks.append(ranks[row['taxonRank']])

    uninomial = np.NaN
    generic_name = row['genericName']

    if not isinstance(row['genericName'], str):
        if not isinstance(row['scientificNameAuthorship'], str):
            uninomial = row['scientificName']
        else:
            uninomial = row['scientificName'][0:len(row['scientificName'])-len(row['scientificNameAuthorship'])-1]

    if row['taxonRank'] == 'genus':
        uninomial = row['genericName']
        generic_name = np.NaN

    uninomials.append(uninomial)
    generic_names.append(generic_name)

df5['taxonRank'] = taxon_ranks
df5['genericName'] = generic_names
df5['uninomial'] = uninomials

# Add nomenclaturalCode; 'ICN' in GBIF vocab. but 'botanical' in CoLDP
df5['code'] = 'botanical'

df5 = df5[[
    'scientificNameID', 
    'taxonRank', 
    'scientificName', 
    'uninomial',
    'genericName', 
    'specificEpithet', 
    'infraspecificEpithet', 
    'scientificNameAuthorship', 
    'code', 
    'nomenclaturalStatus', 
    'originalNameUsageID', 
    'originalNameUsage'
  ]]

df_nam = df5
df5

Unnamed: 0,scientificNameID,taxonRank,scientificName,uninomial,genericName,specificEpithet,infraspecificEpithet,scientificNameAuthorship,code,nomenclaturalStatus,originalNameUsageID,originalNameUsage
1,https://id.biodiversity.org.au/name/algae/2008...,genus,Acanthoceras Honigm.,Acanthoceras,,,,Honigm.,botanical,,,
0,https://id.biodiversity.org.au/name/algae/2008...,species,Acanthoceras zachariasii (Brun) Simonsen,,Acanthoceras,zachariasii,,(Brun) Simonsen,botanical,,https://id.biodiversity.org.au/name/algae/2008...,Attheya zachariasii Brun
19691,https://id.biodiversity.org.au/name/algae/2008...,family,Acanthocerataceae,Acanthocerataceae,,,,,botanical,,,
5198,https://id.biodiversity.org.au/name/algae/2000...,genus,Acanthococcus Hook.f. & Harv.,Acanthococcus,,,,Hook.f. & Harv.,botanical,,,
1268,https://id.biodiversity.org.au/name/algae/2000...,species,Acanthococcus acicularis (J.Agardh) J.Agardh,,Acanthococcus,acicularis,,(J.Agardh) J.Agardh,botanical,,https://id.biodiversity.org.au/name/algae/2001...,Cystoclonium aciculare J.Agardh
...,...,...,...,...,...,...,...,...,...,...,...,...
9619,https://id.biodiversity.org.au/name/algae/2001...,species,[Genus Not Recorded] lichenoides,,[Genus Not Recorded],lichenoides,,,botanical,,,
9618,https://id.biodiversity.org.au/name/algae/2001...,form,[Genus Not Recorded] lichenoides f. coronopifolia,,[Genus Not Recorded],lichenoides,coronopifolia,,botanical,,,
20097,https://id.biodiversity.org.au/name/algae/2000...,class,[Uncertain Class Affinity],[Uncertain Class Affinity],,,,,botanical,,,
20157,https://id.biodiversity.org.au/name/algae/2000...,order,[Uncertain Order Affinity],[Uncertain Order Affinity],,,,,botanical,,,


### Name relations

In [3]:
df_name_relationships = df5[~df5['originalNameUsageID'].isna()][['scientificNameID', 'scientificName', 'originalNameUsageID', 'originalNameUsage']]

df_namerel = df_name_relationships[['scientificNameID', 'originalNameUsageID']]
df_namerel['type'] = 'basionym'
df_namerel.rename(columns={'scientificNameID': 'nameID', 'originalNameUsageID': 'relatedNameID'}, inplace=True)

# Remove originalNameUsage columns from Names
drop_columns = [
    'originalNameUsageID',
    'originalNameUsage'
]

rename_columns = {
    'scientificNameID': 'ID',
    'taxonRank': 'rank',
    'scientificName': 'scientificName',
    'genericName': 'genus',
    'scientificNameAuthorship': 'authorship',
    'nomenclaturalStatus': 'status'
}

df5.drop(columns=drop_columns, inplace=True)
df5.rename(columns=rename_columns, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_namerel['type'] = 'basionym'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_namerel.rename(columns={'scientificNameID': 'nameID', 'originalNameUsageID': 'relatedNameID'}, inplace=True)


### Taxa

In [4]:
# Read Taxon export into dataframe
df_taxa = pd.read_csv(taxonfile)
df_taxa = df_taxa[df_taxa['nameType'] == 'scientific']

# Create dataframe with IDs
# This is used later to replace tree element IDs (in taxonID) with instance IDs (in taxonConceptID)
df_id = df_taxa[['taxonID', 'taxonConceptID']]

In [5]:
# Filter on accepted names; these are the taxa
df_tax = df_taxa[df_taxa['taxonomicStatus'] == 'accepted']

# Replace tree element IDs in taxonID with instance IDs (see above)
df_tax['taxonID'] = df_tax['taxonConceptID']

# Replace tree element IDs in parentNameUsageID with instance IDs
df_tax = df_tax.merge(df_id, how='left', left_on='parentNameUsageID', right_on='taxonID')
df_tax['parentNameUsageID'] = df_tax['taxonConceptID_y']
df_tax.drop(columns=['taxonID_y', 'taxonConceptID_y'], inplace=True)
df_tax.rename(columns={'taxonID_x': 'taxonID', 'taxonConceptID_x': 'taxonConceptID'}, inplace=True)

df_tax = df_tax[['taxonID',
 'scientificNameID',
 'scientificName',
 'scientificNameAuthorship',
 'nameAccordingTo',
 'nameAccordingToID',
 'parentNameUsageID',
 'taxonRank',
 'taxonRankSortOrder',
 'kingdom',
 'class',
 'subclass',
 'family',
 'taxonConceptID',
 'taxonRemarks',
 'higherClassification'
]]

# Translate ranks into English
df_tax['taxonRank'] = df_tax['taxonRank'].str.lower()
ranks = {
    'genus': 'genus',
    'species': 'species',
    'familia': 'family',
    'subfamilia': 'subfamily',
    'ordo': 'order',
    'subordo': 'suborder',
    'superordo': 'superorder',
    'subspecies': 'subspecies',
    'classis': 'class',
    'subclassis': 'subclass',
    'subdivision': 'subphylum',
    'subbdivision': 'subphylum',
    'varietas': 'variety',
    'subgenus': 'subgenus',
    'superspecies': 'superspecies',
    'forma': 'form',
    'division': 'phylum',
    'regnum': 'kingdom',
    'special form': 'special form',
    'sectio': 'section',
    'regio': 'domain',
    '[unknown]': '[unknown]'
}

taxon_ranks = []
for index, row in df_tax.iterrows():
    taxon_ranks.append(ranks[row['taxonRank']])

df_tax['taxonRank'] = taxon_ranks


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tax['taxonID'] = df_tax['taxonConceptID']


#### Higher classification

In [6]:
# Create dictionary with higher taxa from instance ID
def get_higher_taxa(id, higher={}):
    row = df_tax.loc[df_tax['taxonID'] == id]

    if len(row) > 0:
        parent = row[[
            'taxonRank', 
            'scientificName', 
            'scientificNameAuthorship', 
            'parentNameUsageID']].to_dict(orient='records')[0]
        
        if isinstance(parent['scientificNameAuthorship'], str):
            higher[parent['taxonRank']] = parent['scientificName'][0:len(parent['scientificName'])-len(parent['scientificNameAuthorship'])-1]
        else:
            higher[parent['taxonRank']] = parent['scientificName']

        if isinstance(parent['parentNameUsageID'], str):
            return get_higher_taxa(parent['parentNameUsageID'], higher)
        else:
            return higher
    else:
        return higher

In [7]:
# Create higher classification
def create_higher_classification(id):
    higher = get_higher_taxa(id, {})
    keys = list(higher.keys())

    hcl = {}

    hcl['taxonID'] = id
    hcl['species'] = higher['species'] if 'species' in keys else np.NaN
    hcl['section'] = higher['section'] if 'section' in keys else np.NaN
    hcl['subgenus'] = higher['subgenus'] if 'subgenus' in keys else np.NaN
    hcl['genus'] = higher['genus'] if 'genus' in keys else np.NaN
    hcl['subtribe'] = higher['subtribe'] if 'subtribe' in keys else np.NaN
    hcl['tribe'] = higher['tribe'] if 'tribe' in keys else np.NaN
    hcl['subfamily'] = higher['subfamily'] if 'subfamily' in keys else np.NaN
    hcl['family'] = higher['family'] if 'family' in keys else np.NaN
    hcl['superfamily'] = higher['superfamily'] if 'superfamily' in keys else np.NaN
    hcl['suborder'] = higher['suborder'] if 'suborder' in keys else np.NaN
    hcl['order'] = higher['order'] if 'order' in keys else np.NaN
    hcl['subclass'] = higher['subclass'] if 'subclass' in keys else np.NaN
    hcl['class'] = higher['class'] if 'class' in keys else np.NaN
    hcl['subphylum'] = higher['subphylum'] if 'subphylum' in keys else np.NaN
    hcl['phylum'] = higher['phylum'] if 'phylum' in keys else np.NaN
    hcl['kingdom'] = higher['kingdom'] if 'kingdom' in keys else np.NaN

    cl = list(higher.values())
    for i in range(len(cl) // 2):
        cl[i], cl[-1 - i] = cl[-1 - i], cl[i]

    hcl['classification'] = ' | '.join(cl)

    return hcl

create_higher_classification('https://id.biodiversity.org.au/instance/lichen/30043034')


{'taxonID': 'https://id.biodiversity.org.au/instance/lichen/30043034',
 'species': nan,
 'section': nan,
 'subgenus': nan,
 'genus': nan,
 'subtribe': nan,
 'tribe': nan,
 'subfamily': nan,
 'family': nan,
 'superfamily': nan,
 'suborder': nan,
 'order': nan,
 'subclass': nan,
 'class': nan,
 'subphylum': nan,
 'phylum': nan,
 'kingdom': nan,
 'classification': ''}

In [8]:
cl = []
for index, row in df_tax.iterrows():
    cl.append(create_higher_classification(row['taxonID']))

df_higher = pd.DataFrame.from_dict(cl)

df_higher = df_higher.merge(df_tax[['taxonID', 'scientificName', 'taxonRank']], how='left', left_on='taxonID', right_on='taxonID')

df_higher = df_higher[['taxonID',
 'scientificName',
 'taxonRank',
 'kingdom',
 'phylum',
 'subphylum',
 'class',
 'subclass',
 'order',
 'suborder',
 'superfamily',
 'family',
 'tribe',
 'subfamily',
 'subtribe',
 'genus',
 'subgenus',
 'section',
 'species',
 'classification']]

df_higher.rename(columns={'classification': 'higherClassification'}, inplace=True)

df_higher

Unnamed: 0,taxonID,scientificName,taxonRank,kingdom,phylum,subphylum,class,subclass,order,suborder,superfamily,family,tribe,subfamily,subtribe,genus,subgenus,section,species,higherClassification
0,https://id.biodiversity.org.au/instance/algae/...,Eukaryota,kingdom,Eukaryota,,,,,,,,,,,,,,,,Eukaryota
1,https://id.biodiversity.org.au/instance/algae/...,Anthozoa,phylum,Eukaryota,Anthozoa,,,,,,,,,,,,,,,Eukaryota | Anthozoa
2,https://id.biodiversity.org.au/instance/algae/...,Coraliidae,family,Eukaryota,Anthozoa,,,,,,,Coraliidae,,,,,,,,Eukaryota | Anthozoa | Coraliidae
3,https://id.biodiversity.org.au/instance/algae/...,Bacillariophyta,phylum,Eukaryota,Bacillariophyta,,,,,,,,,,,,,,,Eukaryota | Bacillariophyta
4,https://id.biodiversity.org.au/instance/algae/...,Bacillariophyceae,class,Eukaryota,Bacillariophyta,,Bacillariophyceae,,,,,,,,,,,,,Eukaryota | Bacillariophyta | Bacillariophyceae
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3006,https://id.biodiversity.org.au/instance/algae/...,Vaucheriales,order,Eukaryota,Xanthophyta,,Xanthophyceae,,Vaucheriales,,,,,,,,,,,Eukaryota | Xanthophyta | Xanthophyceae | Vauc...
3007,https://id.biodiversity.org.au/instance/algae/...,Vaucheriaceae,family,Eukaryota,Xanthophyta,,Xanthophyceae,,Vaucheriales,,,Vaucheriaceae,,,,,,,,Eukaryota | Xanthophyta | Xanthophyceae | Vauc...
3008,https://id.biodiversity.org.au/instance/algae/...,Vaucheria DC.,genus,Eukaryota,Xanthophyta,,Xanthophyceae,,Vaucheriales,,,Vaucheriaceae,,,,Vaucheria,,,,Eukaryota | Xanthophyta | Xanthophyceae | Vauc...
3009,https://id.biodiversity.org.au/instance/algae/...,Vaucheria caloundrensis Cribb,species,Eukaryota,Xanthophyta,,Xanthophyceae,,Vaucheriales,,,Vaucheriaceae,,,,Vaucheria,,,Vaucheria caloundrensis,Eukaryota | Xanthophyta | Xanthophyceae | Vauc...


#### Synonyms

In [9]:
# Filter on synonyms
df_syn = df_taxa[df_taxa['taxonomicStatus'].isin(['synonym', 'nomenclatural synonym', 'taxonomic synonym']) & ~df_taxa['acceptedNameUsageID'].isna()][[
    'taxonID',
    'scientificNameID',
    'scientificName',
    'acceptedNameUsageID',
    'acceptedNameUsage',
    'taxonomicStatus'
]]

# Replace tree element IDs in acceptedNameUsageID with instance IDs
df_syn = df_syn.merge(df_id, how='left', left_on='acceptedNameUsageID', right_on='taxonID')

df_syn.drop(columns=['acceptedNameUsageID', 'taxonID_y'], inplace=True)
df_syn.rename(columns={
    'taxonID_x': 'ID',
    'taxonConceptID': 'taxonID',
    'scientificNameID': 'nameID'    
}, inplace=True)
df_syn = df_syn[['ID', 'taxonID', 'nameID', 'scientificName', 'acceptedNameUsage', 'taxonomicStatus']]

# Get accepted names
df_dwc_syn = df_syn.merge(df_tax[['taxonID', 'scientificName']], left_on='taxonID', right_on='taxonID')

df_dwc_syn.drop(columns=['taxonID', 'nameID', 'acceptedNameUsage'], inplace=True)

df_dwc_syn.rename(columns={
    'ID': 'taxonID',
    'scientificName_x': 'scientificName',
    'scientificName_y': 'acceptedNameUsage'
}, inplace=True)

df_syn.drop(columns=['scientificName', 'acceptedNameUsage', 'taxonomicStatus'], inplace=True)

df_syn

Unnamed: 0,ID,taxonID,nameID
0,https://id.biodiversity.org.au/instance/algae/...,http://id.biodiversity.org.au/instance/algae/2...,https://id.biodiversity.org.au/name/algae/2015...
1,https://id.biodiversity.org.au/instance/algae/...,http://id.biodiversity.org.au/instance/algae/2...,https://id.biodiversity.org.au/name/algae/2015...
2,https://id.biodiversity.org.au/instance/algae/...,http://id.biodiversity.org.au/instance/algae/2...,https://id.biodiversity.org.au/name/algae/2015...
3,https://id.biodiversity.org.au/instance/algae/...,http://id.biodiversity.org.au/instance/algae/2...,https://id.biodiversity.org.au/name/algae/2015...
4,https://id.biodiversity.org.au/instance/algae/...,http://id.biodiversity.org.au/instance/algae/2...,https://id.biodiversity.org.au/name/algae/2008...
...,...,...,...
1101,https://id.biodiversity.org.au/instance/algae/...,http://id.biodiversity.org.au/instance/algae/2...,https://id.biodiversity.org.au/name/algae/2000...
1102,https://id.biodiversity.org.au/instance/algae/...,http://id.biodiversity.org.au/instance/algae/2...,https://id.biodiversity.org.au/name/algae/2000...
1103,https://id.biodiversity.org.au/instance/algae/...,http://id.biodiversity.org.au/instance/algae/2...,https://id.biodiversity.org.au/name/algae/2000...
1104,https://id.biodiversity.org.au/instance/algae/...,http://id.biodiversity.org.au/instance/algae/2...,https://id.biodiversity.org.au/name/algae/2001...


#### Remove excess columns

In [10]:
df_tax = df_tax[['taxonID',
 'scientificNameID',
 'nameAccordingToID',
 'parentNameUsageID',
 'taxonRemarks']]

df_tax.rename(columns={
    'taxonID': 'ID',
    'scientificNameID': 'nameID',
    'nameAccordingToID': 'accordingToID',
    'parentNameUsageID': 'parentID',
    'taxonRemarks': 'remarks'
}, inplace=True)

df_tax

Unnamed: 0,ID,nameID,accordingToID,parentID,remarks
0,https://id.biodiversity.org.au/instance/algae/...,https://id.biodiversity.org.au/name/algae/2020...,https://id.biodiversity.org.au/reference/aani/...,,
1,https://id.biodiversity.org.au/instance/algae/...,https://id.biodiversity.org.au/name/algae/2001...,https://id.biodiversity.org.au/reference/aani/...,https://id.biodiversity.org.au/instance/algae/...,
2,https://id.biodiversity.org.au/instance/algae/...,https://id.biodiversity.org.au/name/algae/2001...,https://id.biodiversity.org.au/reference/aani/...,https://id.biodiversity.org.au/instance/algae/...,
3,https://id.biodiversity.org.au/instance/algae/...,https://id.biodiversity.org.au/name/algae/2016...,https://id.biodiversity.org.au/reference/aani/...,https://id.biodiversity.org.au/instance/algae/...,
4,https://id.biodiversity.org.au/instance/algae/...,https://id.biodiversity.org.au/name/algae/2015...,https://id.biodiversity.org.au/reference/aani/...,https://id.biodiversity.org.au/instance/algae/...,
...,...,...,...,...,...
3006,https://id.biodiversity.org.au/instance/algae/...,https://id.biodiversity.org.au/name/algae/2000...,https://id.biodiversity.org.au/reference/aani/...,https://id.biodiversity.org.au/instance/algae/...,
3007,https://id.biodiversity.org.au/instance/algae/...,https://id.biodiversity.org.au/name/algae/2009...,https://id.biodiversity.org.au/reference/aani/...,https://id.biodiversity.org.au/instance/algae/...,
3008,https://id.biodiversity.org.au/instance/algae/...,https://id.biodiversity.org.au/name/algae/2000...,https://id.biodiversity.org.au/reference/aani/...,https://id.biodiversity.org.au/instance/algae/...,
3009,https://id.biodiversity.org.au/instance/algae/...,https://id.biodiversity.org.au/name/algae/2001...,https://id.biodiversity.org.au/reference/aani/...,https://id.biodiversity.org.au/instance/algae/...,


### Reference

In [11]:
df_ref = df_taxa[['nameAccordingToID', 'nameAccordingTo']]
df_ref.drop_duplicates(inplace=True)
df_ref.rename(columns={'nameAccordingToID': 'ID', 'nameAccordingTo': 'citation'}, inplace=True)
df_ref

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ref.drop_duplicates(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ref.rename(columns={'nameAccordingToID': 'ID', 'nameAccordingTo': 'citation'}, inplace=True)


Unnamed: 0,ID,citation
0,https://id.biodiversity.org.au/reference/aani/...,"Algal Subcommittee (2019), CHAH Algal Subcommi..."
8,https://id.biodiversity.org.au/reference/aani/...,"Guiry, M.D. & Guiry, G M. (2021), -. Algaebase..."
10,https://id.biodiversity.org.au/reference/aani/...,"McCarthy, P. (2012), Census of Australian Mari..."
16,https://id.biodiversity.org.au/reference/aani/...,"Algal Subcommittee (2021), CHAH Algal Subcommi..."
35,https://id.biodiversity.org.au/reference/aani/...,"Guiry, M.D. & Guiry, G M. (2022), -. Algaebase..."
41,https://id.biodiversity.org.au/reference/aani/...,"Bukhtiyarova, L. (1995), New taxonomic combina..."
82,https://id.biodiversity.org.au/reference/aani/...,"ABRS (2022), Australian Algal Name Index Edn. ..."
92,https://id.biodiversity.org.au/reference/aani/...,"Entwisle, T.J. & Nairn, L. (2011), Census of F..."
97,https://id.biodiversity.org.au/reference/aani/...,"Smith, W. (1856), Syn. Brit. Diatomaceae 2"
155,https://id.biodiversity.org.au/reference/aani/...,"Algal Subcommittee (2022), CHAH Algal Subcommi..."


### Create CoLDP

In [12]:
import os
from zipfile import ZipFile

os.chdir('/home/niels/code/jupyter-notebooks/nsl_export/coldp/algae')

df_tax.to_csv('taxon.tsv', sep='\t', index=False)
df_syn.to_csv('synonym.tsv', sep='\t', index=False)
df_nam.to_csv('name.tsv', sep='\t', index=False)
df_name_relationships.to_csv('dwc_basionyms.tsv', sep='\t', index=False)
df_namerel.to_csv('namerelation.tsv', sep='\t', index=False)
df_higher.to_csv('dwc_higherclassification.tsv', sep='\t', index=False)
df_dwc_syn.to_csv('dwc_synonym.tsv', sep='\t', index=False)
df_ref.to_csv('reference.tsv', sep='\t', index='False')

with ZipFile('nsl_algae_coldp.zip', 'w') as zipobj:
    zipobj.write('taxon.tsv')
    zipobj.write('name.tsv')
    zipobj.write('synonym.tsv')
    zipobj.write('namerelation.tsv')
    zipobj.write('reference.tsv')

os.chdir('/home/niels/code/jupyter-notebooks/nsl_export')