# NSL export to CoLDP: bryophytes

In [22]:
import pandas as pd
import numpy as np

taxonfile = 'data/bryophytes/APC-taxon-2023-06-23-5923.csv'
namefile = 'data/bryophytes/AusMoss-names-2023-06-23-5914.csv'

### Names

In [23]:
df_names = pd.read_csv(namefile)

# Remove excess columns
columns = [
    'scientificNameID', 
    'nameAccordingToID', 
    'taxonRank', 
    'scientificName', 
    'genericName', 
    'specificEpithet', 
    'infraspecificEpithet', 
    'scientificNameAuthorship', 
    'nomenclaturalStatus',
    'originalNameUsageID', 
    'originalNameUsage'
]

df1 = df_names[columns]

# Instance ID is in nameAccordingToID for reasons unfathomable; rename to taxonID
df1 = df1.rename(columns={'nameAccordingToID': 'taxonID'})

# Filter for names with originalNameUsage
# Replace instance IDs in originalNameUsageID with scientificNameIDs; we will not need taxonID anymore after that
df2 = df1[~df1['originalNameUsageID'].isna()].merge(df1, how='left', left_on='originalNameUsageID', right_on='taxonID')[[
    'scientificNameID_x', 
    'taxonRank_x', 
    'scientificName_x', 
    'genericName_x', 
    'specificEpithet_x', 
    'infraspecificEpithet_x', 
    'scientificNameAuthorship_x', 
    'nomenclaturalStatus_x', 
    'scientificNameID_y', 
    'scientificName_y'
]]

column_mappings = {
    'scientificNameID_x': 'scientificNameID',
    'scientificName_x': 'scientificName',
    'taxonRank_x': 'taxonRank',
    'genericName_x': 'genericName',
    'specificEpithet_x': 'specificEpithet',
    'infraspecificEpithet_x': 'infraspecificEpithet',
    'scientificNameAuthorship_x': 'scientificNameAuthorship',
    'nomenclaturalStatus_x': 'nomenclaturalStatus',
    'scientificNameID_y': 'originalNameUsageID',
    'scientificName_y': 'originalNameUsage'
}

df2 = df2.rename(columns=column_mappings)

# Filter for names without originalNameUsage
df3 = df_names[df_names['originalNameUsage'].isna()][columns]
df3.drop(columns=['nameAccordingToID'], inplace=True)

# Merge dataframes
df5 = pd.concat([df2, df3])
df5 = df5.sort_values(by='scientificName')

# Make taxonRank lowercase
df5['taxonRank'] = df5['taxonRank'].str.lower()

# Map Latin rank names to less wankerish English ones
ranks = {
    'genus': 'genus',
    'species': 'species',
    'familia': 'family',
    'subfamilia': 'subfamily',
    'ordo': 'order',
    'subordo': 'suborder',
    'superordo': 'superorder',
    'subspecies': 'subspecies',
    'classis': 'class',
    'subclassis': 'subclass',
    'subdivision': 'subphylum',
    'subbdivision': 'subphylum',
    'varietas': 'variety',
    'subgenus': 'subgenus',
    'superspecies': 'superspecies',
    'forma': 'form',
    'division': 'phylum',
    'regnum': 'kingdom',
    'special form': 'special form',
    'sectio': 'section',
    'regio': 'domain',
    '[unknown]': '[unknown]'
}

taxon_ranks = []
uninomials = []
generic_names = []
for index, row in df5.iterrows():
    taxon_ranks.append(ranks[row['taxonRank']])

    uninomial = np.NaN
    generic_name = row['genericName']

    if not isinstance(row['genericName'], str):
        if not isinstance(row['scientificNameAuthorship'], str):
            uninomial = row['scientificName']
        else:
            uninomial = row['scientificName'][0:len(row['scientificName'])-len(row['scientificNameAuthorship'])-1]

    if row['taxonRank'] == 'genus':
        uninomial = row['genericName']
        generic_name = np.NaN

    uninomials.append(uninomial)
    generic_names.append(generic_name)

df5['taxonRank'] = taxon_ranks
df5['genericName'] = generic_names
df5['uninomial'] = uninomials

# Add nomenclaturalCode; 'ICN' in GBIF vocab. but 'botanical' in CoLDP
df5['code'] = 'botanical'

df5 = df5[[
    'scientificNameID', 
    'taxonRank', 
    'scientificName', 
    'uninomial',
    'genericName', 
    'specificEpithet', 
    'infraspecificEpithet', 
    'scientificNameAuthorship', 
    'code', 
    'nomenclaturalStatus', 
    'originalNameUsageID', 
    'originalNameUsage'
  ]]

df_nam = df5
df5

Unnamed: 0,scientificNameID,taxonRank,scientificName,uninomial,genericName,specificEpithet,infraspecificEpithet,scientificNameAuthorship,code,nomenclaturalStatus,originalNameUsageID,originalNameUsage
8124,https://id.biodiversity.org.au/name/ausmoss/10...,genus,Acanthocladium Mitt.,Acanthocladium,,,,Mitt.,botanical,nom. illeg.,,
2775,https://id.biodiversity.org.au/name/ausmoss/10...,species,Acanthocladium crinitum (Hook.f. & Wilson) Bro...,,Acanthocladium,crinitum,,(Hook.f. & Wilson) Broth. ex Paris,botanical,,https://id.biodiversity.org.au/name/ausmoss/10...,Hypnum crinitum Hook.f. & Wilson
7553,https://id.biodiversity.org.au/name/ausmoss/10...,species,Acanthocladium crossii Broth. & Geh. ex Broth.,,Acanthocladium,crossii,,Broth. & Geh. ex Broth.,botanical,,,
2776,https://id.biodiversity.org.au/name/ausmoss/10...,species,Acanthocladium extenuatum (Brid.) Mitt.,,Acanthocladium,extenuatum,,(Brid.) Mitt.,botanical,,https://id.biodiversity.org.au/name/ausmoss/10...,Hypnum extenuatum Brid.
7554,https://id.biodiversity.org.au/name/ausmoss/10...,form,Acanthocladium extenuatum f. flagellaris Broth...,,Acanthocladium,extenuatum,flagellaris,Broth. ex Watts,botanical,"nom. inval., nom. nud.",,
...,...,...,...,...,...,...,...,...,...,...,...,...
7803,https://id.biodiversity.org.au/name/ausmoss/10...,species,Zygodon remotidens Müll.Hal.,,Zygodon,remotidens,,Müll.Hal.,botanical,,,
6263,https://id.biodiversity.org.au/name/ausmoss/10...,species,Zygodon rodwayi Broth.,,Zygodon,rodwayi,,Broth.,botanical,,,
2510,https://id.biodiversity.org.au/name/ausmoss/10...,species,Zygodon scaber Müll.Hal. ex Geh.,,Zygodon,scaber,,Müll.Hal. ex Geh.,botanical,"nom. inval., nom. nud.",https://id.biodiversity.org.au/name/ausmoss/10...,Triquetrella scabra Müll.Hal.
2511,https://id.biodiversity.org.au/name/ausmoss/10...,species,Zygodon schwaegrichenii Müll.Hal.,,Zygodon,schwaegrichenii,,Müll.Hal.,botanical,nom. illeg.,https://id.biodiversity.org.au/name/ausmoss/10...,Codonoblepharon schwaegrichenii A.Jaeger


### Name relations

In [24]:
df_name_relationships = df5[~df5['originalNameUsageID'].isna()][['scientificNameID', 'scientificName', 'originalNameUsageID', 'originalNameUsage']]

df_namerel = df_name_relationships[['scientificNameID', 'originalNameUsageID']]
df_namerel['type'] = 'basionym'
df_namerel.rename(columns={'scientificNameID': 'nameID', 'originalNameUsageID': 'relatedNameID'}, inplace=True)

# Remove originalNameUsage columns from Names
drop_columns = [
    'originalNameUsageID',
    'originalNameUsage'
]

rename_columns = {
    'scientificNameID': 'ID',
    'taxonRank': 'rank',
    'scientificName': 'scientificName',
    'genericName': 'genus',
    'scientificNameAuthorship': 'authorship',
    'nomenclaturalStatus': 'status'
}

df5.drop(columns=drop_columns, inplace=True)
df5.rename(columns=rename_columns, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_namerel['type'] = 'basionym'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_namerel.rename(columns={'scientificNameID': 'nameID', 'originalNameUsageID': 'relatedNameID'}, inplace=True)


### Taxa

In [25]:
# Read Taxon export into dataframe
df_taxa = pd.read_csv(taxonfile)
df_taxa = df_taxa[df_taxa['nameType'] == 'scientific']

# Create dataframe with IDs
# This is used later to replace tree element IDs (in taxonID) with instance IDs (in taxonConceptID)
df_id = df_taxa[['taxonID', 'taxonConceptID']]

In [26]:
# Filter on accepted names; these are the taxa
df_tax = df_taxa[df_taxa['taxonomicStatus'] == 'accepted']

# Replace tree element IDs in taxonID with instance IDs (see above)
df_tax['taxonID'] = df_tax['taxonConceptID']

# Replace tree element IDs in parentNameUsageID with instance IDs
df_tax = df_tax.merge(df_id, how='left', left_on='parentNameUsageID', right_on='taxonID')
df_tax['parentNameUsageID'] = df_tax['taxonConceptID_y']
df_tax.drop(columns=['taxonID_y', 'taxonConceptID_y'], inplace=True)
df_tax.rename(columns={'taxonID_x': 'taxonID', 'taxonConceptID_x': 'taxonConceptID'}, inplace=True)

df_tax = df_tax[['taxonID',
 'scientificNameID',
 'scientificName',
 'scientificNameAuthorship',
 'nameAccordingTo',
 'nameAccordingToID',
 'parentNameUsageID',
 'taxonRank',
 'taxonRankSortOrder',
 'kingdom',
 'class',
 'subclass',
 'family',
 'taxonConceptID',
 'taxonRemarks',
 'higherClassification'
]]

# Translate ranks into English
df_tax['taxonRank'] = df_tax['taxonRank'].str.lower()
ranks = {
    'genus': 'genus',
    'species': 'species',
    'familia': 'family',
    'subfamilia': 'subfamily',
    'ordo': 'order',
    'subordo': 'suborder',
    'superordo': 'superorder',
    'subspecies': 'subspecies',
    'classis': 'class',
    'subclassis': 'subclass',
    'subdivision': 'subphylum',
    'subbdivision': 'subphylum',
    'varietas': 'variety',
    'subgenus': 'subgenus',
    'superspecies': 'superspecies',
    'forma': 'form',
    'division': 'phylum',
    'regnum': 'kingdom',
    'special form': 'special form',
    'sectio': 'section',
    'regio': 'domain',
    '[unknown]': '[unknown]'
}

taxon_ranks = []
for index, row in df_tax.iterrows():
    taxon_ranks.append(ranks[row['taxonRank']])

df_tax['taxonRank'] = taxon_ranks


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tax['taxonID'] = df_tax['taxonConceptID']


#### Higher classification

In [27]:
# Create dictionary with higher taxa from instance ID
def get_higher_taxa(id, higher={}):
    parent = df_tax.loc[df_tax['taxonID'] == id][[
      'taxonRank', 
      'scientificName', 
      'scientificNameAuthorship', 
      'parentNameUsageID']].to_dict(orient='records')[0]
    
    if isinstance(parent['scientificNameAuthorship'], str):
        higher[parent['taxonRank']] = parent['scientificName'][0:len(parent['scientificName'])-len(parent['scientificNameAuthorship'])-1]
    else:
        higher[parent['taxonRank']] = parent['scientificName']

    if isinstance(parent['parentNameUsageID'], str):
        return get_higher_taxa(parent['parentNameUsageID'], higher)
    else:
        return higher

In [28]:
# Create higher classification
def create_higher_classification(id):
    higher = get_higher_taxa(id, {})
    keys = list(higher.keys())

    hcl = {}

    hcl['taxonID'] = id
    hcl['species'] = higher['species'] if 'species' in keys else np.NaN
    hcl['section'] = higher['section'] if 'section' in keys else np.NaN
    hcl['subgenus'] = higher['subgenus'] if 'subgenus' in keys else np.NaN
    hcl['genus'] = higher['genus'] if 'genus' in keys else np.NaN
    hcl['subtribe'] = higher['subtribe'] if 'subtribe' in keys else np.NaN
    hcl['tribe'] = higher['tribe'] if 'tribe' in keys else np.NaN
    hcl['subfamily'] = higher['subfamily'] if 'subfamily' in keys else np.NaN
    hcl['family'] = higher['family'] if 'family' in keys else np.NaN
    hcl['superfamily'] = higher['superfamily'] if 'superfamily' in keys else np.NaN
    hcl['suborder'] = higher['suborder'] if 'suborder' in keys else np.NaN
    hcl['order'] = higher['order'] if 'order' in keys else np.NaN
    hcl['subclass'] = higher['subclass'] if 'subclass' in keys else np.NaN
    hcl['class'] = higher['class'] if 'class' in keys else np.NaN
    hcl['subphylum'] = higher['subphylum'] if 'subphylum' in keys else np.NaN
    hcl['phylum'] = higher['phylum'] if 'phylum' in keys else np.NaN
    hcl['kingdom'] = higher['kingdom'] if 'kingdom' in keys else np.NaN

    cl = list(higher.values())
    for i in range(len(cl) // 2):
        cl[i], cl[-1 - i] = cl[-1 - i], cl[i]

    hcl['classification'] = ' | '.join(cl)

    return hcl


In [29]:
cl = []
for index, row in df_tax.iterrows():
    cl.append(create_higher_classification(row['taxonID']))

df_higher = pd.DataFrame.from_dict(cl)

df_higher = df_higher.merge(df_tax[['taxonID', 'scientificName', 'taxonRank']], how='left', left_on='taxonID', right_on='taxonID')

df_higher = df_higher[['taxonID',
 'scientificName',
 'taxonRank',
 'kingdom',
 'phylum',
 'subphylum',
 'class',
 'subclass',
 'order',
 'suborder',
 'superfamily',
 'family',
 'tribe',
 'subfamily',
 'subtribe',
 'genus',
 'subgenus',
 'section',
 'species',
 'classification']]

df_higher.rename(columns={'classification': 'higherClassification'}, inplace=True)

df_higher

Unnamed: 0,taxonID,scientificName,taxonRank,kingdom,phylum,subphylum,class,subclass,order,suborder,superfamily,family,tribe,subfamily,subtribe,genus,subgenus,section,species,higherClassification
0,https://id.biodiversity.org.au/instance/ausmos...,Plantae Haeckel,kingdom,Plantae,,,,,,,,,,,,,,,,Plantae
1,https://id.biodiversity.org.au/instance/ausmos...,Anthocerotophyta Rothm. ex Stotler & Crand.-St...,phylum,Plantae,Anthocerotophyta,,,,,,,,,,,,,,,Plantae | Anthocerotophyta
2,https://id.biodiversity.org.au/instance/ausmos...,Anthocerotopsida de Bary ex Jancz.,class,Plantae,Anthocerotophyta,,Anthocerotopsida,,,,,,,,,,,,,Plantae | Anthocerotophyta | Anthocerotopsida
3,https://id.biodiversity.org.au/instance/ausmos...,Anthocerotidae Rosenv.,subclass,Plantae,Anthocerotophyta,,Anthocerotopsida,Anthocerotidae,,,,,,,,,,,,Plantae | Anthocerotophyta | Anthocerotopsida ...
4,https://id.biodiversity.org.au/instance/ausmos...,Anthocerotales Limpr.,order,Plantae,Anthocerotophyta,,Anthocerotopsida,Anthocerotidae,Anthocerotales,,,,,,,,,,,Plantae | Anthocerotophyta | Anthocerotopsida ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2509,https://id.biodiversity.org.au/instance/ausmos...,Riella halophila Banwell,species,Plantae,Marchantiophyta,,Marchantiopsida,Marchantiidae,Sphaerocarpales,,,Riellaceae,,,,Riella,,,Riella halophila,Plantae | Marchantiophyta | Marchantiopsida | ...
2510,https://id.biodiversity.org.au/instance/ausmos...,Riella spiculata J.Taylor,species,Plantae,Marchantiophyta,,Marchantiopsida,Marchantiidae,Sphaerocarpales,,,Riellaceae,,,,Riella,,,Riella spiculata,Plantae | Marchantiophyta | Marchantiopsida | ...
2511,https://id.biodiversity.org.au/instance/ausmos...,Sphaerocarpaceae Heeg,family,Plantae,Marchantiophyta,,Marchantiopsida,Marchantiidae,Sphaerocarpales,,,Sphaerocarpaceae,,,,,,,,Plantae | Marchantiophyta | Marchantiopsida | ...
2512,https://id.biodiversity.org.au/instance/ausmos...,Sphaerocarpos Boehm.,genus,Plantae,Marchantiophyta,,Marchantiopsida,Marchantiidae,Sphaerocarpales,,,Sphaerocarpaceae,,,,Sphaerocarpos,,,,Plantae | Marchantiophyta | Marchantiopsida | ...


#### Synonyms

In [30]:
# Filter on synonyms
df_syn = df_taxa[df_taxa['taxonomicStatus'].isin(['synonym', 'nomenclatural synonym', 'taxonomic synonym']) & ~df_taxa['acceptedNameUsageID'].isna()][[
    'taxonID',
    'scientificNameID',
    'scientificName',
    'acceptedNameUsageID',
    'acceptedNameUsage',
    'taxonomicStatus'
]]

# Replace tree element IDs in acceptedNameUsageID with instance IDs
df_syn = df_syn.merge(df_id, how='left', left_on='acceptedNameUsageID', right_on='taxonID')

df_syn.drop(columns=['acceptedNameUsageID', 'taxonID_y'], inplace=True)
df_syn.rename(columns={
    'taxonID_x': 'ID',
    'taxonConceptID': 'taxonID',
    'scientificNameID': 'nameID'    
}, inplace=True)
df_syn = df_syn[['ID', 'taxonID', 'nameID', 'scientificName', 'acceptedNameUsage', 'taxonomicStatus']]

# Get accepted names
df_dwc_syn = df_syn.merge(df_tax[['taxonID', 'scientificName']], left_on='taxonID', right_on='taxonID')

df_dwc_syn.drop(columns=['taxonID', 'nameID', 'acceptedNameUsage'], inplace=True)

df_dwc_syn.rename(columns={
    'ID': 'taxonID',
    'scientificName_x': 'scientificName',
    'scientificName_y': 'acceptedNameUsage'
}, inplace=True)

df_syn.drop(columns=['scientificName', 'acceptedNameUsage', 'taxonomicStatus'], inplace=True)

df_syn

Unnamed: 0,ID,taxonID,nameID
0,https://id.biodiversity.org.au/instance/apni/9...,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/20...
1,https://id.biodiversity.org.au/instance/apni/8...,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/12...
2,https://id.biodiversity.org.au/instance/apni/8...,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/20...
3,https://id.biodiversity.org.au/instance/apni/8...,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/21...
4,https://id.biodiversity.org.au/instance/apni/8...,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/17...
...,...,...,...
2920,https://id.biodiversity.org.au/instance/apni/9...,,https://id.biodiversity.org.au/name/ausmoss/21...
2921,https://id.biodiversity.org.au/instance/apni/8...,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/21...
2922,https://id.biodiversity.org.au/instance/apni/8...,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/21...
2923,https://id.biodiversity.org.au/instance/apni/8...,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/15...


#### Remove excess columns

In [31]:
df_tax = df_tax[['taxonID',
 'scientificNameID',
 'nameAccordingToID',
 'parentNameUsageID',
 'taxonRemarks']]

df_tax.rename(columns={
    'taxonID': 'ID',
    'scientificNameID': 'nameID',
    'nameAccordingToID': 'accordingToID',
    'parentNameUsageID': 'parentID',
    'taxonRemarks': 'remarks'
}, inplace=True)

df_tax

Unnamed: 0,ID,nameID,accordingToID,parentID,remarks
0,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/10...,https://id.biodiversity.org.au/reference/ausmo...,,
1,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/23...,https://id.biodiversity.org.au/reference/ausmo...,https://id.biodiversity.org.au/instance/ausmos...,
2,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/23...,https://id.biodiversity.org.au/reference/ausmo...,https://id.biodiversity.org.au/instance/ausmos...,
3,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/24...,https://id.biodiversity.org.au/reference/ausmo...,https://id.biodiversity.org.au/instance/ausmos...,
4,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/14...,https://id.biodiversity.org.au/reference/ausmo...,https://id.biodiversity.org.au/instance/ausmos...,
...,...,...,...,...,...
2509,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/21...,https://id.biodiversity.org.au/reference/ausmo...,https://id.biodiversity.org.au/instance/ausmos...,
2510,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/21...,https://id.biodiversity.org.au/reference/ausmo...,https://id.biodiversity.org.au/instance/ausmos...,
2511,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/14...,https://id.biodiversity.org.au/reference/ausmo...,https://id.biodiversity.org.au/instance/ausmos...,
2512,https://id.biodiversity.org.au/instance/ausmos...,https://id.biodiversity.org.au/name/ausmoss/13...,https://id.biodiversity.org.au/reference/ausmo...,https://id.biodiversity.org.au/instance/ausmos...,


### Reference

In [32]:
df_ref = df_taxa[['nameAccordingToID', 'nameAccordingTo']]
df_ref.drop_duplicates(inplace=True)
df_ref.rename(columns={'nameAccordingToID': 'ID', 'nameAccordingTo': 'citation'}, inplace=True)
df_ref

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ref.drop_duplicates(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ref.rename(columns={'nameAccordingToID': 'ID', 'nameAccordingTo': 'citation'}, inplace=True)


Unnamed: 0,ID,citation
0,https://id.biodiversity.org.au/reference/ausmo...,"Klazenga, N. (2015), AusMoss: Catalogue of Aus..."
1,https://id.biodiversity.org.au/reference/ausmo...,"Renzaglia, K.S., Villarreal, J.C. & Duff, R.J...."
5,https://id.biodiversity.org.au/reference/ausmo...,"CHAH (2011), Australian Plant Census"
6,https://id.biodiversity.org.au/reference/ausmo...,"CHAH (2010), Australian Plant Census"
7,https://id.biodiversity.org.au/reference/ausmo...,"McCarthy, P.M. (2003), Catalogue of Australian..."
...,...,...
5348,https://id.biodiversity.org.au/reference/ausmo...,"Hewson, H.J. (1970), The family Aneuraceae in ..."
5414,https://id.biodiversity.org.au/reference/ausmo...,"Schiffner, V. (1893), Nova Acta Academiae Caes..."
5672,https://id.biodiversity.org.au/reference/ausmo...,"Linnaeus, C. (1753), Species Plantarum 2"
5713,https://id.biodiversity.org.au/reference/ausmo...,"Muller, K. (1941), Hedwigia 80"


### Create CoLDP

In [33]:
import os
from zipfile import ZipFile

os.chdir('/home/niels/code/jupyter-notebooks/nsl_export/coldp/bryophytes')

df_tax.to_csv('taxon.tsv', sep='\t', index=False)
df_syn.to_csv('synonym.tsv', sep='\t', index=False)
df_nam.to_csv('name.tsv', sep='\t', index=False)
df_name_relationships.to_csv('dwc_basionyms.tsv', sep='\t', index=False)
df_namerel.to_csv('namerelation.tsv', sep='\t', index=False)
df_higher.to_csv('dwc_higherclassification.tsv', sep='\t', index=False)
df_dwc_syn.to_csv('dwc_synonym.tsv', sep='\t', index=False)
df_ref.to_csv('reference.tsv', sep='\t', index='False')

with ZipFile('nsl_bryophytes_coldp.zip', 'w') as zipobj:
    zipobj.write('taxon.tsv')
    zipobj.write('name.tsv')
    zipobj.write('synonym.tsv')
    zipobj.write('namerelation.tsv')
    zipobj.write('reference.tsv')

os.chdir('/home/niels/code/jupyter-notebooks/nsl_export')

