In [2]:
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
pandas2ri.activate()

import rpy2.robjects.packages as rpackages
from rpy2.robjects.vectors import StrVector

import pandas as pd
import numpy as np
import re
from unidecode import unidecode

  from pandas.core.index import Index as PandasIndex


### installing and importing relevant R packages

In [3]:
# Handle to R's utility package; it provides the CRAN helpers called below.
utils = importr('utils')

# Use the first mirror in R's CRAN mirror list as the download source.
utils.chooseCRANmirror(ind=1)

# R packages this notebook needs; only those not yet installed are downloaded.
packnames = ('data.table', 'utils', 'stringr', 'dplyr', 'WorldFlora')
names_to_install = list(filter(lambda pkg: not rpackages.isinstalled(pkg), packnames))
if names_to_install:
    utils.install_packages(StrVector(names_to_install))

In [4]:
# Load the WorldFlora R package so its functions can be called from Python.
WorldFlora = rpackages.importr("WorldFlora")

### importing inputs and making modifications

#### mouterde

In [5]:
# Column names as they appear in the Mouterde index CSVs, mapped to the
# mouterde_-prefixed names used by the rest of this notebook.
mouterde_column_names = {
    'closest_genus': 'mouterde_genus',
    'closest_epithet': 'mouterde_epithet',
    'authors': 'mouterde_author',
    'closest_infra_name': 'mouterde_infra',
}

vol1_path = '../output/local/index_output/vol1_nonitalics.csv'
vol2_path = '../output/local/index_output/vol2_nonitalics.csv'
vol3_path = '../output/local/index_output/vol3_nonitalics.csv'

# Read each volume's index and rename its columns in one step.
vol1_df = pd.read_csv(vol1_path).rename(columns=mouterde_column_names)
vol2_df = pd.read_csv(vol2_path).rename(columns=mouterde_column_names)
vol3_df = pd.read_csv(vol3_path).rename(columns=mouterde_column_names)


In [6]:
# Replace missing values with empty strings so the later cells, which compare
# fields against "" and pass them to unidecode, only ever see strings.
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
vol1_df = vol1_df.replace(np.nan, "")
vol2_df = vol2_df.replace(np.nan, "")
vol3_df = vol3_df.replace(np.nan, "")

In [7]:
def get_taxon_name(row):
    """Build a taxon name from a row's genus, epithet and infraspecific parts.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'mouterde_genus', 'mouterde_epithet' and
        'mouterde_infra'. A part equal to the empty string counts as missing.

    Returns
    -------
    str
        The non-empty parts joined by single spaces, in the order genus,
        epithet, infra. Returns "" when all three parts are empty.
    """
    parts = (row['mouterde_genus'], row['mouterde_epithet'], row['mouterde_infra'])
    # Joining only the non-empty parts keeps the name free of a leading space
    # when the genus is missing but an epithet or infra name is present.
    return " ".join(f"{part}" for part in parts if part != "")

In [8]:
# Build the full Mouterde name for every row of each volume, then pass it
# through unidecode to transliterate it to ASCII.
for volume_df in (vol1_df, vol2_df, vol3_df):
    taxon_names = volume_df.apply(get_taxon_name, axis=1)
    volume_df['mouterde_name'] = taxon_names.apply(unidecode)

In [9]:
# Transliterate the author strings of every volume to ASCII with unidecode.
for volume_df in (vol1_df, vol2_df, vol3_df):
    authors = volume_df['mouterde_author']
    volume_df['mouterde_author'] = authors.apply(unidecode)

#### wcvp

In [10]:
# Pipe-delimited WCVP text file (the file name suggests version 9, June 2022).
text_wcvp_path = '../input/input copy/wcvp/wcvp_v9_jun_2022.txt'
wcvp_df = pd.read_csv(text_wcvp_path, delimiter='|')

In [11]:
# Attach the hybrid sign directly to the following word (" × " -> " ×"),
# then transliterate the names to ASCII with unidecode.
# regex=False makes the replacement a literal one regardless of the pandas
# version's default for `regex`.
wcvp_df['taxon_name'] = (
    wcvp_df['taxon_name']
    .str.replace(" × ", " ×", regex=False)
    .apply(unidecode)
)

# Replace missing values with empty strings.
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
wcvp_df = wcvp_df.replace(np.nan, "")

In [12]:
# Keyword arguments for new_backbone: each key is a new_backbone parameter,
# each value the wcvp_df column name passed for it.
backbone_fields = {
    "taxonID": "kew_id",
    "scientificName": "taxon_name",
    "scientificNameAuthorship": "authors",
    "acceptedNameUsageID": "accepted_kew_id",
    "taxonomicStatus": "taxonomic_status",
}

# Build the backbone data set from the WCVP table.
WCVP_data = WorldFlora.new_backbone(wcvp_df, **backbone_fields)

  for name, values in obj.iteritems():


In [40]:
# Reusable pieces of the filters below.
is_genus = WCVP_data['rank'] == 'GENUS'
sci_name = WCVP_data['scientificName']

# For the GENUS rows only: whether the scientific name contains a space.
keep = sci_name[is_genus].str.contains(' ').eq(True)

# Show the rank of GENUS rows whose name contains a space but no letter "x".
has_space = sci_name.str.contains(' ')
has_x = sci_name.str.contains('x')
WCVP_data.loc[is_genus & has_space & ~has_x, 'rank']

321697     GENUS
321698     GENUS
321699     GENUS
321700     GENUS
321701     GENUS
321702     GENUS
333004     GENUS
333005     GENUS
333095     GENUS
335774     GENUS
336319     GENUS
337090     GENUS
337927     GENUS
338515     GENUS
341255     GENUS
341288     GENUS
345186     GENUS
345206     GENUS
347950     GENUS
351801     GENUS
551412     GENUS
1076298    GENUS
1078493    GENUS
1083989    GENUS
1083990    GENUS
1088421    GENUS
1088422    GENUS
1088424    GENUS
Name: rank, dtype: object

### running WorldFlora matching for genera

In [None]:
# Keep only the volume 1 rows whose taxon_rank is 'genus'.
vol1_genus = vol1_df.loc[vol1_df['taxon_rank'] == 'genus']

In [None]:

# NOTE(review): WCVP_genus_data is not defined in the cells visible here;
# confirm it is created before this cell runs on a fresh kernel.
# Step 1: match the names in the "mouterde_name" column against the backbone.
vol1_genus_candidates = WorldFlora.WFO_match(
    vol1_genus,
    WFO_data=WCVP_genus_data,
    spec_name="mouterde_name",
)

# Step 2: pass the match table through WFO_one.
vol1_genus_match_db = WorldFlora.WFO_one(vol1_genus_candidates)