## pymupdf PDF miner script 
### Extact lines of text from pdf file by page number, convert it into a format for taxonomy update and merge mycobank data. Output saved to excel

In [None]:
pip install pymupdf

In [1]:
import pymupdf
import pandas as pd
import re
import numpy as np

doc = pymupdf.open("2024__The2024OutlineofFungiandfungus-liketaxa.pdf") 
#out = open("output.txt", "wb") # create a text output

def extract_text_from_page_span(doc, start_page, end_page):
    text = ""
    for page in doc.pages(start_page, end_page, 1): 
        #text = page.get_text().encode("utf8") # get plain text (is in UTF-8)
        text += page.get_text(sort=True) #preserves the ws at the start of the line
        #text += page.get_text()  #collapses ws at the start of the line
    return text

def join_indented_lines(text):
    lines = text.splitlines()
    result = []
    previous_line = None

    for line in lines:
        #if line.startswith((' ', '\t')):  # Check for indentation (spaces or tabs)
        if line.startswith(' '):  # Check for indentation (spaces or tabs)
            if previous_line is not None:
                result[-1] = result[-1] + ' ' + line.lstrip()  # Append to previous
            else:
                result.append(line.lstrip()) #if the first line is indented, append it directly
        else:
            result.append(line)
            previous_line = line

    return '\n'.join(result)


start_page = 44 #40
end_page = 332
extracted_text = extract_text_from_page_span(doc, start_page, end_page)
#extracted_text = re.sub(r",\s*\n", " ", extracted_text)  #finds lines ending in a comma and appends them to the previous line
text_lines = extracted_text.splitlines()
result_text = join_indented_lines(extracted_text)
#print(result_text)
revised_text= (re.sub(r"\s\d{4}", "", result_text)) 
#print(revised_text)

### Load text to pandas

In [2]:
pd.set_option('display.width', 2000) 
pd.set_option('display.max_colwidth', 300)
pd.set_option('display.max_columns', None)

text_lines = revised_text.splitlines()

def create_dataframe(text_lines):
    df = pd.DataFrame(text_lines, columns=['Text'])
    df['Text'] = df['Text'].astype(str)
    return df

df = create_dataframe(text_lines)
df.replace('', np.nan, inplace=True)
df.dropna(inplace=True)
#df = df.iloc[6:]
df.head(10)

Unnamed: 0,Text
0,"Johansoniaceae Doilom, Phookamsak & K.D. Hyde*FoF04619"
1,Johansonia Sacc. (13)*FoF07635
2,Orthobellus A.A. Silva & Cavalc. (3)*FoF01955
4,"Neoantennariellaceae Abdollahz. & Crous*Notes 554, 555, FoF12736"
5,"Cippumomyces Crous, Overton & Ricci (2)*Note 759, FoF15609"
6,"Fumiglobus D.R. Reynolds & G.S. Gilbert (9)*Note 555, FoF06947"
7,"Neoantennariella Abdollahz. & Crous (1)*Note 554, FoF11064"
8,"Neoasbolisia Abdollahz. & Crous (1)*Note 555, FoF11066"
10,"Piedraiaceae Viégas ex Cif., Bat. & S. Camposa*FoF06972"
11,Piedraia Fonseca. & Leãno (3)*FoF06972


#### Modify the extracted text now in pandas df

#### Extract rank from text

In [3]:
#df['OrgName'] = np.nan
#df['Authority'] = np.nan

def add_column_if_list_member_found(df, column_to_check, list_to_search, new_column_name):
    for index, row in df.iterrows():
        for item in list_to_search:
            if item in row[column_to_check]:
                df.loc[index, new_column_name] = item
                df.loc[index, column_to_check] = row[column_to_check].replace(item, '').strip()
                break # Stop searching after the first match                
    return df
search_rank = ['Phylum', 'Class', 'Subphylum']

df = add_column_if_list_member_found(df, 'Text', search_rank, 'Rank')
df.head(10)

Unnamed: 0,Text,Rank
0,"Johansoniaceae Doilom, Phookamsak & K.D. Hyde*FoF04619",
1,Johansonia Sacc. (13)*FoF07635,
2,Orthobellus A.A. Silva & Cavalc. (3)*FoF01955,
4,"Neoantennariellaceae Abdollahz. & Crous*Notes 554, 555, FoF12736",
5,"Cippumomyces Crous, Overton & Ricci (2)*Note 759, FoF15609",
6,"Fumiglobus D.R. Reynolds & G.S. Gilbert (9)*Note 555, FoF06947",
7,"Neoantennariella Abdollahz. & Crous (1)*Note 554, FoF11064",
8,"Neoasbolisia Abdollahz. & Crous (1)*Note 555, FoF11066",
10,"Piedraiaceae Viégas ex Cif., Bat. & S. Camposa*FoF06972",
11,Piedraia Fonseca. & Leãno (3)*FoF06972,


In [None]:
#print(df.dtypes)

## Find and populate OrgName and Authority column
#### If Text contains 'incertae sedis' split text so that 'incertae sedis' and all text preceding it go in OrgName. Everything following 'incertae sedis' then goes into Authority
#### Else if Text does not contain 'incertae sedis' split Text at the first space so that the first word goes in OrgName and everything else goes in Authority


In [4]:
additional_cols = ['OrgName','Authority']
df2 = df.reindex(df.columns.tolist() + additional_cols, axis = 1)
df2.head(10)

Unnamed: 0,Text,Rank,OrgName,Authority
0,"Johansoniaceae Doilom, Phookamsak & K.D. Hyde*FoF04619",,,
1,Johansonia Sacc. (13)*FoF07635,,,
2,Orthobellus A.A. Silva & Cavalc. (3)*FoF01955,,,
4,"Neoantennariellaceae Abdollahz. & Crous*Notes 554, 555, FoF12736",,,
5,"Cippumomyces Crous, Overton & Ricci (2)*Note 759, FoF15609",,,
6,"Fumiglobus D.R. Reynolds & G.S. Gilbert (9)*Note 555, FoF06947",,,
7,"Neoantennariella Abdollahz. & Crous (1)*Note 554, FoF11064",,,
8,"Neoasbolisia Abdollahz. & Crous (1)*Note 555, FoF11066",,,
10,"Piedraiaceae Viégas ex Cif., Bat. & S. Camposa*FoF06972",,,
11,Piedraia Fonseca. & Leãno (3)*FoF06972,,,


In [5]:
# Use loc to conditionally split the column

pattern2 = r"(.+?incertae sedis)"
condition = df2['Text'].str.contains('incertae sedis')

for index, row in df2.iterrows():
    if isinstance(row['Text'], str) and 'incertae sedis' in row['Text']:
        #print(f"Row {index}: incertae sedis found {row['Text']}")
        df2['OrgName'] = df2['Text'].str.extract(pattern2, expand=False)
        df2['Authority'] = df2['Text'].str.extract(r'' + re.escape('incertae sedis') + r'(.*)', expand=False)
        #df2[new_column_name] = df2['OrgName'].apply(lambda x: subphylum_apply if search_subphylum in str(x) else '')

    elif isinstance(row['Text'], str) and 'incertae sedis' not in row['Text']:
        df2.loc[~condition, 'OrgName'] = df2.loc[~condition, 'Text'].str.split(' ', n=1, expand=True)[0]
        df2.loc[~condition, 'Authority'] = df2.loc[~condition, 'Text'].str.split(' ', n=1, expand=True)[1]
        
df2.head(25)

 'Notes']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df2.loc[~condition, 'OrgName'] = df2.loc[~condition, 'Text'].str.split(' ', n=1, expand=True)[0]
 'A.A. Silva & Cavalc. (3)*FoF01955' ...
 'Tedersoo, nom. inval. (1)*Tedersoo et al.'
 'Tedersoo, nom. inval. (1)*Tedersoo et al.'
 'on new genera and higher taxa']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df2.loc[~condition, 'Authority'] = df2.loc[~condition, 'Text'].str.split(' ', n=1, expand=True)[1]


Unnamed: 0,Text,Rank,OrgName,Authority
0,"Johansoniaceae Doilom, Phookamsak & K.D. Hyde*FoF04619",,Johansoniaceae,"Doilom, Phookamsak & K.D. Hyde*FoF04619"
1,Johansonia Sacc. (13)*FoF07635,,Johansonia,Sacc. (13)*FoF07635
2,Orthobellus A.A. Silva & Cavalc. (3)*FoF01955,,Orthobellus,A.A. Silva & Cavalc. (3)*FoF01955
4,"Neoantennariellaceae Abdollahz. & Crous*Notes 554, 555, FoF12736",,Neoantennariellaceae,"Abdollahz. & Crous*Notes 554, 555, FoF12736"
5,"Cippumomyces Crous, Overton & Ricci (2)*Note 759, FoF15609",,Cippumomyces,"Crous, Overton & Ricci (2)*Note 759, FoF15609"
6,"Fumiglobus D.R. Reynolds & G.S. Gilbert (9)*Note 555, FoF06947",,Fumiglobus,"D.R. Reynolds & G.S. Gilbert (9)*Note 555, FoF06947"
7,"Neoantennariella Abdollahz. & Crous (1)*Note 554, FoF11064",,Neoantennariella,"Abdollahz. & Crous (1)*Note 554, FoF11064"
8,"Neoasbolisia Abdollahz. & Crous (1)*Note 555, FoF11066",,Neoasbolisia,"Abdollahz. & Crous (1)*Note 555, FoF11066"
10,"Piedraiaceae Viégas ex Cif., Bat. & S. Camposa*FoF06972",,Piedraiaceae,"Viégas ex Cif., Bat. & S. Camposa*FoF06972"
11,Piedraia Fonseca. & Leãno (3)*FoF06972,,Piedraia,Fonseca. & Leãno (3)*FoF06972


### Complete populating rank based on text patterns found in Organism Name -mycotina=subphylum, -mycetes=class, -ales=order, -aceae=family

In [6]:
def assign_category(row, search_terms, category_mapping, target_column, new_column_name):
    """
    Assigns a category to a row based on matching text in a specified column.

    Args:
        row (pd.Series): A row of the DataFrame.
        search_terms (dict): Dictionary of categories and their corresponding search terms (regex patterns).
        category_mapping (dict): Dictionary mapping categories to desired output strings.
	    target_column (str): Name of the column to search within.
        new_column_name (str): Name of the new column to create.

    Returns:
        str: The assigned category string, or None if no match is found.
    """
    text = row[target_column]
    for category, patterns in search_terms.items():
        for pattern in patterns:
            if re.search(pattern, text, re.IGNORECASE):
                return category_mapping[category]
    return None

def categorize_dataframe(df, search_terms, category_mapping, target_column, new_column_name):
     """
    Applies the assign_category function to each row of the DataFrame.

    Args:
        df (pd.DataFrame): The input DataFrame.
        search_terms (dict): Dictionary of categories and their corresponding search terms.
        category_mapping (dict): Dictionary mapping categories to desired output strings.
        target_column (str): Name of the column to search within.
        new_column_name (str): Name of the new column to create.

    Returns:
        pd.DataFrame: The DataFrame with the new category column.
    """
     df[new_column_name] = df.apply(assign_category, axis=1, search_terms=search_terms, category_mapping = category_mapping, target_column = target_column, new_column_name = new_column_name)
     return df

search_subphylum = 'mycotina'
search_class = 'mycetes'
search_order = 'ales'
search_family = 'aceae'

search_terms = {
    'subphylum': [r'mycotina'],
    'class': [r'mycetes'],
    'order': [r'ales'],
    'family': [r'aceae']
}

category_mapping = {
    'subphylum': 'subphylum',
    'class': 'class',
    'order': 'order',
    'family': 'family'
}

target_column = 'OrgName'
new_column_name = 'Rank2'

df3 = categorize_dataframe(df2, search_terms, category_mapping, target_column, new_column_name)
print(df3)

                                                                   Text Rank               OrgName                                      Authority   Rank2
0                Johansoniaceae Doilom, Phookamsak & K.D. Hyde*FoF04619  NaN        Johansoniaceae        Doilom, Phookamsak & K.D. Hyde*FoF04619  family
1                                        Johansonia Sacc. (13)*FoF07635  NaN            Johansonia                            Sacc. (13)*FoF07635    None
2                         Orthobellus A.A. Silva & Cavalc. (3)*FoF01955  NaN           Orthobellus              A.A. Silva & Cavalc. (3)*FoF01955    None
4      Neoantennariellaceae Abdollahz. & Crous*Notes 554, 555, FoF12736  NaN  Neoantennariellaceae    Abdollahz. & Crous*Notes 554, 555, FoF12736  family
5            Cippumomyces Crous, Overton & Ricci (2)*Note 759, FoF15609  NaN          Cippumomyces  Crous, Overton & Ricci (2)*Note 759, FoF15609    None
...                                                                 ...  ...

In [7]:
df3['merged_col'] = df3['Rank'].combine_first(df2['Rank2'])
df3 = df3.drop(['Rank', 'Rank2'], axis=1)
df3 = df3.rename(columns={'merged_col': 'Rank'})
df3

Unnamed: 0,Text,OrgName,Authority,Rank
0,"Johansoniaceae Doilom, Phookamsak & K.D. Hyde*FoF04619",Johansoniaceae,"Doilom, Phookamsak & K.D. Hyde*FoF04619",family
1,Johansonia Sacc. (13)*FoF07635,Johansonia,Sacc. (13)*FoF07635,
2,Orthobellus A.A. Silva & Cavalc. (3)*FoF01955,Orthobellus,A.A. Silva & Cavalc. (3)*FoF01955,
4,"Neoantennariellaceae Abdollahz. & Crous*Notes 554, 555, FoF12736",Neoantennariellaceae,"Abdollahz. & Crous*Notes 554, 555, FoF12736",family
5,"Cippumomyces Crous, Overton & Ricci (2)*Note 759, FoF15609",Cippumomyces,"Crous, Overton & Ricci (2)*Note 759, FoF15609",
...,...,...,...,...
14938,"Riederberga Tedersoo, nom. inval. (1)*Tedersoo et al.",Riederberga,"Tedersoo, nom. inval. (1)*Tedersoo et al.",
14939,"Ruua Tedersoo, nom. inval. (1)*Tedersoo et al.",Ruua,"Tedersoo, nom. inval. (1)*Tedersoo et al.",
14940,"Tammsaarea Tedersoo, nom. inval. (1)*Tedersoo et al.",Tammsaarea,"Tedersoo, nom. inval. (1)*Tedersoo et al.",
14941,"Unemaeea Tedersoo, nom. inval. (1)*Tedersoo et al.",Unemaeea,"Tedersoo, nom. inval. (1)*Tedersoo et al.",


### Splits the Authority column into multiple columns if = is found denoting synonyms

In [8]:
def split_column(df, column_name):
    """
    Splits a DataFrame column into multiple columns based on '=' delimiter.

    Args:
        df (pd.DataFrame): The input DataFrame.
        column_name (str): The name of the column to split.

    Returns:
        pd.DataFrame: The DataFrame with the split columns.
    """
    split_series = df[column_name].str.split(r'\(=', expand=True)
    num_cols = split_series.shape[1]
    new_col_names = [f'{column_name}_{i+1}' for i in range(num_cols)]
    split_series.columns = new_col_names
    df = pd.concat([df, split_series], axis=1)
    df = df.drop(column_name, axis=1)
    return df

# Example Usage
df3 = split_column(df3, 'Authority')
df3 = df3.rename(columns={'Authority_2': 'synonym'})
df3.head(25)

Unnamed: 0,Text,OrgName,Rank,Authority_1,synonym
0,"Johansoniaceae Doilom, Phookamsak & K.D. Hyde*FoF04619",Johansoniaceae,family,"Doilom, Phookamsak & K.D. Hyde*FoF04619",
1,Johansonia Sacc. (13)*FoF07635,Johansonia,,Sacc. (13)*FoF07635,
2,Orthobellus A.A. Silva & Cavalc. (3)*FoF01955,Orthobellus,,A.A. Silva & Cavalc. (3)*FoF01955,
4,"Neoantennariellaceae Abdollahz. & Crous*Notes 554, 555, FoF12736",Neoantennariellaceae,family,"Abdollahz. & Crous*Notes 554, 555, FoF12736",
5,"Cippumomyces Crous, Overton & Ricci (2)*Note 759, FoF15609",Cippumomyces,,"Crous, Overton & Ricci (2)*Note 759, FoF15609",
6,"Fumiglobus D.R. Reynolds & G.S. Gilbert (9)*Note 555, FoF06947",Fumiglobus,,"D.R. Reynolds & G.S. Gilbert (9)*Note 555, FoF06947",
7,"Neoantennariella Abdollahz. & Crous (1)*Note 554, FoF11064",Neoantennariella,,"Abdollahz. & Crous (1)*Note 554, FoF11064",
8,"Neoasbolisia Abdollahz. & Crous (1)*Note 555, FoF11066",Neoasbolisia,,"Abdollahz. & Crous (1)*Note 555, FoF11066",
10,"Piedraiaceae Viégas ex Cif., Bat. & S. Camposa*FoF06972",Piedraiaceae,family,"Viégas ex Cif., Bat. & S. Camposa*FoF06972",
11,Piedraia Fonseca. & Leãno (3)*FoF06972,Piedraia,,Fonseca. & Leãno (3)*FoF06972,


### Merge in Mycobank data from RESP API. Incorporates previously captured data from API and file downloads with the parsed data from the pdf.

In [10]:
MBList = (r'S:\Taxonomy-Maintenance\MBList2025.xlsx')
MBist_df = pd.read_excel(MBList, index_col=None)
MBList_df = MBist_df.rename(columns={"ID": "id", "Taxon name": "name"}, inplace=True)
MBist_df.set_index('id', inplace=True)
#MBist_df

Unnamed: 0_level_0,name,Authors,Rank.Rank name,Year of effective publication,Name status,MycoBank #,Hyperlink,Classification,Current name.Taxon name,Synonymy
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
541217,Aabaarnia,Diederich,gen.,2014,Legitimate,810083,https://www.mycobank.org/page/Name details page/541217,"Fungi, Dikarya, Ascomycota, Pezizomycotina, Lecanoromycetes, Ostropomycetidae, Ostropales",-,"Current name: Aabaarnia Diederich, Bull. Soc. Naturalistes Luxemb. 115: 144 (2014) [MB#810083]"
541218,Aabaarnia siphulicola,Diederich,sp.,2014,Legitimate,810084,https://www.mycobank.org/page/Name details page/541218,"Fungi, Dikarya, Ascomycota, Pezizomycotina, Lecanoromycetes, Ostropomycetidae, Ostropales, Aabaarnia",Aabaarnia siphulicola,"Current name: Aabaarnia siphulicola Diederich, Bulletin de la Société des Naturalistes Luxembourgeois 115: 144 (2014) [MB#810084]"
56001,Aaosphaeria,Aptroot,gen.,1995,Legitimate,6184,https://www.mycobank.org/page/Name details page/56001,"Fungi, Dikarya, Ascomycota, Pezizomycotina, Dothideomycetes, Pleosporomycetidae, Pleosporales",Aaosphaeria,"Current name: Aaosphaeria Aptroot, Nova Hedwigia 60 (3-4): 329 (1995) [MB#6184]"
47088,Aaosphaeria arxii,(Aa) Aptroot,sp.,1995,Legitimate,412366,https://www.mycobank.org/page/Name details page/47088,"Fungi, Dikarya, Ascomycota, Pezizomycotina, Dothideomycetes, Pleosporomycetidae, Pleosporales, Aaosphaeria",Aaosphaeria arxii,"Current name: Aaosphaeria arxii (Aa) Aptroot, Nova Hedwigia 60 (3-4): 329 (1995) [MB#412366] Basionym: Didymosphaeria arxii Aa, Studies in Mycology 31: 20 (1989) [MB#135618]"
594682,Aaosphaeria pasadenensis,K. Venkateswaran & A.M. Chander,-,2022,Legitimate,844547,https://www.mycobank.org/page/Name details page/594682,"Fungi, Dikarya, Ascomycota, Pezizomycotina, Dothideomycetes, Pleosporomycetidae, Pleosporales, Aaosphaeria",-,"Current name: Aaosphaeria pasadenensis Venkateswaran & A.M. Chander, Journal of Fungi 9 (1, no. 31): 7 (2022) [MB#844547]"
...,...,...,...,...,...,...,...,...,...,...
594790,Zyzygomyces leucodermiae,"Diederich, Millanes, Ertz, Etayo & Flakus",-,2022,Legitimate,844621,https://www.mycobank.org/page/Name details page/594790,"Fungi, Dikarya, Basidiomycota, Agaricomycotina, Tremellomycetes, Tremellomycetidae, Filobasidiales, Filobasidiaceae, Zyzygomyces",-,"Current name: Zyzygomyces leucodermiae Diederich, Millanes, Ertz, Etayo & Flakus, Flora of Lichenicolous Fungi 1: 91 (2022) [MB#844621]"
594791,Zyzygomyces mobergii,Diederich & Millanes,-,2022,Legitimate,844622,https://www.mycobank.org/page/Name details page/594791,"Fungi, Dikarya, Basidiomycota, Agaricomycotina, Tremellomycetes, Tremellomycetidae, Filobasidiales, Filobasidiaceae, Zyzygomyces",-,"Current name: Zyzygomyces mobergii Diederich & Millanes, Flora of Lichenicolous Fungi 1: 92 (2022) [MB#844622]"
594792,Zyzygomyces physciacearum,"(Diederich) Diederich, Millanes & Wedin",-,2022,Legitimate,844623,https://www.mycobank.org/page/Name details page/594792,"Fungi, Dikarya, Basidiomycota, Agaricomycotina, Tremellomycetes, Tremellomycetidae, Filobasidiales, Filobasidiaceae, Zyzygomyces",-,"Current name: Zyzygomyces physciacearum (Diederich) Diederich, Millanes & Wedin, Flora of Lichenicolous Fungi 1: 93 (2022) [MB#844623] Basionym: Syzygospora physciacearum Diederich, Bibliotheca Lichenologica 61: 38 (1996) [MB#415265] Obligate synonym: Heterocephalacria physciacearum (Diederich..."
594793,Zyzygomyces physconiae,"Diederich, Millanes, P. Pinault & Brackel",-,2022,Legitimate,844624,https://www.mycobank.org/page/Name details page/594793,"Fungi, Dikarya, Basidiomycota, Agaricomycotina, Tremellomycetes, Tremellomycetidae, Filobasidiales, Filobasidiaceae, Zyzygomyces",-,"Current name: Zyzygomyces physconiae Diederich, Millanes, P. Pinault & Brackel, Flora of Lichenicolous Fungi 1: 96 (2022) [MB#844624]"


In [12]:
Mycobank_api = (r'S:\Taxonomy-Maintenance\mycobank449.xlsx')
Mycobank_api_df = pd.read_excel(Mycobank_api, index_col=None)
Mycobank_api_df.set_index('id', inplace=True)
#Mycobank_api_df

In [13]:
MBdata_df = pd.merge(Mycobank_api_df, right=MBist_df, left_on='id', right_on='id', how='outer')
MBdata_df

Unnamed: 0_level_0,name_x,creationDate,lastChangeDate,ownerEmail,lastChangeUserEmail,version,authors,authorsAbbreviation,ethymology,rank,gender,typeOfOrganism,protolog,yearOfEffectivePublication,nameStatus,mycobankNr,checkedByCurator,classification,descriptionTable,bibliographyinfo,specimeninfo,synonymy.currentNameId,synonymy.basionymId,synonymy,name_y,Authors,Rank.Rank name,Year of effective publication,Name status,MycoBank #,Hyperlink,Classification,Current name.Taxon name,Synonymy
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1
1,Abortiporus biennis,01/01/2000,19/01/2024 14:41:33,v.robert@cbs.knaw.nl,v.robert@cbs.knaw.nl,2.024012e+13,(Bulliard) Singer,(Bull.) Singer,,sp.,Masculine,,[],1944,,283905.0,,"[0, 455206, 432186, 92345, 431129, 430993, 58781, 58917, 56002]","[7012, 7293, 7351, 43855]",[],"[56206, 259295, 56207, 56208, 56209, 58036, 58038, 58039, 257555, 257552, 259293, 259296, 257553, 257554]",1.0,72639.0,,Abortiporus biennis,(Bulliard) Singer,sp.,1944,Legitimate,283905.0,https://www.mycobank.org/page/Name details page/1,"Fungi, Dikarya, Basidiomycota, Agaricomycotina, Agaricomycetes, Polyporales, Meruliaceae, Abortiporus",Abortiporus biennis,"Current name: Abortiporus biennis (Bull.) Singer, Mycologia 36 (1): 68 (1944) [MB#283905] Basionym: Boletus biennis Bull., Herbier de la France 10: t. 449:1 (1790) [MB#206101] Obligate synonyms: - Polyporus biennis (Bull.) Fr., Epicrisis Systematis Mycologici: 433 (1838) [MB#191010] -..."
2,Polyporus biennis,01/01/2000,09/01/2023 10:59:58,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,2.023011e+13,(Bulliard) Fries,(Bull.) Fr.,,sp.,Masculine,,[],1838,,191010.0,,"[0, 455206, 432186, 92345, 431129, 430993, 58781, 58942, 39877]","[7012, 7293, 7351, 43855]",[747],[],1.0,72639.0,,Polyporus biennis,(Bulliard) Fries,sp.,1838,Legitimate,191010.0,https://www.mycobank.org/page/Name details page/2,"Fungi, Dikarya, Basidiomycota, Agaricomycotina, Agaricomycetes, Polyporales, Polyporaceae, Polyporus",Abortiporus biennis,"Current name: Abortiporus biennis (Bull.) Singer, Mycologia 36 (1): 68 (1944) [MB#283905] Basionym: Boletus biennis Bull., Herbier de la France 10: t. 449:1 (1790) [MB#206101] Obligate synonyms: - Polyporus biennis (Bull.) Fr., Epicrisis Systematis Mycologici: 433 (1838) [MB#191010] -..."
9,Absidia anomala,01/01/2000,09/01/2023 10:41:02,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,2.023011e+13,Hesseltine & J.J. Ellis,Hesselt. & J.J. Ellis,,sp.,Feminine,,[],1964,,325709.0,,"[0, 455206, 566850, 452644, 431097, 452787, 92487, 92771, 56003]","[7560, 7561]",[587],[5],9.0,0.0,,Absidia anomala,Hesseltine & J.J. Ellis,sp.,1964,Legitimate,325709.0,https://www.mycobank.org/page/Name details page/9,"Fungi, Mucoromyceta, Mucoromycota, Mucoromycotina, Mucoromycetes, Mucorales, Cunninghamellaceae, Absidia",Absidia anomala,"Current name: Absidia anomala Hesselt. & J.J. Ellis, Mycologia 56 (4): 578 (1964) [MB#325709]"
11,Apophysomyces atrospora,01/01/2000,05/12/2023 14:18:55,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,2.023121e+13,H. Naganishi & Hirahara,H. Nagan. & Hirahara,,sp.,Feminine,,[],1970,Invalid,308042.0,,"[0, 455206, 566850, 452644, 431097, 452787, 92487, 58726, 56081]","[130, 131]",[38984],[30808],455866.0,0.0,,Apophysomyces atrospora,H. Naganishi & Hirahara,sp.,1970,Invalid,308042.0,https://www.mycobank.org/page/Name details page/11,"Fungi, Mucoromyceta, Mucoromycota, Mucoromycotina, Mucoromycetes, Mucorales, Saksenaeaceae, Apophysomyces",Absidia blakesleeana var. atrospora,"Current name: Lichtheimia hyalospora (Saito) Kerst. Hoffman, G. Walther & K. Voigt, Mycol. Res. 113 (3): 278 (2009) [MB#512830] Basionym: Tieghemella hyalospora Saito, Zentralblatt für Bakteriologie und Parasitenkunde, Abteilung 2 17: 103 (1906) [MB#178452] Obligate synonyms: - Absidia hy..."
13,Absidia blakesleeana,01/01/2000,22/11/2022 10:54:58,a.decock@cbs.knaw.nl,k.bensch@mycobank.org,2.022112e+13,Lendner,Lendn.,,sp.,Feminine,,[],1924,,258556.0,,"[0, 455206, 566850, 452644, 431097, 452787, 92487, 92771, 56003]","[188, 189]",[31511],"[6, 178795]",455866.0,0.0,,Absidia blakesleeana,Lendner,sp.,1924,Legitimate,258556.0,https://www.mycobank.org/page/Name details page/13,"Fungi, Mucoromyceta, Mucoromycota, Mucoromycotina, Mucoromycetes, Mucorales, Cunninghamellaceae, Absidia",Lichtheimia blakesleeana,"Current name: Lichtheimia hyalospora (Saito) Kerst. Hoffman, G. Walther & K. Voigt, Mycol. Res. 113 (3): 278 (2009) [MB#512830] Basionym: Tieghemella hyalospora Saito, Zentralblatt für Bakteriologie und Parasitenkunde, Abteilung 2 17: 103 (1906) [MB#178452] Obligate synonyms: - Absidia hy..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
614666,Umbilicaria tylorhiza,21/01/2025 11:15:37,21/01/2025 11:18:20,k.bensch@mycobank.org,k.bensch@mycobank.org,2.025012e+13,(Nylander) Nylander,(Nyl.) Nyl.,,sp.,Feminine,,[],1869,,648194.0,,"[0, 455206, 432186, 452647, 430998, 431005, 519211, 453007, 93131, 98816]",[],[],[10020878],614666.0,614665.0,,,,,,,,,,,
614667,Gyrophora tylorhiza,21/01/2025 11:19:49,21/01/2025 11:22:02,k.bensch@mycobank.org,k.bensch@mycobank.org,2.025012e+13,(Nylander) Nylander,(Nyl.) Nyl.,,sp.,Feminine,,[],1887,,648195.0,,"[0, 455206, 432186, 452647, 430998, 431005, 519211, 453007, 93131, 442425]",[],[],[],614666.0,614665.0,,,,,,,,,,,
614668,Cladonia sandstedei subsp. sandstedei,21/01/2025 11:43:33,21/01/2025 12:00:17,k.bensch@mycobank.org,k.bensch@mycobank.org,2.025012e+13,,,,subsp.,Feminine,Filamentous fungus,[],,,857397.0,Yes,"[0, 455206, 432186, 452647, 430998, 431005, 92403, 92473, 92744, 56295, 286047]",[],[],[],286047.0,286047.0,,,,,,,,,,,
614669,Candelariella terrigena var. placodimorpha,21/01/2025 12:35:13,21/01/2025 12:39:12,k.bensch@mycobank.org,k.bensch@mycobank.org,2.025012e+13,Hakulinen,Hakul.,,var.,Masculine,,[],1958,,660813.0,,"[0, 455206, 432186, 452647, 430998, 431007, 556652, 432189, 92719, 94071, 191693]",[],[],[10024484],304269.0,0.0,,,,,,,,,,,


#### Next code block works but was found to inherit a problem from the excel to csv conversion done in the API script due to the large file size. Avoiding this intermediate step of saving and reading a file generated by Mycobankparser.ipynb alleviated the issue.

In [23]:
#works but imports some data issues from excel
#MBdata = (r'mycobank_combined.xlsx')
#MBdata_df = pd.read_excel(MBdata, index_col=None)
#MBdata_df.head(5)

Unnamed: 0,id,name_x,creationDate,lastChangeDate,ownerEmail,lastChangeUserEmail,version,authors,authorsAbbreviation,ethymology,rank,gender,typeOfOrganism,protolog,yearOfEffectivePublication,nameStatus,mycobankNr,checkedByCurator,classification,descriptionTable,bibliographyinfo,specimeninfo,synonymy.currentNameId,synonymy.basionymId,synonymy,name_y,Authors,Rank.Rank name,Year of effective publication,Name status,MycoBank #,Hyperlink,Classification,Current name.Taxon name,Synonymy
0,1,Abortiporus biennis,2000-01-01 00:00:00,19/01/2024 14:41:33,v.robert@cbs.knaw.nl,v.robert@cbs.knaw.nl,20240120000000.0,(Bulliard) Singer,(Bull.) Singer,,sp.,Masculine,,[],1944,,283905.0,,"[0, 455206, 432186, 92345, 431129, 430993, 58781, 58917, 56002]","[7012, 7293, 7351, 43855]",[],"[56206, 259295, 56207, 56208, 56209, 58036, 58038, 58039, 257555, 257552, 259293, 259296, 257553, 257554]",1.0,72639.0,,Abortiporus biennis,(Bulliard) Singer,sp.,1944,Legitimate,283905.0,https://www.mycobank.org/page/Name details page/1,"Fungi, Dikarya, Basidiomycota, Agaricomycotina, Agaricomycetes, Polyporales, Meruliaceae, Abortiporus",Abortiporus biennis,"Current name: Abortiporus biennis (Bull.) Singer, Mycologia 36 (1): 68 (1944) [MB#283905] Basionym: Boletus biennis Bull., Herbier de la France 10: t. 449:1 (1790) [MB#206101] Obligate synonyms: - Polyporus biennis (Bull.) Fr., Epicrisis Systematis Mycologici: 433 (1838) [MB#191010] -..."
1,2,Polyporus biennis,2000-01-01 00:00:00,2023-09-01 10:59:58,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20230110000000.0,(Bulliard) Fries,(Bull.) Fr.,,sp.,Masculine,,[],1838,,191010.0,,"[0, 455206, 432186, 92345, 431129, 430993, 58781, 58942, 39877]","[7012, 7293, 7351, 43855]",[747],[],1.0,72639.0,,Polyporus biennis,(Bulliard) Fries,sp.,1838,Legitimate,191010.0,https://www.mycobank.org/page/Name details page/2,"Fungi, Dikarya, Basidiomycota, Agaricomycotina, Agaricomycetes, Polyporales, Polyporaceae, Polyporus",Abortiporus biennis,"Current name: Abortiporus biennis (Bull.) Singer, Mycologia 36 (1): 68 (1944) [MB#283905] Basionym: Boletus biennis Bull., Herbier de la France 10: t. 449:1 (1790) [MB#206101] Obligate synonyms: - Polyporus biennis (Bull.) Fr., Epicrisis Systematis Mycologici: 433 (1838) [MB#191010] -..."
2,9,Absidia anomala,2000-01-01 00:00:00,2023-09-01 10:41:02,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20230110000000.0,Hesseltine & J.J. Ellis,Hesselt. & J.J. Ellis,,sp.,Feminine,,[],1964,,325709.0,,"[0, 455206, 566850, 452644, 431097, 452787, 92487, 92771, 56003]","[7560, 7561]",[587],[5],9.0,0.0,,Absidia anomala,Hesseltine & J.J. Ellis,sp.,1964,Legitimate,325709.0,https://www.mycobank.org/page/Name details page/9,"Fungi, Mucoromyceta, Mucoromycota, Mucoromycotina, Mucoromycetes, Mucorales, Cunninghamellaceae, Absidia",Absidia anomala,"Current name: Absidia anomala Hesselt. & J.J. Ellis, Mycologia 56 (4): 578 (1964) [MB#325709]"
3,11,Apophysomyces atrospora,2000-01-01 00:00:00,2023-05-12 14:18:55,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20231210000000.0,H. Naganishi & Hirahara,H. Nagan. & Hirahara,,sp.,Feminine,,[],1970,Invalid,308042.0,,"[0, 455206, 566850, 452644, 431097, 452787, 92487, 58726, 56081]","[130, 131]",[38984],[30808],455866.0,0.0,,Apophysomyces atrospora,H. Naganishi & Hirahara,sp.,1970,Invalid,308042.0,https://www.mycobank.org/page/Name details page/11,"Fungi, Mucoromyceta, Mucoromycota, Mucoromycotina, Mucoromycetes, Mucorales, Saksenaeaceae, Apophysomyces",Absidia blakesleeana var. atrospora,"Current name: Lichtheimia hyalospora (Saito) Kerst. Hoffman, G. Walther & K. Voigt, Mycol. Res. 113 (3): 278 (2009) [MB#512830] Basionym: Tieghemella hyalospora Saito, Zentralblatt fÃ¼r Bakteriologie und Parasitenkunde, Abteilung 2 17: 103 (1906) [MB#178452] Obligate synonyms: - Absidia h..."
4,13,Absidia blakesleeana,2000-01-01 00:00:00,22/11/2022 10:54:58,a.decock@cbs.knaw.nl,k.bensch@mycobank.org,20221120000000.0,Lendner,Lendn.,,sp.,Feminine,,[],1924,,258556.0,,"[0, 455206, 566850, 452644, 431097, 452787, 92487, 92771, 56003]","[188, 189]",[31511],"[6, 178795]",455866.0,0.0,,Absidia blakesleeana,Lendner,sp.,1924,Legitimate,258556.0,https://www.mycobank.org/page/Name details page/13,"Fungi, Mucoromyceta, Mucoromycota, Mucoromycotina, Mucoromycetes, Mucorales, Cunninghamellaceae, Absidia",Lichtheimia blakesleeana,"Current name: Lichtheimia hyalospora (Saito) Kerst. Hoffman, G. Walther & K. Voigt, Mycol. Res. 113 (3): 278 (2009) [MB#512830] Basionym: Tieghemella hyalospora Saito, Zentralblatt fÃ¼r Bakteriologie und Parasitenkunde, Abteilung 2 17: 103 (1906) [MB#178452] Obligate synonyms: - Absidia h..."


In [14]:
MBdata1_df = MBdata_df.rename(columns={"name_x": "OrgName"})
MBdata1_df.head(10)

Unnamed: 0_level_0,OrgName,creationDate,lastChangeDate,ownerEmail,lastChangeUserEmail,version,authors,authorsAbbreviation,ethymology,rank,gender,typeOfOrganism,protolog,yearOfEffectivePublication,nameStatus,mycobankNr,checkedByCurator,classification,descriptionTable,bibliographyinfo,specimeninfo,synonymy.currentNameId,synonymy.basionymId,synonymy,name_y,Authors,Rank.Rank name,Year of effective publication,Name status,MycoBank #,Hyperlink,Classification,Current name.Taxon name,Synonymy
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1
1,Abortiporus biennis,01/01/2000,19/01/2024 14:41:33,v.robert@cbs.knaw.nl,v.robert@cbs.knaw.nl,20240120000000.0,(Bulliard) Singer,(Bull.) Singer,,sp.,Masculine,,[],1944.0,,283905.0,,"[0, 455206, 432186, 92345, 431129, 430993, 58781, 58917, 56002]","[7012, 7293, 7351, 43855]",[],"[56206, 259295, 56207, 56208, 56209, 58036, 58038, 58039, 257555, 257552, 259293, 259296, 257553, 257554]",1.0,72639.0,,Abortiporus biennis,(Bulliard) Singer,sp.,1944,Legitimate,283905.0,https://www.mycobank.org/page/Name details page/1,"Fungi, Dikarya, Basidiomycota, Agaricomycotina, Agaricomycetes, Polyporales, Meruliaceae, Abortiporus",Abortiporus biennis,"Current name: Abortiporus biennis (Bull.) Singer, Mycologia 36 (1): 68 (1944) [MB#283905] Basionym: Boletus biennis Bull., Herbier de la France 10: t. 449:1 (1790) [MB#206101] Obligate synonyms: - Polyporus biennis (Bull.) Fr., Epicrisis Systematis Mycologici: 433 (1838) [MB#191010] -..."
2,Polyporus biennis,01/01/2000,09/01/2023 10:59:58,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20230110000000.0,(Bulliard) Fries,(Bull.) Fr.,,sp.,Masculine,,[],1838.0,,191010.0,,"[0, 455206, 432186, 92345, 431129, 430993, 58781, 58942, 39877]","[7012, 7293, 7351, 43855]",[747],[],1.0,72639.0,,Polyporus biennis,(Bulliard) Fries,sp.,1838,Legitimate,191010.0,https://www.mycobank.org/page/Name details page/2,"Fungi, Dikarya, Basidiomycota, Agaricomycotina, Agaricomycetes, Polyporales, Polyporaceae, Polyporus",Abortiporus biennis,"Current name: Abortiporus biennis (Bull.) Singer, Mycologia 36 (1): 68 (1944) [MB#283905] Basionym: Boletus biennis Bull., Herbier de la France 10: t. 449:1 (1790) [MB#206101] Obligate synonyms: - Polyporus biennis (Bull.) Fr., Epicrisis Systematis Mycologici: 433 (1838) [MB#191010] -..."
9,Absidia anomala,01/01/2000,09/01/2023 10:41:02,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20230110000000.0,Hesseltine & J.J. Ellis,Hesselt. & J.J. Ellis,,sp.,Feminine,,[],1964.0,,325709.0,,"[0, 455206, 566850, 452644, 431097, 452787, 92487, 92771, 56003]","[7560, 7561]",[587],[5],9.0,0.0,,Absidia anomala,Hesseltine & J.J. Ellis,sp.,1964,Legitimate,325709.0,https://www.mycobank.org/page/Name details page/9,"Fungi, Mucoromyceta, Mucoromycota, Mucoromycotina, Mucoromycetes, Mucorales, Cunninghamellaceae, Absidia",Absidia anomala,"Current name: Absidia anomala Hesselt. & J.J. Ellis, Mycologia 56 (4): 578 (1964) [MB#325709]"
11,Apophysomyces atrospora,01/01/2000,05/12/2023 14:18:55,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20231210000000.0,H. Naganishi & Hirahara,H. Nagan. & Hirahara,,sp.,Feminine,,[],1970.0,Invalid,308042.0,,"[0, 455206, 566850, 452644, 431097, 452787, 92487, 58726, 56081]","[130, 131]",[38984],[30808],455866.0,0.0,,Apophysomyces atrospora,H. Naganishi & Hirahara,sp.,1970,Invalid,308042.0,https://www.mycobank.org/page/Name details page/11,"Fungi, Mucoromyceta, Mucoromycota, Mucoromycotina, Mucoromycetes, Mucorales, Saksenaeaceae, Apophysomyces",Absidia blakesleeana var. atrospora,"Current name: Lichtheimia hyalospora (Saito) Kerst. Hoffman, G. Walther & K. Voigt, Mycol. Res. 113 (3): 278 (2009) [MB#512830] Basionym: Tieghemella hyalospora Saito, Zentralblatt für Bakteriologie und Parasitenkunde, Abteilung 2 17: 103 (1906) [MB#178452] Obligate synonyms: - Absidia hy..."
13,Absidia blakesleeana,01/01/2000,22/11/2022 10:54:58,a.decock@cbs.knaw.nl,k.bensch@mycobank.org,20221120000000.0,Lendner,Lendn.,,sp.,Feminine,,[],1924.0,,258556.0,,"[0, 455206, 566850, 452644, 431097, 452787, 92487, 92771, 56003]","[188, 189]",[31511],"[6, 178795]",455866.0,0.0,,Absidia blakesleeana,Lendner,sp.,1924,Legitimate,258556.0,https://www.mycobank.org/page/Name details page/13,"Fungi, Mucoromyceta, Mucoromycota, Mucoromycotina, Mucoromycetes, Mucorales, Cunninghamellaceae, Absidia",Lichtheimia blakesleeana,"Current name: Lichtheimia hyalospora (Saito) Kerst. Hoffman, G. Walther & K. Voigt, Mycol. Res. 113 (3): 278 (2009) [MB#512830] Basionym: Tieghemella hyalospora Saito, Zentralblatt für Bakteriologie und Parasitenkunde, Abteilung 2 17: 103 (1906) [MB#178452] Obligate synonyms: - Absidia hy..."
18,Absidia californica,01/01/2000,04/08/2021 10:09:59,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20210800000000.0,J.J. Ellis & Hesseltine,J.J. Ellis and Hesselt.,,sp.,Feminine,,[],1965.0,,325710.0,,"[0, 455206, 566850, 452644, 431097, 452787, 92487, 92771, 56003]","[324, 325]",[116],[8],18.0,0.0,,Absidia californica,J.J. Ellis & Hesseltine,sp.,1965,Legitimate,325710.0,https://www.mycobank.org/page/Name details page/18,"Fungi, Mucoromyceta, Mucoromycota, Mucoromycotina, Mucoromycetes, Mucorales, Cunninghamellaceae, Absidia",Absidia californica,"Current name: Absidia californica J.J. Ellis and Hesselt., Mycologia 57 (2): 230 (1965) [MB#325710]"
21,Absidia coerulea,01/01/2000,13/02/2023 22:08:29,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20230210000000.0,Bainier,Bainier,,sp.,Feminine,,[],1889.0,,221354.0,,"[0, 455206, 566850, 452644, 431097, 452787, 92487, 92771, 56003]","[326, 11403, 13804]",[44210],[],21.0,0.0,,Absidia coerulea,Bainier,sp.,1889,Legitimate,221354.0,https://www.mycobank.org/page/Name details page/21,"Fungi, Mucoromyceta, Mucoromycota, Mucoromycotina, Mucoromycetes, Mucorales, Cunninghamellaceae, Absidia",Absidia coerulea,"Current name: Absidia coerulea Bainier, Bull. Soc. Bot. France 36: 184 (1889) [MB#221354] Taxonomic synonyms: - Tieghemella orchidis Vuill., Bulletin de la Société Mycologique de France 19: 122 (1903) [MB#179106] - Absidia orchidis (Vuill.) Hagem, Skrifter udgivne af Videnskabs-S..."
29,Absidia corymbifera,01/01/2000,04/08/2021 09:48:10,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20210800000000.0,(Cohn) Saccardo & Trotter,(Cohn) Sacc. & Trotter,,sp.,Feminine,,[],1912.0,,221175.0,,"[0, 455206, 566850, 452644, 431097, 452787, 92487, 92771, 56003]","[423, 11404, 13805, 13935, 14820, 24924]",[3650],"[26, 204932]",162924.0,35523.0,,Absidia corymbifera,(Cohn) Saccardo & Trotter,sp.,1912,Legitimate,221175.0,https://www.mycobank.org/page/Name details page/29,"Fungi, Mucoromyceta, Mucoromycota, Mucoromycotina, Mucoromycetes, Mucorales, Cunninghamellaceae, Absidia",Lichtheimia corymbifera,"Current name: Lichtheimia corymbifera (Cohn) Vuill., Bull. Soc. Mycol. France 19: 126 (1903) [MB#416447] Basionym: Mucor corymbifer Cohn, Z. Klin. Med.: 147 (1884) [MB#229838] Obligate synonyms: - Absidia corymbifera (Cohn) Sacc. & Trotter, Sylloge Fungorum 21: 825 (1912) [MB#221175] ..."
47,Absidia cuneospora,01/01/2000,13/02/2023 22:08:30,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20230210000000.0,G.F. Orr & Plunkett,G.F. Orr & Plunkett,,sp.,Feminine,,[],1959.0,,292052.0,,"[0, 455206, 566850, 452644, 431097, 452787, 92487, 92771, 56003]",[],[7509],[28],47.0,0.0,,Absidia cuneospora,G.F. Orr & Plunkett,sp.,1959,Legitimate,292052.0,https://www.mycobank.org/page/Name details page/47,"Fungi, Mucoromyceta, Mucoromycota, Mucoromycotina, Mucoromycetes, Mucorales, Cunninghamellaceae, Absidia",Absidia cuneospora,"Current name: Absidia cuneospora G.F. Orr & Plunkett, Mycologia 51: 203 (1959) [MB#292052]"
50,Absidia cylindrospora var. cylindrospora,01/01/2000,07/04/2021 14:33:02,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20210410000000.0,,,,var.,Feminine,,[],,,427391.0,,"[0, 455206, 566850, 452644, 431097, 452787, 92487, 92771, 56003, 58196]","[7570, 13806]",[],[32961],50.0,0.0,,Absidia cylindrospora var. cylindrospora,,var.,?,Legitimate,427391.0,https://www.mycobank.org/page/Name details page/50,"Fungi, Mucoromyceta, Mucoromycota, Mucoromycotina, Mucoromycetes, Mucorales, Cunninghamellaceae, Absidia, Absidia cylindrospora",Absidia cylindrospora var. cylindrospora,"Current name: Absidia cylindrospora var. cylindrospora (?) [MB#427391] Taxonomic synonyms: Tieghemella cylindrospora (Hagem) Naumov, Opredelitel Mukorovykh (Mucorales): 83 (1935) [MB#251728]"


In [15]:
merged_df = pd.merge(df3, MBdata1_df, on='OrgName', how='inner')
merged_df.head(10)

Unnamed: 0,Text,OrgName,Rank,Authority_1,synonym,creationDate,lastChangeDate,ownerEmail,lastChangeUserEmail,version,authors,authorsAbbreviation,ethymology,rank,gender,typeOfOrganism,protolog,yearOfEffectivePublication,nameStatus,mycobankNr,checkedByCurator,classification,descriptionTable,bibliographyinfo,specimeninfo,synonymy.currentNameId,synonymy.basionymId,synonymy,name_y,Authors,Rank.Rank name,Year of effective publication,Name status,MycoBank #,Hyperlink,Classification,Current name.Taxon name,Synonymy
0,"Johansoniaceae Doilom, Phookamsak & K.D. Hyde*FoF04619",Johansoniaceae,family,"Doilom, Phookamsak & K.D. Hyde*FoF04619",,21/09/2018 13:21:59,22/10/2020 09:26:31,k.bensch@mycobank.org,v.robert@cbs.knaw.nl,20201020000000.0,"Doilom, Phookamsak & K.D. Hyde","Doilom, Phookamsak & K.D. Hyde",,fam.,,,[],2018,,554793.0,,"[0, 455206, 432186, 452647, 430998, 431106, 92397, 92432]",[],[],[],568773.0,0.0,,Johansoniaceae,"Doilom, Phookamsak & K.D. Hyde",fam.,2018,Legitimate,554793.0,https://www.mycobank.org/page/Name details page/568773,"Fungi, Dikarya, Ascomycota, Pezizomycotina, Dothideomycetes, Dothideomycetidae, Capnodiales",-,"Current name: Johansoniaceae Doilom, Phookamsak & K.D. Hyde, Mycosphere 9 (4): 659 (2018) [MB#554793]"
1,Johansonia Sacc. (13)*FoF07635,Johansonia,,Sacc. (13)*FoF07635,,16/01/2004 12:20:30,05/01/2022 07:14:51,a.decock@cbs.knaw.nl,k.bensch@mycobank.org,20220110000000.0,Saccardo,Sacc.,,gen.,Feminine,,[],1889,,2533.0,,"[0, 455206, 432186, 452647, 430998, 431106, 92397, 92432, 568773]",[],[3638],[],104865.0,0.0,,Johansonia,Saccardo,gen.,1889,Legitimate,2533.0,https://www.mycobank.org/page/Name details page/104865,"Fungi, Dikarya, Ascomycota, Pezizomycotina, Dothideomycetes, Dothideomycetidae, Capnodiales, Johansoniaceae",Johansonia,"Current name: Johansonia Sacc., Sylloge Fungorum 8: 785 (1889) [MB#2533]"
2,Orthobellus A.A. Silva & Cavalc. (3)*FoF01955,Orthobellus,,A.A. Silva & Cavalc. (3)*FoF01955,,01/01/2000,07/04/2021 16:24:25,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20210410000000.0,A.A. Silva & Cavalcanti,A.A. Silva & Cavalc.,,gen.,Masculine,,[],1973,,3639.0,,"[0, 455206, 432186, 452647, 430998, 431106, 92397, 92489, 93082]",[],[45336],[],96842.0,0.0,,Orthobellus,A.A. Silva & Cavalcanti,gen.,1973,Legitimate,3639.0,https://www.mycobank.org/page/Name details page/96842,"Fungi, Dikarya, Ascomycota, Pezizomycotina, Dothideomycetes, Dothideomycetidae, Mycosphaerellales, Schizothyriaceae",Orthobellus,"Current name: Orthobellus A.A. Silva & Cavalc., Publicações do Instituto de Micologia da Universidade do Recife 691: 4 (1973) [MB#3639]"
3,"Neoantennariellaceae Abdollahz. & Crous*Notes 554, 555, FoF12736",Neoantennariellaceae,family,"Abdollahz. & Crous*Notes 554, 555, FoF12736",,25/10/2019 15:40:16,22/10/2020 09:35:47,j.abdollahzadeh@yahoo.com,v.robert@cbs.knaw.nl,20201020000000.0,J. Abdollahzadeh & P.W. Crous,Abdollahz. & Crous,Name refers to the genus Neoantennariella.,fam.,,,[],2020,,833165.0,Yes,"[0, 455206, 432186, 452647, 430998, 431106, 92397, 92432]",[86035],[],[],577294.0,0.0,,Neoantennariellaceae,J. Abdollahzadeh & P.W. Crous,fam.,2020,Legitimate,833165.0,https://www.mycobank.org/page/Name details page/577294,"Fungi, Dikarya, Ascomycota, Pezizomycotina, Dothideomycetes, Dothideomycetidae, Capnodiales",-,"Current name: Neoantennariellaceae Abdollahz. & Crous, Studies in Mycology 95: 404 (2020) [MB#833165]"
4,"Cippumomyces Crous, Overton & Ricci (2)*Note 759, FoF15609",Cippumomyces,,"Crous, Overton & Ricci (2)*Note 759, FoF15609",,24/10/2021 14:35:02,28/12/2021 14:30:57,p.crous@wi.knaw.nl,k.bensch@mycobank.org,20211230000000.0,"P.W. Crous, B.E. Overton & G.M. Ricci","Crous, Overton & Ricci",Name refers to the fact that it was isolated from a granite tombstone ‘cippum’.,gen.,Masculine,,[],2021,,841836.0,Yes,"[0, 455206, 432186, 452647, 430998, 431106, 92397, 92432, 577294]",[93752],[],[],589697.0,0.0,,Cippumomyces,"P.W. Crous, B.E. Overton & G.M. Ricci",-,2021,Legitimate,841836.0,https://www.mycobank.org/page/Name details page/589697,"Fungi, Dikarya, Ascomycota, Pezizomycotina, Dothideomycetes, Dothideomycetidae, Capnodiales, Neoantennariellaceae",-,"Current name: Cippumomyces Crous, Overton & Ricci, Persoonia 47: 269 (2021) [MB#841836]"
5,"Fumiglobus D.R. Reynolds & G.S. Gilbert (9)*Note 555, FoF06947",Fumiglobus,,"D.R. Reynolds & G.S. Gilbert (9)*Note 555, FoF06947",,07/01/2008 11:42:02,03/03/2021 17:47:33,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20210300000000.0,D.R. Reynolds & G.S. Gilbert,D.R. Reynolds & G.S. Gilbert,,gen.,Masculine,,[],2006,,29065.0,,"[0, 455206, 432186, 452647, 430998, 431106, 92397, 92432, 577294]",[],[37693],[121312],443033.0,0.0,,Fumiglobus,D.R. Reynolds & G.S. Gilbert,gen.,2006,Legitimate,29065.0,https://www.mycobank.org/page/Name details page/443033,"Fungi, Dikarya, Ascomycota, Pezizomycotina, Dothideomycetes, Dothideomycetidae, Capnodiales, Neoantennariellaceae",Fumiglobus,"Current name: Fumiglobus D.R. Reynolds & G.S. Gilbert, Cryptogamie Mycologie 27 (3): 252 (2006) [MB#29065] Taxonomic synonyms: Fumeiglobus D.R. Reynolds & G.S. Gilbert (2006) [MB#529702]"
6,"Neoantennariella Abdollahz. & Crous (1)*Note 554, FoF11064",Neoantennariella,,"Abdollahz. & Crous (1)*Note 554, FoF11064",,25/10/2019 15:43:29,22/10/2020 09:35:47,j.abdollahzadeh@yahoo.com,v.robert@cbs.knaw.nl,20201020000000.0,J. Abdollahzadeh & P.W. Crous,Abdollahz. & Crous,Name reflects its morphological similarity to the genus Antennariella.,gen.,,,[],2020,,833166.0,Yes,"[0, 455206, 432186, 452647, 430998, 431106, 92397, 92432, 577294]",[86036],[],[],577295.0,0.0,,Neoantennariella,J. Abdollahzadeh & P.W. Crous,gen.,2020,Legitimate,833166.0,https://www.mycobank.org/page/Name details page/577295,"Fungi, Dikarya, Ascomycota, Pezizomycotina, Dothideomycetes, Dothideomycetidae, Capnodiales, Neoantennariellaceae",Neoantennariella,"Current name: Neoantennariella Abdollahz. & Crous, Studies in Mycology 95: 405 (2020) [MB#833166]"
7,"Neoasbolisia Abdollahz. & Crous (1)*Note 555, FoF11066",Neoasbolisia,,"Abdollahz. & Crous (1)*Note 555, FoF11066",,25/10/2019 15:59:25,22/10/2020 09:35:47,j.abdollahzadeh@yahoo.com,v.robert@cbs.knaw.nl,20201020000000.0,J. Abdollahzadeh & P.W. Crous,Abdollahz. & Crous,Name reflects its morphological similarity to the genus Asbolisia Bat. & Cif.,gen.,,,[],2020,,833168.0,Yes,"[0, 455206, 432186, 452647, 430998, 431106, 92397, 92432, 577294]",[86038],[],[],577297.0,0.0,,Neoasbolisia,J. Abdollahzadeh & P.W. Crous,gen.,2020,Legitimate,833168.0,https://www.mycobank.org/page/Name details page/577297,"Fungi, Dikarya, Ascomycota, Pezizomycotina, Dothideomycetes, Dothideomycetidae, Capnodiales, Neoantennariellaceae",Neoasbolisia,"Current name: Neoasbolisia Abdollahz. & Crous, Studies in Mycology 95: 406 (2020) [MB#833168]"
8,"Piedraiaceae Viégas ex Cif., Bat. & S. Camposa*FoF06972",Piedraiaceae,family,"Viégas ex Cif., Bat. & S. Camposa*FoF06972",,01/01/2000,07/04/2021 16:17:39,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20210410000000.0,"Viégas ex Ciferri, Batista & S. Camposa","Viégas ex Cif., Bat. & S. Camposa",,fam.,,,[],1956,,82066.0,,"[0, 455206, 432186, 452647, 430998, 431106, 92397, 92432]",[],[53164],[],93023.0,0.0,,Piedraiaceae,"Viégas ex Ciferri, Batista & S. Camposa",fam.,1956,Legitimate,82066.0,https://www.mycobank.org/page/Name details page/93023,"Fungi, Dikarya, Ascomycota, Pezizomycotina, Dothideomycetes, Dothideomycetidae, Capnodiales",-,"Current name: Piedraiaceae Viégas ex Cif., Bat. & S. Camposa, Publicações do Instituto de Micologia da Universidade do Recife 45: 7 (1956) [MB#82066]"
9,"Piedraiaceae Viégas ex Cif., Bat. & S. Camposa*FoF06972",Piedraiaceae,family,"Viégas ex Cif., Bat. & S. Camposa*FoF06972",,15/01/2007 11:55:29,03/03/2021 17:43:42,a.decock@cbs.knaw.nl,v.robert@cbs.knaw.nl,20210300000000.0,Viégas ex M.E. Barr,Viégas ex M.E. Barr,,fam.,,,[],1979,Illegitimate,81173.0,,"[0, 455206, 507885]",[],[4569],[],433108.0,0.0,,Piedraiaceae,Viégas ex M.E. Barr,fam.,1979,Illegitimate,81173.0,https://www.mycobank.org/page/Name details page/433108,"Fungi, Incertae sedis",-,"Current name: Piedraiaceae Viégas ex M.E. Barr, Mycologia 71: 939 (1979) [MB#81173]"


### Save final output to Excel

In [24]:
df3.to_excel (r'C:\Users\mcveigh\Documents\PythonPC\PDFminertest2.xlsx', index = False, header=True)

In [16]:
merged_df.to_excel (r'C:\Users\mcveigh\Documents\PythonPC\PDFminer_merged.xlsx', index = False, header=True)

### TEST code that can be ignored

In [None]:


# Sample DataFrame
data = {'col': ['apple pie', 'banana', 'cherry tart', 'date pudding', 'elderberry']}
df2 = pd.DataFrame(data)

for index, row in df2.iterrows():
    if isinstance(row['col'], str) and 'berry' in row['col']:
        # Do something if 'col' is a string and contains 'berry'
        print(f"Row {index}: Contains 'berry': {row['col']}")
    else:
        # Do something else if 'col' is not a string or doesn't contain 'berry'
        print(f"Row {index}: Does not contain 'berry': {row['col']}")
        

In [None]:
# Sample DataFrame
data = {'text_column': ['apple pie', 'banana', 'cherry tart', 'date']}
df = pd.DataFrame(data)

# Text to search for
search_text = 'pie'

# Create new columns 'first_part' and 'second_part'
df['first_part']  = ''
df['second_part']  = ''

# Use loc to conditionally split the column
df.loc[df['text_column'].str.contains(search_text), ['first_part', 'second_part']] = df.loc[df['text_column'].str.contains(search_text), 'text_column'].str.split('', n=1, expand=True)

df.loc[~df['text_column'].str.contains(search_text), 'second_part'] = df.loc[~df['text_column'].str.contains(search_text), 'text_column']

print(df)
