## Nordicmicroalgae NOMAC 2017.

Prepares the NOMAC 2017 species list.

In [1]:
import pandas as pd
import numpy as np
import datetime

#### Import species files downloaded from Artsnavnebase.

In [2]:
def read_from_artsnavnebase(file_name):
    df = pd.read_csv('./in_data/' + file_name, 
#                 usecols=['Rike', 'Rekke', 'Klasse', 'Orden', 'Familie', 'Slekt', 'Art', 'Underart', 
#                          'Varietet', 'Form', 'Autorstreng', 'Hovedstatus', 'Bistatus'], 
                          sep=';', encoding='cp1252',
                          na_values=['nan'], keep_default_na=False, 
                          )
    #
    old_header=['Rike', 'Rekke', 'Klasse', 'Orden', 'Familie', 'Slekt', 'Art', 'Underart', 
                'Varietet', 'Form', 'Autorstreng', 'Hovedstatus', 'Bistatus'] 
    new_header=['kingdom', 'phylum', 'class_name', 'order', 'family', 'genus', 'species', 'subspecies', 
                'variety', 'forma', 'author', 'status', 'status-2'] 
    df.rename(columns=dict(zip(old_header, new_header)), inplace=True)#df.fillna('', inplace=True)
    #
    df['source'] = 'Artsnavnebase 20170303'
    return df

In [3]:
df1 = read_from_artsnavnebase('Chlorophyta_ArtsnavnebaseCSV_20170303.txt')
df2 = read_from_artsnavnebase('Chromista_ArtsnavnebaseCSV_20170303.txt')
df3 = read_from_artsnavnebase('Cyanobacteria_ArtsnavnebaseCSV_20170303.txt')
df4 = read_from_artsnavnebase('Plantae_Glaucophyta_ArtsnavnebaseCSV_20170303.txt')
df5 = read_from_artsnavnebase('Plantae_Rhodophyta_ArtsnavnebaseCSV_20170303.txt')
df6 = read_from_artsnavnebase('Protozoa_ArtsnavnebaseCSV_20170303.txt')

In [4]:
print('Length df1: ' + str(len(df1)))
print('Length df2: ' + str(len(df2)))
print('Length df3: ' + str(len(df3)))
print('Length df4: ' + str(len(df4)))
print('Length df5: ' + str(len(df5)))
print('Length df6: ' + str(len(df6)))
df = pd.concat([df1, df2, df3, df4, df5, df6])
print('Length total df: ' + str(len(df)))

Length df1: 1289
Length df2: 5388
Length df3: 831
Length df4: 10
Length df5: 501
Length df6: 720
Length total df: 8739


In [5]:
# For test.
#df.to_csv('./out_data/' + 'all_species_from_artsnavnebase.txt',
#         sep='\t', encoding='cp1252', index = False)

#### Remove rows not ending in a used rank (for example "Underklasse" or "Overorden"). 

In [6]:
df = df.assign(last_used_rank="")

In [7]:
ranks = ['Underrike','phylum','Underrekke',
         'Overklasse','class_name','Underklasse',
         'Infraklasse','Kohort','Overorden',
         'order','Underorden','Infraorden',
         'Overfamilie','family','Underfamilie',
         'Tribus','Undertribus','genus','Underslekt',
         'Seksjon','species','subspecies','variety','forma']
#
for index, row in df.iterrows():    
    last_used_rank = 'kingdom'
    for rank in ranks:
        value = row.loc[rank]
        if value:
            last_used_rank = rank
    #
    df.ix[index, 'last_used_rank'] = last_used_rank
#
print('Length df: ' + str(len(df)))
#df.head()

Length df: 8739


In [8]:
rank_not_in_kofsa = ['Underrike','Underrekke',
         'Overklasse','Underklasse',
         'Infraklasse','Kohort','Overorden',
         'Underorden','Infraorden',
         'Overfamilie','Underfamilie',
         'Tribus','Undertribus','Underslekt',
         'Seksjon']
df = df[~df.last_used_rank.isin(rank_not_in_kofsa)]
#
print('Length df: ' + str(len(df)))
#df.head()

Length df: 8522


#### Remove columns not needed.

In [9]:
used_columns = ['kingdom', 'phylum', 'class_name', 'order', 'family', 'genus', 'species', 'subspecies', 
                'variety', 'forma', 'author', 'status', 'source'] 
for column in list(df):
    if column not in used_columns:
        df.drop(column, axis=1, inplace=True)
#
df.head()

Unnamed: 0,kingdom,phylum,class_name,order,family,genus,species,subspecies,variety,forma,author,status,source
0,Plantae,Chlorophyta,,,,,,,,,Rchb.,Gyldig,Artsnavnebase 20170303
1,Plantae,Chlorophyta,Bryopsidophyceae,,,,,,,,Bessey,Gyldig,Artsnavnebase 20170303
2,Plantae,Chlorophyta,Chlorophyceae,,,,,,,,Wille,Gyldig,Artsnavnebase 20170303
3,Plantae,Chlorophyta,Nephrophyceae,,,,,,,,"Cavalier-Smith, 1993",Gyldig,Artsnavnebase 20170303
4,Plantae,Chlorophyta,Pedinophyceae,,,,,,,,Moestrup,Gyldig,Artsnavnebase 20170303


#### Filter on status = 'Gyldig' to get valid taxa.

In [10]:
df_valid = df[df.status=='Gyldig']
print('Length df_valid: ' + str(len(df_valid)))

Length df_valid: 8522


In [11]:
# For test.
#df_valid.to_csv('./out_data/' + 'valid_species_from_artsnavnebase.txt',
#               sep='\t', encoding='cp1252', index = False)

#### Extract kingdom and phylum and add to NOMAC 2017.
Note: Not taxa below in rank.

In [12]:
#add_kingdom_list = ["Bacteria", "Chromista", "Plantae", "Protozoa"]

In [13]:
#add_phylum_list = ["Cyanobacteria", "Bacillariophyta", "Cercozoa", "Haptophyta", "Miozoa", 
#                   "Not_assigned_3409", "Ochrophyta", "Oomycota", "Charophyta", "Chlorophyta", 
#                   "Cyanidiophyta", "Glaucophyta", "Rhodophyta", "Streptophyta", 
#                   "(Rekke) Incertae sedis", "Choanozoa", "Euglenozoa", "Metamonada", 
#                   "Myzozoa"]

In [14]:
#df_top_kingdom = df_valid[df_valid.phylum=='']
#df_top_kingdom = df_top_kingdom[df_top_kingdom.kingdom.isin(add_kingdom_list)]
#print('Length df_top_kingdom: ' + str(len(df_top_kingdom)))
#df_top_kingdom

In [15]:
#df_top_phylum = df_valid[df_valid.class_name=='']
#df_top_phylum = df_top_phylum[df_top_phylum.phylum.isin(add_phylum_list)]
#print('Length df_valid_phylum: ' + str(len(df_top_phylum)))
##df_top_phylum

#### Add phylum and class recursively.

In [16]:
##phylum_list = ["Tracheophyta", "Chromista phylum incertae sedis", "Ciliophora", "Hyphochytriomycota"]
#add_all_phylum_list = ["Chromista phylum incertae sedis", "Ciliophora", "Hyphochytriomycota"]
##phylum_list

In [17]:
#add_all_class_list = ["Charophyceae", "Chlorokybophyceae", "Coleochaetophyceae", "Conjugatophyceae ", 
#"Klebsormidiophyceae", "Mesostigmatophyceae", "Chlorodendrophyceae", "Chlorophyceae", 
#"Chlorophyta incertae sedis", "Mamiellophyceae", "Nephrophyceae", "Palmophyllophyceae", 
#"Pedinophyceae", "Pyramimonadophyceae", "Trebouxiophyceae", "Ulvophyceae", "Glaucophyceae", 
#"Bangiophyceae", "Compsopogonophyceae", "Cyanidiophyceae", "Florideophyceae", 
#"Porphyridiophyceae", "Rhodellophyceae", "Rhodophyta incertae sedis", "Stylonematophyceae", 
#"Bacillariophyta classis incertae sedis", "Bacillariophyceae", "Coscinodiscophyceae", 
#"Mediophyceae", "Bikosea", "Chlorarachniophyceae", "Filosa", "Imbricatea", "Cryptophyceae", 
#"Polythalamea", "Foraminifera incertae sedis", "Globothalamea", "Monothalamea", "Tubothalamea", 
#"Coccolithophyceae", "Pavlovophyceae", "Katablepharidophyceae", "Apicomonadea", "Colponemea", 
#"Dinophyceae", "Ellobiopsea", "Myzomonadea", "Noctilucea", "Oxyrrhida", "Perkinsea", "Syndinea", 
#"Bolidophyceae", "Chrysomerophyceae", "Chrysophyceae", "Dictyochophyceae", "Pelagophyceae", 
#"Phaeophyceae", "Phaeothamniophyceae", "Picophagophyceae", "Pinguiophyceae", "Placidiophyceae", 
#"Raphidophyceae", "Schizocladiophyceae", "Synchromophyceae", "Synurophyceae", "Xanthophyceae", 
#"Oomycetes", "Telonemia classis ineditae", "Choanoflagellatea", "Cristidiscoidia", "Diplonemea", 
#"Euglenophyceae", "Kinetoplastea", "Postgaardea", "Jakobea", "Trepomonadea", "Heterolobosea", 
#"Picomonadea", "Ebriophyceae", "Cyanophyceae"]
##class_list

In [18]:
#df_add_all_phylum = df_valid[df_valid.phylum.isin(add_all_phylum_list)]
#print('Length df_add_all_phylum: ' + str(len(df_add_all_phylum)))
##df_add_all_phylum.head()

In [19]:
#df_add_all_class = df_valid[df_valid.class_name.isin(add_all_class_list)]
#print('Length df_add_all_class: ' + str(len(df_add_all_class)))
##df_add_all_class.head()

#### Concatenate to NOMAC 2017.

In [20]:
#df_nomac2017 = pd.concat([df_top_kingdom, df_top_phylum, df_add_all_phylum, df_add_all_class])
df_nomac2017 = df_valid
print('Length df_nomac2017: ' + str(len(df_nomac2017)))

Length df_nomac2017: 8522


#### Test if the add_all_phylum_list and add_all_class_list content was found in NOMAC 2017.

In [21]:
#for phylum in add_all_phylum_list:
#    df_test = df_nomac2017[df_nomac2017.phylum == phylum]
#    print("Test phylum: " + phylum + "          Number of rows: " + str(len(df_test)) )

In [22]:
#for class_name in add_all_class_list:
#    df_test = df_nomac2017[df_nomac2017.class_name == class_name]
#    print("Test class: " + class_name + "          Number of rows: " + str(len(df_test)) )

#### Remove taxa recursively.

In [23]:
remove_phylum_list = ['Rhodophyta', 'Oomycota', ]
remove_class_list = ['Ulvophyceae', 'Schizocladiophyceae']
remove_order_list = ['Vaucheriales', 'Tribonemiales', 'Bangiophyceae', 'Compsogonophyceae', 
                     'Compsopogonophyceae', 'Florideophyceae', 'Phaeophyceae', 'Phaeothamniophyceae', ]
remove_family_list = []
remove_genus_list = ['Prasiola', 'Rosenvingiella', ]

In [24]:
print('Length df_nomac2017 before: ' + str(len(df_nomac2017)))
df_nomac2017 = df_nomac2017[~df_nomac2017.phylum.isin(remove_phylum_list)]
print('Length df_nomac2017 after phylym removed : ' + str(len(df_nomac2017)))
df_nomac2017 = df_nomac2017[~df_nomac2017.class_name.isin(remove_class_list)]
print('Length df_nomac2017 after class removed : ' + str(len(df_nomac2017)))
df_nomac2017 = df_nomac2017[~df_nomac2017.order.isin(remove_order_list)]
print('Length df_nomac2017 after order removed : ' + str(len(df_nomac2017)))
df_nomac2017 = df_nomac2017[~df_nomac2017.family.isin(remove_family_list)]
print('Length df_nomac2017 after family removed : ' + str(len(df_nomac2017)))
df_nomac2017 = df_nomac2017[~df_nomac2017.genus.isin(remove_genus_list)]
print('Length df_nomac2017 after genus removed : ' + str(len(df_nomac2017)))

Length df_nomac2017 before: 8522
Length df_nomac2017 after phylym removed : 7820
Length df_nomac2017 after class removed : 7597
Length df_nomac2017 after order removed : 7561
Length df_nomac2017 after family removed : 7561
Length df_nomac2017 after genus removed : 7550


#### Calculate new columns for NOMAC 2017.

In [25]:
# Add columns.
df_nomac2017 = df_nomac2017.assign(nomac_scientific_name="")
df_nomac2017 = df_nomac2017.assign(nomac_rank="")
df_nomac2017 = df_nomac2017.assign(nomac_author=df_nomac2017.author)
df_nomac2017 = df_nomac2017.assign(nomac_parent="")
df_nomac2017 = df_nomac2017.assign(nomac_classification="")
df_nomac2017 = df_nomac2017.assign(nomac_source=df_nomac2017.source)
df_nomac2017.head()

Unnamed: 0,kingdom,phylum,class_name,order,family,genus,species,subspecies,variety,forma,author,status,source,nomac_scientific_name,nomac_rank,nomac_author,nomac_parent,nomac_classification,nomac_source
0,Plantae,Chlorophyta,,,,,,,,,Rchb.,Gyldig,Artsnavnebase 20170303,,,Rchb.,,,Artsnavnebase 20170303
1,Plantae,Chlorophyta,Bryopsidophyceae,,,,,,,,Bessey,Gyldig,Artsnavnebase 20170303,,,Bessey,,,Artsnavnebase 20170303
2,Plantae,Chlorophyta,Chlorophyceae,,,,,,,,Wille,Gyldig,Artsnavnebase 20170303,,,Wille,,,Artsnavnebase 20170303
3,Plantae,Chlorophyta,Nephrophyceae,,,,,,,,"Cavalier-Smith, 1993",Gyldig,Artsnavnebase 20170303,,,"Cavalier-Smith, 1993",,,Artsnavnebase 20170303
4,Plantae,Chlorophyta,Pedinophyceae,,,,,,,,Moestrup,Gyldig,Artsnavnebase 20170303,,,Moestrup,,,Artsnavnebase 20170303


In [26]:
def calc_scientific_name(kingdom, phylum, class_name, order, family, genus, species, subspecies, variety, forma):
    #
    scientific_name = ''
    rank = ''
    classification = ''
    #
    if len(kingdom) > 0:
        scientific_name = kingdom
        rank = 'Kingdom'
        classification = kingdom
        parent = ''
    if len(phylum) > 0:
        scientific_name = phylum
        rank = 'Phylum'
        classification = classification + ' - ' + phylum
        parent = kingdom
    if len(class_name) > 0:
        scientific_name = class_name
        rank = 'Class'
        classification = classification + ' - ' + class_name
        parent = phylum
    if len(order) > 0:
        scientific_name = order
        rank = 'Order'
        classification = classification + ' - ' + order
        parent = class_name
    if len(family) > 0:
        scientific_name = family
        rank = 'Family'
        classification = classification + ' - ' + family
        parent = order
    if len(genus) > 0:
        scientific_name = genus
        rank = 'Genus'
        classification = classification + ' - ' + genus
        parent = family
    if len(species) > 0:
        scientific_name = genus + ' ' + species
        rank = 'Species'
        classification = classification + ' ' + species
        parent = genus
    if len(subspecies) > 0:
        scientific_name = genus + ' ' + species + ' spp. ' + subspecies
        rank = 'Subspecies'
        classification = classification + ' spp. ' + subspecies
        parent = species
    if len(variety) > 0:
        scientific_name = genus + ' ' + species + ' var. ' + variety
        rank = 'Variety'
        classification = classification + ' var. ' + variety
        parent = species
    if len(forma) > 0:
        scientific_name = genus + ' ' + species + ' f. ' + forma
        rank = 'Forma'
        classification = classification + ' f. ' + forma
        parent = species
    #
    return scientific_name, rank, parent, classification
#
for index, row in df_nomac2017.iterrows():    
    #row.ix[index, 'nomac_scientific_name'], \
    #row.ix[index, 'nomac_rank'], \
    #row.ix[index, 'nomac_parent'], \
    #row.ix[index, 'nomac_classification'] = calc_scientific_name(
    row.loc['nomac_scientific_name'], \
    row.loc['nomac_rank'], \
    row.loc['nomac_parent'], \
    row.loc['nomac_classification'] = calc_scientific_name(
        row.loc['kingdom'], 
        row.loc['phylum'], 
        row.loc['class_name'], 
        row.loc['order'], 
        row.loc['family'], 
        row.loc['genus'], 
        row.loc['species'], 
        row.loc['subspecies'], 
        row.loc['variety'], 
        row.loc['forma'])
#
print('Length df_nomac2017: ' + str(len(df_nomac2017)))
df_nomac2017.head()

Length df_nomac2017: 7550


Unnamed: 0,kingdom,phylum,class_name,order,family,genus,species,subspecies,variety,forma,author,status,source,nomac_scientific_name,nomac_rank,nomac_author,nomac_parent,nomac_classification,nomac_source
0,Plantae,Chlorophyta,,,,,,,,,Rchb.,Gyldig,Artsnavnebase 20170303,Chlorophyta,Phylum,Rchb.,Plantae,Plantae - Chlorophyta,Artsnavnebase 20170303
1,Plantae,Chlorophyta,Bryopsidophyceae,,,,,,,,Bessey,Gyldig,Artsnavnebase 20170303,Bryopsidophyceae,Class,Bessey,Chlorophyta,Plantae - Chlorophyta - Bryopsidophyceae,Artsnavnebase 20170303
2,Plantae,Chlorophyta,Chlorophyceae,,,,,,,,Wille,Gyldig,Artsnavnebase 20170303,Chlorophyceae,Class,Wille,Chlorophyta,Plantae - Chlorophyta - Chlorophyceae,Artsnavnebase 20170303
3,Plantae,Chlorophyta,Nephrophyceae,,,,,,,,"Cavalier-Smith, 1993",Gyldig,Artsnavnebase 20170303,Nephrophyceae,Class,"Cavalier-Smith, 1993",Chlorophyta,Plantae - Chlorophyta - Nephrophyceae,Artsnavnebase 20170303
4,Plantae,Chlorophyta,Pedinophyceae,,,,,,,,Moestrup,Gyldig,Artsnavnebase 20170303,Pedinophyceae,Class,Moestrup,Chlorophyta,Plantae - Chlorophyta - Pedinophyceae,Artsnavnebase 20170303


In [27]:
# For test.
#date_iso = datetime.datetime.now().date().isoformat()
#df_nomac2017.to_csv('./out_data/' + 'NOMAC_2017_and_Artsnavnebase_version_' + date_iso + '.txt',
#                    sep='\t', encoding='cp1252', index = False)

In [28]:
used_columns = ['nomac_scientific_name', 'nomac_rank', 'nomac_author', 'nomac_parent', 
                'nomac_classification', 'nomac_source'] 
for column in list(df_nomac2017):
    if column not in used_columns:
        df_nomac2017.drop(column, axis=1, inplace=True)
#
df_nomac2017.head()

Unnamed: 0,nomac_scientific_name,nomac_rank,nomac_author,nomac_parent,nomac_classification,nomac_source
0,Chlorophyta,Phylum,Rchb.,Plantae,Plantae - Chlorophyta,Artsnavnebase 20170303
1,Bryopsidophyceae,Class,Bessey,Chlorophyta,Plantae - Chlorophyta - Bryopsidophyceae,Artsnavnebase 20170303
2,Chlorophyceae,Class,Wille,Chlorophyta,Plantae - Chlorophyta - Chlorophyceae,Artsnavnebase 20170303
3,Nephrophyceae,Class,"Cavalier-Smith, 1993",Chlorophyta,Plantae - Chlorophyta - Nephrophyceae,Artsnavnebase 20170303
4,Pedinophyceae,Class,Moestrup,Chlorophyta,Plantae - Chlorophyta - Pedinophyceae,Artsnavnebase 20170303


In [29]:
old_header=['nomac_scientific_name', 'nomac_rank', 'nomac_author', 'nomac_parent', 
            'nomac_classification', 'nomac_source'] 
new_header=['scientific_name', 'rank', 'author', 'parent', 'classification', 'source'] 
df_nomac2017.rename(columns=dict(zip(old_header, new_header)), inplace=True)
#df_nomac2017.fillna('', inplace=True)
df_nomac2017.head()

Unnamed: 0,scientific_name,rank,author,parent,classification,source
0,Chlorophyta,Phylum,Rchb.,Plantae,Plantae - Chlorophyta,Artsnavnebase 20170303
1,Bryopsidophyceae,Class,Bessey,Chlorophyta,Plantae - Chlorophyta - Bryopsidophyceae,Artsnavnebase 20170303
2,Chlorophyceae,Class,Wille,Chlorophyta,Plantae - Chlorophyta - Chlorophyceae,Artsnavnebase 20170303
3,Nephrophyceae,Class,"Cavalier-Smith, 1993",Chlorophyta,Plantae - Chlorophyta - Nephrophyceae,Artsnavnebase 20170303
4,Pedinophyceae,Class,Moestrup,Chlorophyta,Plantae - Chlorophyta - Pedinophyceae,Artsnavnebase 20170303


In [30]:
file_name = 'NOMAC_2017_top_taxa.txt'
df_top_taxa = pd.read_csv('./in_data/' + file_name, 
                          sep='\t', encoding='cp1252',
                          na_values=['nan'], keep_default_na=False, 
                          )
df_top_taxa['source'] = 'NOMAC_2017_top_taxa'

df_top_taxa

Unnamed: 0,scientific_name,rank,author,parent,classification,source
0,Biota,Top,,,,NOMAC_2017_top_taxa
1,Eukaryota,Empire,Chatton,Biota,,NOMAC_2017_top_taxa
2,Prokaryota,Empire,Allsopp,Biota,,NOMAC_2017_top_taxa
3,Bacteria,Kingdom,Cavalier-Smith,Prokaryota,,NOMAC_2017_top_taxa
4,Chromista,Kingdom,Cavalier-Smith,Eukaryota,,NOMAC_2017_top_taxa
5,Plantae,Kingdom,Haeckel,Eukaryota,,NOMAC_2017_top_taxa
6,Protozoa,Kingdom,R.Owen,Eukaryota,,NOMAC_2017_top_taxa
7,Fungi,Kingdom,T.L.Jahn & F.F.Jahn ex R.T.Moore,Eukaryota,,NOMAC_2017_top_taxa
8,Eukaryota unassigned,Kingdom,,Eukaryota,,NOMAC_2017_top_taxa


In [31]:
file_name = 'NOMAC_2017_extra_taxa.txt'
df_extra_taxa = pd.read_csv('./in_data/' + file_name, 
                          sep='\t', encoding='cp1252',
                          na_values=['nan'], keep_default_na=False, 
                          )
df_extra_taxa['source'] = 'NOMAC_2017_extra_taxa'

df_extra_taxa

Unnamed: 0,scientific_name,rank,author,parent,classification,source
0,Noctilucea,Class,Haeckel,Miozoa,Chromista - Miozoa - Noctilucea,NOMAC_2017_extra_taxa
1,Noctilucales,Order,Haeckel,Noctilucea,Chromista - Miozoa - Noctilucea - Noctilucales,NOMAC_2017_extra_taxa
2,Noctilucaceae,Family,Kent,Noctilucales,Chromista - Miozoa - Noctilucea - Noctilucales...,NOMAC_2017_extra_taxa
3,Noctiluca,Genus,Suriray,Noctilucaceae,Chromista - Miozoa - Noctilucea - Noctilucales...,NOMAC_2017_extra_taxa
4,Noctiluca scintillans,Species,(Macartney) Kofoid & Swezy,Noctiluca,Chromista - Miozoa - Noctilucea - Noctilucales...,NOMAC_2017_extra_taxa


In [32]:
df_test = df_nomac2017[df_nomac2017['rank'].isin(['Kingdom'])]
df_test

Unnamed: 0,scientific_name,rank,author,parent,classification,source
0,Chromista,Kingdom,Caval.-Sm.,,Chromista,Artsnavnebase 20170303
0,Protozoa,Kingdom,,,Protozoa,Artsnavnebase 20170303


In [33]:
print(str(len(df_nomac2017)))
df_nomac2017 = df_nomac2017[~df_nomac2017['rank'].isin(['Kingdom'])]
print(str(len(df_nomac2017)))

7550
7548


In [34]:
df_nomac2017 = df_nomac2017.sort_values(by='classification', ascending=True)
#
df_nomac2017 = pd.concat([df_top_taxa, df_nomac2017, df_extra_taxa])
print(str(len(df_nomac2017)))

7562


In [35]:
date_iso = datetime.datetime.now().date().isoformat()
df_nomac2017.to_csv('./out_data/' + 'NOMAC_2017_version_' + date_iso + '.txt',
                    sep='\t', encoding='cp1252', index = False)