working file

import statements

In [1]:
import pandas as pd
import datetime as dt
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys

load the data files

In [12]:
here = os.getcwd()

#loads gps-location name correspondance file
afilename = here+"/data/2020/map-keys-2020.csv"
places = pd.read_csv(afilename)

#loads species data set
afilename = here+"/data/2020/species2020_full.csv"
samples = pd.read_csv(afilename)

#loads the latest (2014) list of black list/watch list of species for Switzerland from Flora Helvetica, available here: https://www.infoflora.ch/fr/neophytes/listes-et-fiches.html

afilename = here+"/data/florahelvetica/BL_WL_2014_modified.csv"
invasives = pd.read_csv(afilename)

#loads the Welten-Sutter list of species for the plots containing surveys, all hosted by Infoflora
#the two observations from the Doubs are not included in these counts.

afilename = here+"/data/florahelvetica/AtlasWS_151_Biel.csv"
ws151 = pd.read_csv(afilename, encoding = "utf-16")
afilename = here+"/data/florahelvetica/AtlasWS_151_Biel.csv"
ws226 = pd.read_csv(afilename, encoding = "utf-16")
afilename = here+"/data/florahelvetica/AtlasWS_151_Biel.csv"
ws252 = pd.read_csv(afilename, encoding = "utf-16")
afilename = here+"/data/florahelvetica/AtlasWS_151_Biel.csv"
ws572 = pd.read_csv(afilename, encoding = "utf-16")
afilename = here+"/data/florahelvetica/AtlasWS_151_Biel.csv"
ws573 = pd.read_csv(afilename, sep = ";", encoding = "utf-16")

#loads the list of under identified species that Infloflora requests be announced
afilename = here+"/data/florahelvetica/taxa_sous_echantillonnes.csv"
taxase = pd.read_csv(afilename)

#loads the list of species identified in the 5X5 kilomter square around Biel that corresponds well to the majority of the survey locations
afilename = here+"/data/florahelvetica/Atlas5x5_585_220.csv"
fivexfive_biel = pd.read_csv(afilename, encoding = "utf-16")

fivexfive_biel.head()

Unnamed: 0,Taxon_Id,ISFS,Espèce,Code WS,Introduite,Nb. Obs.,Années (min),Années (max),Douteuse (?)
0,1000000,100.0,Abies alba Mill.,Fréquent,Indigène,9,1955,2020,
1,1000020,150.0,Abutilon theophrasti Medik.,-,Néophyte,1,2018,2018,
2,1000070,300.0,Acer campestre L.,Fréquent,Indigène,32,1953,2020,
3,1000110,600.0,Acer opalus Mill.,Fréquent,Indigène,26,1901,2020,
4,1000120,700.0,Acer platanoides L.,Fréquent,Indigène,24,1982,2020,


date data

In [3]:
#check date data for missing values and converts to strptime format.
def check_dates(x):
    try:
        data = dt.datetime.strptime(x, "%Y-%m-%d")
    except:
        data = 'no date'
    return data

samples['date_stamp'] = samples['date'].map(lambda x: check_dates(x))
samples.sort_values('date', ascending = True)
samples['month'] = samples.date_stamp.dt.month


#basic date statistics
first_date = samples.date.min()
last_date = samples.date.max()
mode_date = samples.date.mode() #highest number of species identified on this day.
#number of locations per date (not the number of species identified)
#access lists of species by date / by month

print(mode_date)
print(samples.date.describe())
print(samples.month.describe())
print("the unique months are:")
print(samples.month.unique())

0    2020-09-02
dtype: object
count            935
unique            65
top       2020-09-02
freq              55
Name: date, dtype: object
count    935.000000
mean       7.898396
std        0.901668
min        6.000000
25%        7.000000
50%        8.000000
75%        9.000000
max        9.000000
Name: month, dtype: float64
the unique months are:
[9 7 6 8]


Species data

In [4]:
#format species names to be compatible

invasives.rename(columns={"Latin":"name"}, inplace = True)
def change_species(x):
    try: 
        int_data = x.split()
        data = int_data[:2]
        data = " ".join(data)
        data = data.lower()
    except:
        data = F"wrong {x}"
    return data

invasives["species"] = invasives.name.map(lambda x: change_species(x))
ws151["species1"] = ws151.species.map(lambda x: change_species(x))
taxase["species"] = taxase.taxon.map(lambda x: change_species(x))

#pull out watch list and black list from invasives df to compare to my list of species identified.
#samples["danger_list"] = samples.species.map(lambda x: add_lists(x))



In [5]:
#comparison with invasive lists

#returns list and count of unique species identified
my_species = samples.species.unique()
their_species_inv = invasives.species.unique()
inv_number = len(invasives["species"])

#returns the list and count of species in the sample that are also in the invasives list.

int_inv = list(set(my_species)&set(their_species_inv))
int_inv_count = len(int_inv)

int_inv

['robinia pseudoacacia',
 'rubus armeniacus',
 'symphoricarpos albus',
 'impatiens glandulifera',
 'solidago canadensis',
 'artemisia verlotiorum',
 'reynoutria japonica',
 'erigeron annuus',
 'senecio inaequidens',
 'buddleja davidii']

In [6]:
#Welten-Sutter list from area 151
their_species_ws151 = ws151.species1.unique()

#get the species areas for thoune

int_ws151 = list(set(my_species)&set(their_species_ws151))
com_ws151 = [x for x in samples.species.unique() if x not in ws151.species1.unique()]

In [7]:
#under sampled taxa list

their_species_taxase = taxase.species.unique()
int_taxase = list(set(my_species)&set(their_species_taxase))
int_taxase


['stachys recta',
 'leontodon hispidus',
 'hypericum maculatum',
 'malus sylvestris',
 'trifolium pratense',
 'vicia cracca',
 'verbascum thapsus',
 'heracleum sphondylium',
 'centaurea scabiosa']

In [8]:
ws151.head()

Unnamed: 0,id number,ISFS,species,Code WS,Introduite,Nb. Obs.,Années (min),Années (max),Douteuse (?),species1
0,1000000,100.0,Abies alba Mill.,Fréquent,Indigène,71,1979,2021,,abies alba
1,1000020,150.0,Abutilon theophrasti Medik.,-,Néophyte,1,2018,2018,,abutilon theophrasti
2,1000060,250.0,Acanthus mollis L.,-,Subspontané,1,2006,2006,,acanthus mollis
3,1000070,300.0,Acer campestre L.,Fréquent,Indigène,234,1940,2020,,acer campestre
4,1000100,500.0,Acer negundo L.,-,Introduit,1,2009,2009,,acer negundo


location data

In [9]:
places.head()
places.waterbody.unique()

#using append both the waterbody name and the gps coordinates to the 

SyntaxError: invalid syntax (<ipython-input-9-a0b869db0529>, line 5)