working file

import statements

In [1]:
import pandas as pd
import datetime as dt
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys

Loads the data files that will be used in this analysis

In [2]:
here = os.getcwd()

#loads gps-location name correspondance file
afilename = here+"/data/2020/map-keys-2020.csv"
places = pd.read_csv(afilename)

#loads species data set
afilename = here+"/data/2020/species2020_full.csv"
samples = pd.read_csv(afilename)

#loads the latest (2014) list of black list/watch list of species for Switzerland from Flora Helvetica, available here: https://www.infoflora.ch/fr/neophytes/listes-et-fiches.html

afilename = here+"/data/florahelvetica/BL_WL_2014_modified.csv"
invasives = pd.read_csv(afilename)

#loads the Welten-Sutter list of species for the plots containing surveys, all hosted by Infoflora
#the two observations from the Doubs are not included in these counts.

afilename = here+"/data/florahelvetica/AtlasWS_151_Biel.csv"
ws151 = pd.read_csv(afilename, sep = ";", encoding = "utf-16")
afilename = here+"/data/florahelvetica/AtlasWS_226_Estavayer.csv"
ws226 = pd.read_csv(afilename, sep = ";", encoding = "utf-16")
afilename = here+"/data/florahelvetica/AtlasWS_252_Erlach.csv"
ws252 = pd.read_csv(afilename, sep = ";", encoding = "utf-16")
afilename = here+"/data/florahelvetica/AtlasWS_572_Beatenberg.csv"
ws572 = pd.read_csv(afilename, sep = ";", encoding = "utf-16")
afilename = here+"/data/florahelvetica/AtlasWS_573_Interlaken.csv"
ws573 = pd.read_csv(afilename, sep = ";", encoding = "utf-16")

#loads the list of under identified species that Infloflora requests be announced
afilename = here+"/data/florahelvetica/taxa_sous_echantillonnes.csv"
taxase = pd.read_csv(afilename)

#loads the list of species identified in the 5X5 kilomter square around Biel that corresponds well to the majority of the survey locations
afilename = here+"/data/florahelvetica/Atlas5x5_585_220.csv"
fivexfive_biel = pd.read_csv(afilename, encoding = "utf-16")

In [3]:
#harmonize species data format in imported dataframes.

#species column
invasives.rename(columns={"Latin":"name"}, inplace = True)

#lambda function keeps the first two words (the taxonomic binomial) in the species name.
def change_species(x):
    try: 
        int_data = x.split()
        data = int_data[:2]
        data = " ".join(data)
        data = data.lower()
    except:
        data = F"wrong {x}"
    return data
invasives["species"] = invasives.name.map(lambda x: change_species(x))
ws151["species1"] = ws151.species.map(lambda x: change_species(x))
taxase["species"] = taxase.taxon.map(lambda x: change_species(x))
ws226["species1"] = ws226.species.map(lambda x: change_species(x))
ws252["species1"] = ws252.species.map(lambda x: change_species(x))
ws572["species1"] = ws572.species.map(lambda x: change_species(x))
ws573["species1"] = ws573.species.map(lambda x: change_species(x))
fivexfive_biel["species1"] = fivexfive_biel.species.map(lambda x: change_species(x))

#pull out watch list and black list from invasives df to compare to my list of species identified.
#samples["danger_list"] = samples.species.map(lambda x: add_lists(x))

In [4]:
#add place name column to Welten-Sutter lists of data

def add_placename(x,name):
    if (name == "biel"):
        data = "biel"
    elif (name == "estavayer"):
        data = "estavayer"
    elif (name == "erlach"):
        data = "erlach"
    elif (name == "beatenberg"):
        data = "beatenberg"
    elif (name == "interlaken"):
        data = "interlaken"
    else:
        data = "bad"
    return data

ws151["newplace"] = ws151.species1.map(lambda x: add_placename(x, "biel"))
ws226["newplace"] = ws226.species1.map(lambda x: add_placename(x, "estavayer"))
ws252["newplace"] = ws252.species1.map(lambda x: add_placename(x, "erlach"))
ws572["newplace"] = ws572.species1.map(lambda x: add_placename(x, "beatenberg"))
ws573["newplace"] = ws573.species1.map(lambda x: add_placename(x, "interlaken"))
fivexfive_biel["newplace"] = fivexfive_biel.Taxon_Id.map(lambda x: add_placename(x, "biel"))

In [37]:
#add waterbody column to make grouping easier

def add_waterbody(x):
    pattern1 = "sund"
    pattern2 = "frinv"
    pattern3 = "orvine"
    pattern4 = "cheyre"
    pattern5 = "ligerz"
    pattern6 = "twann"
    pattern7 = "tauben"
    pattern8 = "lucherz"
    pattern9 = "rondc"
    pattern10 = "weisse"
    if pattern1 in x or pattern10 in x:
        data = "thunersee"
    elif pattern2 in x or pattern7 in x or pattern9 in x:
        data = "suze_u"
    elif pattern3 in x:
        data = "orvine"
    elif pattern4 in x:
        data = "neuenburgersee"
    elif pattern5 in x or pattern6 in x or pattern8 in x:
        data = "bielersee"
    else:
        data = "suze_b"
    return data
samples["waterbody"] = samples.place.map(lambda x: add_waterbody(x))

Unnamed: 0,place,date,species,name,slug,waterbody
930,weissenau,2020-08-10,allium carinatum,ail caréné,allium-carinatum,thunersee
931,weissenau,2020-08-10,viburnum lantana,"mancienne, viorne lantane",viburnum-lantana,thunersee
932,weissenau,2020-08-10,filipendula ulmaria,"reine des prés, spirée",filipendula-ulmaria,thunersee
933,weissenau,2020-08-10,lythrum salicaria,salicaire commune,lythrum-salicaria,thunersee
934,weissenau,2020-08-10,lysimachia vulgaris,lysimaque commune,lysimachia-vulgaris,thunersee


In [45]:
#add welten-sutter number to sample
def addwsnum(x):
    pattern1 = "sund"
    pattern2 = "frinv"
    pattern3 = "orvine"
    pattern4 = "cheyre"
    pattern5 = "ligerz"
    pattern6 = "twann"
    pattern7 = "tauben"
    pattern8 = "lucherz"
    pattern9 = "rondc"
    pattern10 = "weisse"
    pattern11 = "falls"
    if pattern1 in x:
        data = "ws572"
    elif pattern2 in x or pattern3 in x or pattern5 in x or pattern6 in x or pattern7 in x or pattern9 in x:
        data = "ws151"
    elif pattern4 in x:
        data = "ws226"
    elif pattern8 in x:
        data = "ws252"
    elif pattern10 in x:
        data = "ws573"
    elif pattern11 in x:
        data = "not-relevant"
    else:
        data = "ws151"
    return data

samples["wsnum"] = samples.place.map(lambda x: addwsnum(x))
samples.head()
samples.tail()

Unnamed: 0,place,date,species,name,slug,waterbody,wsnum
930,weissenau,2020-08-10,allium carinatum,ail caréné,allium-carinatum,thunersee,ws573
931,weissenau,2020-08-10,viburnum lantana,"mancienne, viorne lantane",viburnum-lantana,thunersee,ws573
932,weissenau,2020-08-10,filipendula ulmaria,"reine des prés, spirée",filipendula-ulmaria,thunersee,ws573
933,weissenau,2020-08-10,lythrum salicaria,salicaire commune,lythrum-salicaria,thunersee,ws573
934,weissenau,2020-08-10,lysimachia vulgaris,lysimaque commune,lysimachia-vulgaris,thunersee,ws573


Species data

In [26]:
#comparison with invasive list

#returns list of the unique species identified in the sample, list and number of species on watch/black list

my_species = samples.species.unique()
their_species_inv = invasives.species.unique()
inv_number = len(invasives["species"])

#returns the list and count of species in the sample that are also in the invasives list.
int_inv = list(set(my_species)&set(their_species_inv))
int_inv_count = len(int_inv)

In [27]:
#get the species areas 
their_species_ws151 = ws151.species1.unique()
int_ws151 = list(set(my_species)&set(their_species_ws151))
com_ws151 = [x for x in samples.species.unique() if x not in ws151.species1.unique()]

In [8]:
#under sampled taxa list

their_species_taxase = taxase.species.unique()
int_taxase = list(set(my_species)&set(their_species_taxase))
int_taxase


['stachys recta',
 'vicia cracca',
 'centaurea scabiosa',
 'leontodon hispidus',
 'heracleum sphondylium',
 'verbascum thapsus',
 'malus sylvestris',
 'hypericum maculatum',
 'trifolium pratense']

In [9]:
ws151.head()

Unnamed: 0,Taxon_Id,ISFS,species,Code WS,Introduite,Nb. Obs.,Années (min),Années (max),Douteuse (?),species1,newplace
0,1000000,100.0,Abies alba Mill.,Fréquent,Indigène,71,1979,2021,,abies alba,biel
1,1000020,150.0,Abutilon theophrasti Medik.,-,Néophyte,1,2018,2018,,abutilon theophrasti,biel
2,1000060,250.0,Acanthus mollis L.,-,Subspontané,1,2006,2006,,acanthus mollis,biel
3,1000070,300.0,Acer campestre L.,Fréquent,Indigène,234,1940,2020,,acer campestre,biel
4,1000100,500.0,Acer negundo L.,-,Introduit,1,2009,2009,,acer negundo,biel


location data

In [10]:
places.head()
places.waterbody.unique()

#using append both the waterbody name and the gps coordinates to the 

array([' suze', ' thunersee', ' suze f', ' suze p', ' orvine',
       ' neuenburgersee', ' bielersee', ' leugene', ' doubs'],
      dtype=object)