In [1]:
# math and data packages
import pandas as pd
import numpy as np
import math

# charting and graphics
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# os and file types
import os
import sys
import datetime as dt
import json
import csv

# images and display
import base64, io, IPython
from PIL import Image as PILImage
from IPython.display import Markdown as md
from IPython.display import display, Math, Latex

# Read the master observation table and, in the same chain, discard the two
# columns the analysis does not use ("Unnamed: 0" is presumably the index
# column written out by an earlier to_csv call — TODO confirm).
spd = (
    pd.read_csv("resources/inprocess/master.csv")
    .drop(columns=["Unnamed: 0", "mattenstrasse-area-1"])
)

In [30]:
# functions that will be used in each subgroup, part of analysis and in the whole data

def top_y_species(x, y):
    """Return the ``y`` most frequently recorded species in data frame ``x``.

    NOTE(review): "top" is interpreted as "most rows in ``x``" — confirm this
    is the intended ranking.

    Parameters
    ----------
    x : pandas.DataFrame
        Observation records; must contain a ``species`` column.
    y : int
        Maximum number of species to return. Values <= 0 give an empty list.

    Returns
    -------
    list
        Unique species names ordered from most to fewest records. Ties are
        broken alphabetically so the result is deterministic. Missing species
        values are ignored. The list is shorter than ``y`` when ``x`` holds
        fewer unique species.
    """
    # value_counts() drops NaN by default, so missing names never rank.
    counts = x["species"].value_counts()
    # Explicit sort: count descending, then name ascending for stable ties.
    ranked = sorted(counts.items(), key=lambda item: (-item[1], str(item[0])))
    limit = max(int(y), 0)
    return [name for name, _ in ranked[:limit]]

def top_y_genus(x, y):
    """Return the ``y`` most frequently recorded genera in data frame ``x``.

    The genus is taken to be the text before the first hyphen of each
    ``species`` value (e.g. ``"veronica-persica"`` -> ``"veronica"``; a value
    with no hyphen such as ``"taraxacum"`` is used as-is).
    NOTE(review): this relies on the ``genus-epithet`` naming seen in the
    data; "top" is interpreted as "most rows in ``x``" — confirm both.

    Parameters
    ----------
    x : pandas.DataFrame
        Observation records; must contain a ``species`` column.
    y : int
        Maximum number of genera to return. Values <= 0 give an empty list.

    Returns
    -------
    list
        Unique genus names ordered from most to fewest records, ties broken
        alphabetically. Missing species values are ignored.
    """
    genus = x["species"].dropna().map(lambda name: str(name).split("-", 1)[0])
    counts = genus.value_counts()
    # Explicit sort: count descending, then name ascending for stable ties.
    ranked = sorted(counts.items(), key=lambda item: (-item[1], str(item[0])))
    limit = max(int(y), 0)
    return [name for name, _ in ranked[:limit]]

def top_y_group(x, y, col):
    """Return the ``y`` most frequent unique values of column ``col`` in ``x``.

    NOTE(review): "top" is interpreted as "most rows in ``x``" — confirm.
    Every non-missing value is counted, including any placeholder labels the
    column may contain.

    Parameters
    ----------
    x : pandas.DataFrame
        Observation records.
    y : int
        Maximum number of values to return. Values <= 0 give an empty list.
    col : str
        Name of the column to rank; a ``KeyError`` is raised by pandas if the
        column does not exist.

    Returns
    -------
    list
        Unique values of ``col`` ordered from most to fewest records, ties
        broken by the values' string form. Missing values are ignored.
    """
    # value_counts() drops NaN by default.
    counts = x[col].value_counts()
    ranked = sorted(counts.items(), key=lambda item: (-item[1], str(item[0])))
    limit = max(int(y), 0)
    return [value for value, _ in ranked[:limit]]

def red_list(x, y):
    """Group the species in ``x`` by their red-list code for biogeo region ``y``.

    The red-list column is looked up as ``"r" + y`` in lower case (``"mp"`` ->
    ``"rmp"``, ``"ch"`` -> ``"rch"``, ``"ju"`` -> ``"rju"``).
    NOTE(review): this naming is inferred from the ``rch``/``rmp``/``rju``
    columns and the use of ``rmp`` for region MP elsewhere in the notebook —
    confirm. Rows whose code is missing, ``"X"`` or ``"none"`` are treated as
    "no red-list status" and skipped — TODO confirm these placeholders.

    Example of the returned shape::

        {"NT": ["carduus-crispus", "alcea-rosea"], "VU": ["cardamine-hirsuta"]}

    Parameters
    ----------
    x : pandas.DataFrame
        Observation records with a ``species`` column and the red-list column
        for the requested region.
    y : str
        Biogeo region code, case-insensitive (e.g. ``"mp"``).

    Returns
    -------
    dict
        Maps each red-list code found in the data (spelled as in the data) to
        the list of unique species carrying it, in order of first appearance.

    Raises
    ------
    KeyError
        If ``x`` has no red-list column for region ``y``.
    """
    column = "r" + str(y).strip().lower()
    if column not in x.columns:
        raise KeyError(f"no red-list column {column!r} for biogeo region {y!r}")

    placeholders = {"X", "none"}
    has_code = x[column].notna() & ~x[column].isin(placeholders)
    listed = x.loc[has_code, [column, "species"]]

    by_code = {}
    for code, names in listed.groupby(column)["species"]:
        # dict.fromkeys de-duplicates while keeping first-appearance order.
        by_code[code] = list(dict.fromkeys(names.dropna()))
    return by_code

def not_indy(x):
    """Group the species in ``x`` by their non-native status.

    Status labels are read from the ``ni`` and ``inv`` columns, the two
    columns this notebook filters on for "not indigenous", "watch list" and
    "black list" species (values such as ``"ni"``, ``"WL"``, ``"BL"``).
    NOTE(review): confirm that these two columns are the complete set of
    non-native markers. Cells that are missing, ``"X"`` or ``"none"`` are
    treated as "no status" and skipped — TODO confirm these placeholders.

    Parameters
    ----------
    x : pandas.DataFrame
        Observation records with a ``species`` column. A status column that
        is absent from ``x`` is skipped silently.

    Returns
    -------
    dict
        Maps each status label found in the data to the list of unique
        species carrying it, in order of first appearance.
    """
    status_columns = ("ni", "inv")
    placeholders = {"X", "none"}

    by_status = {}
    for column in status_columns:
        if column not in x.columns:
            continue
        flagged = x.loc[x[column].notna() & ~x[column].isin(placeholders)]
        for status, names in flagged.groupby(column)["species"]:
            bucket = by_status.setdefault(status, [])
            for name in names.dropna():
                if name not in bucket:
                    bucket.append(name)
    return by_status

In [3]:
# Distinct labels present in the "lot" column (missing values included).
spd["lot"].unique()

array(['X', 'none', nan, 'schwanen', 'flaess', 'taubenloch', 'port'],
      dtype=object)

In [4]:
# Show the column labels of the master table as it stands after loading.
spd.columns

Index(['date', 'species', 'year', 'location', 'biogeo', 'canton', 'river',
       'lot', 'forest', 'project', 'city', 'rch', 'rmp', 'rju', 'fam', 'res',
       'pri', 'inv', 'ode', 'ns', 'ongen', 'ni', 'cdf', 'biel', 'locname'],
      dtype='object')

In [5]:
# Preview the first five records of the master table.
spd.head(n=5)

Unnamed: 0,date,species,year,location,biogeo,canton,river,lot,forest,project,...,res,pri,inv,ode,ns,ongen,ni,cdf,biel,locname
0,2020-09-02,plantago-lanceolata,1,c,mp,be,madretschkanal,X,X,X,...,X,X,X,X,X,X,X,cdf,biel,alleestrasse-madretschkanal-1
1,2020-09-02,centaurea-nigra,1,c,mp,be,madretschkanal,X,X,X,...,X,X,X,X,ns,X,X,X,X,alleestrasse-madretschkanal-1
2,2020-09-02,plantago-media,1,c,mp,be,madretschkanal,X,X,X,...,X,X,X,X,X,X,X,cdf,biel,alleestrasse-madretschkanal-1
3,2020-09-02,chenopodium-album,1,c,mp,be,madretschkanal,X,X,X,...,X,X,X,X,X,X,X,cdf,biel,alleestrasse-madretschkanal-1
4,2020-09-02,centaurea-jacea,1,c,mp,be,madretschkanal,X,X,X,...,2.0,3.0,X,X,X,X,X,cdf,biel,alleestrasse-madretschkanal-1


In [6]:
# summary statistics on the key categories - to discuss !


In [None]:
# Date summary, year 1 and year 2 basic differences

Year 2 saw around five times as many plants identified as Year 1, owing to the surveyor's added experience in identifying plants. In Year 3, with that experience carried forward, the number of observations is expected to triple to around 18'000 - 22'000 over the year.

Not only has experience significantly improved plant recognition, but the survey method and data pipeline are now much smoother as well. On top of this, the processes to organize and analyze the data are now in place, so the survey team is much freer to focus on identifying plants and entering them in the infoflora app.

In [None]:
# through time

A number of species ... (TODO: placeholder text; describe how the species observed change through time.)

In [7]:
### Lot comparison: one data frame per named lot (four lots in total).
sch, por, tau, fla = (
    spd[spd["lot"] == lot_name]
    for lot_name in ("schwanen", "port", "taubenloch", "flaess")
)

# Key categories for the schwanen lot: not indigenous, watch list,
# black list, and near threatened / vulnerable in region MP.
nisch = sch[sch["ni"] == "ni"]
wlsch = sch[sch["inv"] == "WL"]
blsch = sch[sch["inv"] == "BL"]
ntmpsch = sch[sch["rmp"] == "NT"]
vumpsch = sch[sch["rmp"] == "VU"]

# Report the number of distinct species in each category, one line each.
print(
    f"there is {ntmpsch['species'].nunique()} species on the NT list",
    f"there is {wlsch['species'].nunique()} species on the watch list",
    f"there is {blsch['species'].nunique()} species on the black list",
    f"there is {vumpsch['species'].nunique()} species on the vulnerable list",
    f"there are {nisch['species'].nunique()} non indigenous non invasive species",
    sep="\n",
)



there is 1 species on the NT list
there is 0 species on the watch list
there is 4 species on the black list
there is 0 species on the vulnerable list
there are 3 non indigenous non invasive species


In [11]:
# Distinct labels present in the "project" column (missing values included).
spd["project"].unique()

array(['X', 'renat-suze', nan, 'renat-cygnes', 'pronat', 'renat-neumatt',
       'renat-sund', 'renat-vor'], dtype=object)

In [14]:
# Renaturalization areas: one data frame per project label, for description
# and comparison.
cyg, suz, pro, neu = (
    spd[spd["project"] == project_name]
    for project_name in ("renat-cygnes", "renat-suze", "pronat", "renat-neumatt")
)

array(['medicago-lupulina', 'erophila-verna', 'senecio-vulgaris',
       'muscari-neglectum', 'taraxacum', 'primula-acaulis',
       'glechoma-hederacea', 'vinca-minor', 'prunus-spinosa',
       'veronica-persica', 'salix-caprea', 'ranunculus-acris',
       'cardamine-flexuosa', 'lamium-purpureum',
       'capsella-bursa-pastoris', 'ranunculus-bulbosus',
       'erodium-cicutarium', 'potentilla-verna', 'veronica-hederifolia',
       'geranium-pyrenaicum', 'valerianella-locusta',
       'arabidopsis-thaliana', 'sinapis-arvensis', 'cerastium-glomeratum',
       'salvia-pratensis', 'sisymbrium-officinale', 'sanguisorba-minor',
       'reseda-lutea', 'galium-mollugo', 'urtica-dioica',
       'erigeron-annuus', 'plantago-lanceolata', 'trifolium-pratense',
       'achillea-millefolium', 'veronica-arvensis', 'myosotis-arvensis',
       'galium-aparine', 'papaver-rhoeas', 'kolkwitzia-amabilis',
       'veronica-serpyllifolia', 'prunus-serotina', 'mahonia-aquifolium',
       'geum-urbanum', 'al

In [16]:
# Distinct labels present in the "river" column (missing values included).
spd["river"].unique()

array(['madretschkanal', 'schuessinsel', 'suze', nan, 'neuenburgersee',
       'bielersee', 'orvine', 'leugene', 'emme', 'doubs', 'thunersee',
       'X', 'none', 'thun'], dtype=object)

In [27]:
# Rivers to describe and compare: madretschkanal, suze, schuessinsel, and the
# whole suze length combined vs the total.
# NOTE: `suz` and `sch` are also assigned elsewhere in this notebook from the
# "project" and "lot" columns; once this cell has run they hold the river
# subsets instead.
river = spd["river"]
suz = spd[river.eq("suze")]
mad = spd[river.eq("madretschkanal")]
sch = spd[river.eq("schuessinsel")]
del river  # keep the notebook namespace unchanged

suz.head(n=5)

Unnamed: 0,date,species,year,location,biogeo,canton,river,lot,forest,project,...,res,pri,inv,ode,ns,ongen,ni,cdf,biel,locname
66,09.06.2020,medicago-lupulina,1,c,mp,be,suze,X,X,X,...,X,X,X,X,X,X,X,cdf,biel,buerenstrasse-suze-1
67,09.06.2020,cichorium-intybus,1,c,mp,be,suze,X,X,X,...,X,X,X,X,X,X,X,cdf,biel,buerenstrasse-suze-1
68,09.06.2020,potentilla-reptans,1,c,mp,be,suze,X,X,X,...,X,X,X,X,X,X,X,X,biel,buerenstrasse-suze-1
69,09.06.2020,papaver-rhoeas,1,c,mp,be,suze,X,X,X,...,X,X,X,X,X,X,X,X,biel,buerenstrasse-suze-1
70,09.06.2020,hypericum-maculatum,1,c,mp,be,suze,X,X,X,...,X,X,X,X,X,X,X,cdf,biel,buerenstrasse-suze-1


In [None]:
# describe pro nature renaturalization areas, compare to lots, to other renats, to everything else "berm"

In [51]:
# Number of distinct species in `sch`. NOTE: `sch` is assigned in two cells
# of this notebook (lot "schwanen" and river "schuessinsel"); the value shown
# depends on which of them ran last.
sch["species"].nunique()

98

2