# Preprocessing

In [1]:
# math and data packages
import pandas as pd
import numpy as np
import math

# charting and graphics
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# os and file types
import os
import sys
import datetime as dt
import json
import csv

# images and display
import base64, io, IPython
from PIL import Image as PILImage
from IPython.display import Markdown as md
from IPython.display import display, Math, Latex



## Import data

Fix any known formatting problems here

In [4]:
# make slugs for species and places

# function to make the species slugs
def to_slug(x):
    """Build a lowercase ``genus-species`` slug from a scientific name.

    The name is split on whitespace, only the first two tokens are kept
    (so trailing author citations such as ``Mill.`` or ``L.`` are dropped),
    and the tokens are joined with a hyphen and lower-cased.

    Parameters
    ----------
    x : str
        Scientific name, e.g. ``"Abies alba Mill."``.

    Returns
    -------
    str
        The slug, e.g. ``"abies-alba"``. Values that cannot be split and
        joined as text (``NaN``, ``None``, numbers, bytes) give ``"none"``.
    """
    try:
        return "-".join(x.split()[:2]).lower()
    except (AttributeError, TypeError):
        # Non-text cells (typically NaN for a missing value) have no usable
        # .split(). Only these expected failures map to the "none"
        # placeholder, so genuine errors and interrupts still surface.
        return "none"

# Atlas square 550_215 is the 5x5 kilometer square for La Chaux-de-Fonds.
# The other four squares are the Biel ones; with Biel this just covers the
# general survey area. A new list of unique values is to be made from the
# four Biel lists.

cdf5x5 = pd.read_csv(
    "resources/atlas5x5/Atlas5x5_550_215.csv",
    encoding="utf-16",
)
cdf5x5.head()

Unnamed: 0,Taxon_Id,SISF,species,WS-Code,Eingeschleppt,Obs.Anz.,Jahre (min),Jahre (max),Unsicher (?)
0,1000000,100.0,Abies alba Mill.,Häufig,Indigen,31,1955,2021,
1,1000070,300.0,Acer campestre L.,Häufig,Indigen,5,1955,2001,
2,1000120,700.0,Acer platanoides L.,Häufig,Indigen,7,1955,2001,
3,1000130,800.0,Acer pseudoplatanus L.,Häufig,Indigen,33,1955,2021,
4,1000160,900.0,Aceras anthropophorum (L.) W. T. Aiton,Häufig,Indigen,1,1982,1999,


In [None]:
def make_copy(df, col, to_slug):
    """Back up a name column in ``acopy`` and store its slugs in ``species``.

    The frame is modified in place and the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to update.
    col : str
        Column holding the names to turn into slugs.
    to_slug : callable
        Function applied to every backed-up name.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, with the ``acopy`` and ``species`` columns set.
    """
    # When the source column is "species" itself, this function overwrites it
    # with slugs. If "acopy" already exists at that point, copying "species"
    # again would replace the original names in the backup with slugs, so the
    # existing backup is kept. This makes re-running the calling cell harmless.
    already_backed_up = col == "species" and "acopy" in df.columns
    if not already_backed_up:
        df["acopy"] = df[col]
    df["species"] = df["acopy"].map(to_slug)
    return df
# Add the "acopy" backup and the slugged "species" column to each reference table.
# The invasives names are read from "Latin"; the other two tables from "species".
# NOTE(review): invasives, priority and redlist are not loaded in any cell shown
# in this notebook — confirm where they are defined before Restart & Run All.
invasives = make_copy(invasives, "Latin", to_slug)
priority = make_copy(priority, "species", to_slug)
redlist = make_copy(redlist, "species", to_slug)

In [None]:
# Preview the first rows of the invasives table.
invasives.head()

In [None]:
# Columns to keep from the invasives table:
#   species   - the species slug
#   list_2014 - BL = black list / WL = watch list (BL is more of a threat than WL)
#   ODE       - ODE means officially regulated, "-" means it is not
# The long ODE header is renamed to "ode".
invcolumns = [
    "species",
    "list_2014",
    "Ordonnonce sur la dissémination des organismes (ODE)",
]
invasives = invasives.rename(columns={invcolumns[-1]: "ode"})
invasives.loc[:, ["species", "list_2014", "ode"]].head()

In [None]:
# Columns to keep from the priority table:
#   species
#   Jura           - JU if present, 0 otherwise
#   Plateau        - MP if present, 0 otherwise
#   priorite       - 4 = high, 1 = low
#   responsabilite - 4 = high, 0 = none
# Source: https://www.infoflora.ch/fr/conservation-des-especes/liste-rouge.html#especes-prioritaires
priority.info()

pricolumns = [
    "species",
    "Jura",
    "Plateau",
    "responsabilite",
    "priorite",
]
priority.loc[:, pricolumns].head()

In [None]:
# Print the summary of the red-list table.
redlist.info()

# Columns to keep: family, genus, species, CH (national status), JU (status in Jura), MP (status in central plateau)

In [None]:
# Print the summary of the surveys table.
# NOTE(review): surveys is not loaded in any cell shown in this notebook — confirm where it is defined.
surveys.info()

In [None]:


# Rebuild the slugged "species" column of each table from its "acopy" backup.
# NOTE(review): make_copy already performs this same assignment — check
# whether this cell is still needed.
invasives["species"] = invasives["acopy"].map(to_slug)
priority["species"] = priority["acopy"].map(to_slug)
redlist["species"] = redlist["acopy"].map(to_slug)

In [None]:
# Display the "species" column of the priority table.
priority.species

In [None]:
# Tally the surveyed species against the three reference lists.
# A plant is attributed to the first list that contains it, checked in the
# order priority -> invasive -> redlist, so each surveyed species ends up under
# at most one key. The count grows by the number of matching rows in that list.
# NOTE(review): a species present in several lists is only reported for the
# first one — confirm this precedence is intended.
count = {"priority": 0, "invasive": 0, "redlist": 0}
names = {"priority": [], "invasive": [], "redlist": []}

for plant in surveys.species.unique():
    matches = {
        "priority": len(priority.loc[priority.species == plant]),
        "invasive": len(invasives.loc[invasives.species == plant]),
        "redlist": len(redlist.loc[redlist.species == plant]),
    }
    # First list (in the order above) with at least one matching row, if any.
    label = next((key for key, hits in matches.items() if hits > 0), None)
    if label is not None:
        count[label] += matches[label]
        names[label].append(plant)
    
    
   

In [None]:
# Name corrections: each key is a name as recorded and each value is the name
# it should be replaced with (spelling fixes and synonyms).
# NOTE(review): the place where this mapping is applied is not shown in this
# notebook — presumably the survey species names; confirm.
# NOTE(review): some keys use a space and others a hyphen between the two
# words, and some carry a trailing space. They are left exactly as written
# because they may have to match the recorded values character for character.
replacedict = {
    "verbanum bonariensis ": "verbena bonariensis",
    "medicago varia": "medicago sativa",
    "oenothera": "oenothera biennis",
    "geranium pratens": "geranium pratense",
    "oenothera biennis ": "oenothera biennis",
    "oenothera biennis agg.": "oenothera biennis",
    "solidalgo canadensis": "solidago canadensis",
    "verbascum lynchitis": "verbascum lychnitis",
    "verbascum negris": "verbascum nigrum",
    "securigea varia": "securigera varia",
    "melilotus officianalis": "melilotus officinalis",
    "knautia maxima": "knautia dipsacifolia",
    "hieracium aurantiacum": "pilosella aurantiaca",
    "sysimbrium officinale": "sisymbrium officinale",
    "geranium robertanium": "geranium robertianum",
    # NOTE(review): this entry and "lactuca muralis" further down map to each
    # other, so the two names are swapped rather than unified — decide which
    # name is the target and drop the other entry.
    "mycelis muralis": "lactuca muralis",
    "calamintha-nepeta": "clinopodium nepeta",
    "polygonum-persicaria": "persicaria maculosa",
    "sorbus-aria": "aria edulis",
    "taraxacum": "taraxacum officinale",
    "jacobaea vulgaris": "senecio jacobaea",
    # This key used to be listed three times; the last two copies carried the
    # misspelt value "conzya canadensis" and silently overrode this one.
    "erigeron canadensis": "conyza canadensis",
    "rorippa islandica": "rorippa palustris",
    "malus sylvestris": "malus domestica",
    "hylotelephium telephium": "sedum telephium",
    "lactuca muralis": "mycelis muralis",
    "chaenorhinum minus": "chaenorrhinum minus",
    "borkhausenia intermedia": "scandosorbus intermedia",
    "centaurea nigra": "centaurea jacea",
}

In [None]:
# Preview the first rows of the invasives table again.
invasives.head()

In [None]:
# Display the tallies per reference list.
count

In [None]:
# Display the surveyed species that were attributed to the priority list.
names['priority']

## Determine whether or not a species was detected within a geographic limit

The territory is divided into different segments. The Flora Helvetica and WS maps have different geographic bounds. Here we determine whether or not a species is present within the confines of each of these boundaries.

### Key the species to the different maps it was identified in


## Format date column to ISO standard