Scratch workbook for calculations to avoid cluttering the main analysis file.

In [1]:
# import packages, files and the data set needed

# Import packages and files

# math and data packages
import pandas as pd
import numpy as np
import math
from scipy import ndimage

# charting and graphics
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import colors
import seaborn as sns
import matplotlib.ticker as ticker
import matplotlib.dates as mdates
from matplotlib.gridspec import GridSpec
import matplotlib.image as mpimg

# os and file types
import os
import sys
import datetime as dt
import json
import csv

# images and display
import base64, io, IPython
from PIL import Image as PILImage
from IPython.display import Markdown as md
from IPython.display import display

# Survey observations: read the CSV, parse the ISO-formatted (YYYY-MM-DD) date
# column (anything that does not match becomes NaT) and derive the month number.
spd = pd.read_csv("resources/survey_data_lists_02_22.csv")
spd["date"] = pd.to_datetime(spd["date"], format="%Y-%m-%d", errors="coerce")
spd["month"] = spd["date"].dt.month

# Stack the five status columns (inv, ns, rch, ni, ongen) into one "allstats"
# column: for each status column, keep the rows whose value is not "X" and copy
# that value into "allstats". A row with several non-"X" status columns appears
# once per such column in the stacked frame.
spdint1 = spd.loc[spd["inv"].ne("X")].assign(allstats=lambda rows: rows["inv"])
spdint2 = spd.loc[spd["ns"].ne("X")].assign(allstats=lambda rows: rows["ns"])
spdint3 = spd.loc[spd["rch"].ne("X")].assign(allstats=lambda rows: rows["rch"])
spdint4 = spd.loc[spd["ni"].ne("X")].assign(allstats=lambda rows: rows["ni"])
spdint5 = spd.loc[spd["ongen"].ne("X")].assign(allstats=lambda rows: rows["ongen"])
spd_allstats = pd.concat([spdint1, spdint2, spdint3, spdint4, spdint5])

# Independent per-year copies of the stacked data, for later charting.
spd1 = spd_allstats[spd_allstats["year"].eq(1)].copy()
spd2 = spd_allstats[spd_allstats["year"].eq(2)].copy()

# import 5x5 data: infoflora Atlas 5x5 observation exports (UTF-16 encoded CSV files)
# NOTE(review): biel53 and biel54 read the same file as biel51
# (Atlas5x5_580_220.csv), so only two distinct boxes are actually loaded.
# This looks like a copy-paste slip -- confirm the intended file names for
# the two remaining boxes before relying on any 5x5 comparison.
biel51 = pd.read_csv("resources/atlas5x5/Atlas5x5_580_220.csv", encoding = "utf-16")
biel52 = pd.read_csv("resources/atlas5x5/Atlas5x5_580_215.csv", encoding = "utf-16")
biel53 = pd.read_csv("resources/atlas5x5/Atlas5x5_580_220.csv", encoding = "utf-16")
biel54 = pd.read_csv("resources/atlas5x5/Atlas5x5_580_220.csv", encoding = "utf-16")

# germination records; "pdate" (presumably the planting date -- confirm) is
# parsed as day/month/two-digit-year and values that do not match become NaT
ger=pd.read_csv("resources/germ_data.csv")
ger["pdate"] = pd.to_datetime(ger['pdate'], errors='coerce', format='%d/%m/%y')

In [2]:
# comparison with 5x5 observations around biel

# set up the 4 5x5 km observation boxes from infoflora that meet in Biel/Bienne into one database indexed on species.

# convert the infoflora species name to species slugs to be compatible
def to_slug(x):
    """Convert a species name into a lowercase ``genus-species`` slug.

    The first two whitespace-separated words of ``x`` are joined with a
    hyphen and lower-cased; any further words are dropped. A value that is
    already a slug is returned unchanged, because it contains no whitespace.

    Parameters
    ----------
    x : str
        Species name. Non-string values (for example NaN from an empty CSV
        cell, or None) are accepted and mapped to the placeholder.

    Returns
    -------
    str
        The slug, or the placeholder ``"none"`` when ``x`` cannot be split
        and joined as text. An empty string yields an empty string.
    """
    try:
        words = x.split()
        return "-".join(words[:2]).lower()
    except (AttributeError, TypeError):
        # Only "this is not text" failures are turned into the placeholder;
        # anything else (e.g. KeyboardInterrupt) must propagate to the caller.
        return "none"

# Prepare each 5x5 box: convert the species names to slugs, add a constant
# marker column named after the box, and index the frame on the slug.
# A single vectorised map per frame is sufficient (to_slug leaves an existing
# slug unchanged, so mapping repeatedly inside a row loop only cost time).
# The frames are modified in place, so this cell can run only once per kernel
# session: a second run fails because "species" is then the index, not a column.
for box, label in (
    (biel51, "biel51"),
    (biel52, "biel52"),
    (biel53, "biel53"),
    (biel54, "biel54"),
):
    box["species"] = box["species"].map(to_slug)
    # every box gets a marker column carrying its own name
    box[label] = label
    box.set_index("species", inplace=True, drop=True)

# combine the observations into a dataframe indexed on species
biel_species = set([*biel51.index, *biel52.index, *biel53.index, *biel54.index])
# sort so that the row order is reproducible between kernel sessions
bs = pd.DataFrame(index=sorted(biel_species))
bs["biel"] = "biel"


# keep only survey records identified to species level (slug contains a hyphen);
# genus-only ("ongen") records will not be in the infoflora observations.
# na=False treats a missing species name as "no hyphen" instead of breaking the mask.
spd_ongenno = spd_allstats[spd_allstats["species"].str.contains("-", na=False)]

# list the species recorded in the surveys that do not appear in any of the
# infoflora 5x5 boxes
survey_species = spd_ongenno["species"].unique()
data = [name for name in survey_species if name not in bs.index]
notin5x5 = pd.DataFrame({"col":data})


In [3]:
# calculate species refinement
# Value tallies of the "biel" and "cdf" columns over the species-level records.
biel = spd_ongenno["biel"].value_counts()
cdf = spd_ongenno["cdf"].value_counts()

# Records marked "X" in both columns (presumably "X" marks absence from that
# list -- confirm), and the distinct species they cover.
noneint = spd_ongenno.loc[spd_ongenno["biel"].eq("X")]
none = noneint.loc[noneint["cdf"].eq("X")]
specieslist = none["species"].unique()

In [4]:
# germination containers: independent copies of the germination records,
# one per container type
cont = ger[ger["container"].eq("plastic-tray")].copy()
conc = ger[ger["container"].eq("concrete-container")].copy()

In [14]:
# Show the column labels of the concrete-container subset.
conc.columns

Index(['species', 'recs', 'recn', 'recp', 'collection', 'origin', 'geo',
       'colm', 'coly', 'sloc', 'storage-comment', 'pdate', 'pmet', 'container',
       'area', 'density', '2021-10-10', '2021-10-12', '2021-10-14',
       '2021-10-16', '2021-10-18', '2021-10-20', '2021-10-22', '2021-10-24',
       '2021-10-26', '2021-10-28', '2021-10-30', '2021-11-03', '2021-11-10',
       '2021-11-18', '2021-11-23', '2021-11-28', 'genus', 'int'],
      dtype='object')

In [13]:
# Display a version of conc in which every "full" value is replaced by 200.
# The result is only shown, not assigned, so conc itself still contains "full".
# NOTE(review): 200 looks like a stand-in count for a completely covered
# container -- confirm the value, and assign the result if it is needed later.
conc.replace("full", 200)

Unnamed: 0,species,recs,recn,recp,collection,origin,geo,colm,coly,sloc,...,2021-10-26,2021-10-28,2021-10-30,2021-11-03,2021-11-10,2021-11-18,2021-11-23,2021-11-28,genus,int
12,felseck-flower,dd,dd,dd,personal,felseck,biel,9,2021,home,...,18,19,22,24,24,24,26,26,felseck,felseck
13,felseck-grass,dd,dd,dd,personal,felseck,biel,9,2021,home,...,31,32,37,40,50,50,60,60,felseck,felseck
48,helianthemum-nummularium,yes,yes,no,port 34,port-34,port 34,x,2020,port 34,...,0,2,2,2,1,0,2,2,helianthemum,port-34
49,silene-arvensis,x,x,x,port 34,rue-de-l'avenir,biel,x,2020,home,...,200,200,200,200,200,200,42,42,silene,rue-de-l'avenir
50,matricaria-chamomilla,x,x,x,port 34,cygnes-lot,biel,x,2020,home,...,19,19,20,19,18,10,5,5,matricaria,cygnes-lot
51,malva-sylvestris,x,yes,x,port 34,cygnes-lot,biel,x,2020,home,...,200,200,200,200,200,200,15,15,malva,cygnes-lot
52,hypericum-perforatum,yes,yes,no,port 34,port-34,port 34,x,2020,port 34,...,200,200,200,200,200,200,200,200,hypericum,port-34
53,campanula-perscifolia,x,no,yes,port 34,port-34,port 34,x,2020,port 34,...,1,7,0,0,3,25,26,26,campanula,port-34
54,cichorium-intybus,x,yes,yes,port 34,ursula-albrecht,port 34,x,2020,port 34,...,200,200,200,200,200,200,200,200,cichorium,ursula-albrecht
55,aster-linosyrus,x,no,no,port 34,ticino,ticino,x,2019,port 34,...,10,15,18,28,27,27,27,27,aster,ticino


In [26]:
# Dated observation columns of the concrete-container records.
observation_dates = [
    '2021-10-10', '2021-10-12', '2021-10-14', '2021-10-16',
    '2021-10-18', '2021-10-20', '2021-10-22', '2021-10-24',
    '2021-10-26', '2021-10-28', '2021-10-30', '2021-11-03',
    '2021-11-10', '2021-11-18', '2021-11-23', '2021-11-28',
]
concs = conc.loc[:, observation_dates].copy()


# Turn the string labels into real timestamps so the columns form a time axis.
concs.columns = pd.to_datetime(concs.columns)
# conc2 is a second name for the same frame, not a copy.
conc2 = concs

In [5]:
# Cast the plastic-tray "area" values to integers, overwriting the column.
# astype(int) raises if any value cannot be converted to an integer.
cont["area"] = cont["area"].astype(int)

In [6]:
# Scratch column: the 2021-10-12 observation divided by "area".
# NOTE(review): the dated columns can hold strings (conc shows values such as
# '22' and 'full'); the division needs numeric values -- confirm the dtype.
cont["test"] = cont["2021-10-12"]/cont["area"]

In [None]:
# Show the column labels of the germination data.
# This cell only inspects: a DataFrame is not a valid sequence of column
# labels, so it must not be assigned to ger.columns.
ger.columns

In [7]:
# Inspect the distinct values recorded on 2021-10-30 for the concrete containers.
conc["2021-10-30"].unique()

array(['22', '37', '2', 'full', '20', '0', '18', '1', '21', '8', '31',
       '5', '4', '15', '7', '3'], dtype=object)

## Germination

### non-date columns information

In [8]:
# sloc = storage location column; indicates where the seeds were stored, grosso modo
# pmet = planting-method column; indicates how the seeds were planted. All followed about the same protocol so maybe not necessary, only rhinanthus differed.
# density = density of seeds used (small, medium, high)
# area = indicates how many germination cells, or what fraction of a container, was dedicated to the plants
# colm = collection month
# coly = collection year
# collection column indicates whether the seed originated with volo or was collected by me
# container column indicates which container the seeds were planted in. plastic trays used cells, concrete containers were divided into 6 or into 2.
# origin column indicates where the seed came from
# geo column provides a useful grouping: Biel (cygnes-lot, port-lot, alt-stadt, biel-cff, a-5, felseck, zentralplatz), 
# port 34 (ursula-albrecht + port-34),
# canton-bern (lobsigen, pres-la-patte, jorat, cdf-forest, cdf-cff, pfeidli, bern, rubigen),
# ticino (ticino)

In [9]:
## start to play with berms and stuff (second part of location string e.g. Bermenstrasse-berm-1)

In [10]:
## project column, lots column, river column, forest column,