<a href="https://colab.research.google.com/github/samsoe/mpg_notebooks/blob/master/YVP_Vegetation_Cover_Data_Wrangle.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

*R Notebook*

# README

* [Readme fixed grid plot vegetation data](https://docs.google.com/document/d/16-Aq8u9Rudd78fSzfjvpCXyQgE-BstC-d2PjYfmLtcw/edit?usp=sharing)

# Load Tools

In [0]:
library(tidyverse)

# Source

In [0]:
# 2020-04-28_yvp_vegetation_cover
src = 'https://drive.google.com/uc?id=1pemnlKIlfAQw2JSMN7yDlYMG5QhUW-NP'

In [0]:
df <- read.csv(file = src)

In [146]:
head(df, n=2)

Unnamed: 0_level_0,plot_code,date,subplot,species_code,cover_pct
Unnamed: 0_level_1,<fct>,<fct>,<int>,<fct>,<int>
1,YVP 10,2017-06-09,1,BOESPP,1
2,YVP 10,2017-06-09,1,CREINT,1


# Wrangle

## Structure columns

### plot_code

In [0]:
# coerce to string
df$plot_code <- as.character(df$plot_code)

### plot_ loc

In [0]:
# detect "N" in 'plot_code' and write to new column 'plot_loc'
df <- df %>%
  mutate(plot_loc = ifelse(str_detect(plot_code, "N"), "N", NA))

In [0]:
# strip "N" from 'plot_code' if present
df$plot_code <- str_remove(df$plot_code, "N")

In [0]:
# reorder columns
df <- df[,c(1,6,2,3,4,5)]

### plot_rep

In [0]:
# detect "A", "B", "C" characters in plot_code and if present write to 'plot_rep'
df <- df %>%
  mutate(plot_rep = case_when(str_detect(plot_code, "A")~"A",
                              str_detect(plot_code, "B")~"B",
                              str_detect(plot_code, "C")~"C"))

In [0]:
# strip "A", "B", "C" from plot_code
df$plot_code <- str_remove(df$plot_code, "[ABC]")

In [0]:
# reorder columns
df <- df[,c(1,2,7,3,4,5,6)]

### plot_num

In [0]:
# use digital values from 'plot_code' and to populate 'plot_num'
df <- df %>%
  mutate(plot_num = str_extract(plot_code, "[:digit:].*"))

In [0]:
# reorder columns
df <- df[,c(1,2,3,8,4,5,6,7)]

### date

In [0]:
# convert to date
df$date <- as.Date(df$date)

### subplot

In [0]:
# convert to integer
df$subplot <- as.integer(df$subplot)

### species_key

This will be imported from the plant species metadata table, and we can use it to join and correct species codes in the future


### species_code

In [0]:
# coerce to string
df$species_code <- as.character(df$species_code)

## Identify Double Counting

In [0]:
# Find instances where a plant species is counted twice in the same year-plot-subplot combination
df %>%
  group_by(year = as.numeric(substring(date,0,4)), plot_code, subplot, species_code) %>%
  summarize(counted = n()) %>%
  arrange(year, plot_code, subplot, desc(counted)) %>%
  filter(counted > 1)

year,plot_code,subplot,species_code,counted
<dbl>,<chr>,<int>,<chr>,<int>
2017,YVP 144,2,VERVER,2
2017,YVP 180,7,FRIPUD,2
2017,YVP 203,4,COLLIN,2
2017,YVP 355,10,PSESPI,2
2017,YVP 44,9,ORTTEN,2
2017,YVP N111,2,DRAVER,2
2017,YVP NB294,8,MICGRA,2
2018,YVP 112,9,ALYALY,2
2018,YVP 12,4,HOLUMB,2
2018,YVP 144,10,ACHMIL,2


In [0]:
# example where cover_pct value is different
# 2017	YVP 144	2	VERVER
df %>%
  filter(as.numeric(substring(date,0,4)) == 2017,
          plot_code == 'YVP 144',
          subplot == '2',
          species_code == 'VERVER')

plot_code,date,subplot,species_code,cover_pct
<chr>,<date>,<int>,<chr>,<int>
YVP 144,2017-05-30,2,VERVER,3
YVP 144,2017-05-30,2,VERVER,4


In [0]:
# example where cover_pct value is the same
# 2017	YVP 180	7	FRIPUD
df %>%
  filter(as.numeric(substring(date,0,4)) == 2017,
          plot_code == 'YVP 180',
          subplot == '7',
          species_code == 'FRIPUD')

plot_code,date,subplot,species_code,cover_pct
<chr>,<date>,<int>,<chr>,<int>
YVP 180,2017-05-31,7,FRIPUD,1
YVP 180,2017-05-31,7,FRIPUD,1


In [0]:
# example where cover_pct value is different
# 2019	YVP NC294	6	ERIPUM
df %>%
  filter(as.numeric(substring(date,0,4)) == 2019,
          plot_code == 'YVP NC294',
          subplot == '6',
          species_code == 'ERIPUM')

plot_code,date,subplot,species_code,cover_pct
<chr>,<date>,<int>,<chr>,<int>
YVP NC294,2019-05-09,6,ERIPUM,1
YVP NC294,2019-05-09,6,ERIPUM,2


## Address Double Counting

In [0]:
# ?