<a href="https://colab.research.google.com/github/samsoe/mpg_notebooks/blob/master/YVP_Additional_Species_Wrangle.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

*R Notebook*

# README

* Readme fixed plot vegetation data - [Additional Species Data](https://docs.google.com/document/d/16-Aq8u9Rudd78fSzfjvpCXyQgE-BstC-d2PjYfmLtcw/edit#heading=h.t9gebon1aetd)

# Load Tools

In [10]:
library(tidyverse)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──

✔ ggplot2 3.3.0     ✔ dplyr   0.8.5
✔ tibble  3.0.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.0
✔ purrr   0.3.3     

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()



# Source

In [0]:
# Download link for the source file 2020-04-28_yvp_additional_species.csv
src <- "https://drive.google.com/uc?id=1GWDvhXIHsrOUaRveq5SoozgZ7oUW9XJy"

In [0]:
# Read the CSV located at 'src' into the working data frame 'df'.
df <- read.csv(src)

In [24]:
# Show the first two rows of the freshly loaded data.
head(df, n = 2)

Unnamed: 0_level_0,plot_code,date,species_code,cover_pct
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<int>
1,YVP 10,2017-06-09,BALSAG,1
2,YVP 10,2017-06-09,ERICOR,1


# Wrangle

## Structure Columns

### plot_code

In [0]:
# Coerce 'plot_code' to a character vector.
df <- df %>%
  mutate(plot_code = as.character(plot_code))

### plot_loc

In [0]:
# Record in a new column 'plot_loc' whether 'plot_code' contains an "N":
# "N" when it does, NA otherwise.
# if_else() with NA_character_ keeps 'plot_loc' a character column even when
# no code contains "N"; ifelse(..., "N", NA) would return a logical column
# in that case, so the column type would depend on the data.
df <- df %>%
  mutate(plot_loc = if_else(str_detect(plot_code, "N"), "N", NA_character_))

In [0]:
# Remove the first "N" from each 'plot_code' value; codes without one are
# left unchanged.
df <- df %>%
  mutate(plot_code = str_remove(plot_code, "N"))

In [0]:
# Move 'plot_loc' directly after 'plot_code'; all other columns keep their
# current relative order.
# Columns are selected by name rather than by position so that re-running this
# cell does not reshuffle them again and no column is dropped if 'df' has more
# columns than expected.
df <- df %>%
  select(plot_code, plot_loc, everything())

### plot_rep

In [0]:
# Derive 'plot_rep' from 'plot_code': the first of "A", "B", "C" (checked in
# that order) found anywhere in the code, or NA when none is present.
df <- mutate(
  df,
  plot_rep = case_when(
    str_detect(plot_code, "A") ~ "A",
    str_detect(plot_code, "B") ~ "B",
    str_detect(plot_code, "C") ~ "C",
    TRUE ~ NA_character_
  )
)

In [0]:
# Remove the first "A", "B" or "C" character from each 'plot_code' value.
df <- df %>%
  mutate(plot_code = str_remove(plot_code, "[ABC]"))

In [0]:
# Place 'plot_rep' right after 'plot_code' and 'plot_loc'; all other columns
# keep their current relative order.
# Columns are selected by name rather than by position so that re-running this
# cell does not reshuffle them again and no column is dropped if 'df' has more
# columns than expected.
df <- df %>%
  select(plot_code, plot_loc, plot_rep, everything())

### plot_num

In [0]:
# Populate 'plot_num' from 'plot_code': keep the first digit and everything
# that follows it (NA when the code contains no digit). The result is text.
df[["plot_num"]] <- str_extract(df[["plot_code"]], "[:digit:].*")

In [0]:
# Place 'plot_num' right after 'plot_code', 'plot_loc' and 'plot_rep'; all
# other columns keep their current relative order.
# Columns are selected by name rather than by position so that re-running this
# cell does not reshuffle them again and no column is dropped if 'df' has more
# columns than expected.
df <- df %>%
  select(plot_code, plot_loc, plot_rep, plot_num, everything())

### date

In [0]:
# Turn the 'date' column into Date values.
df[["date"]] <- as.Date(df[["date"]])

### subplot

In [0]:
# not present in source dataset

### species_key

This column will be imported from the plant species metadata table. In the future, we can use it to join against that table and correct species codes.

In [0]:
# Placeholder: add 'species_key' with every value missing for now.
df[["species_key"]] <- NA

In [0]:
# Coerce 'species_key' to a character vector.
df <- df %>%
  mutate(species_key = as.character(species_key))

In [0]:
# Place 'species_key' right after 'date', ahead of the remaining columns,
# which keep their current relative order.
# Columns are selected by name rather than by position so that re-running this
# cell does not reshuffle them again and no column is dropped if 'df' has more
# columns than expected.
df <- df %>%
  select(plot_code, plot_loc, plot_rep, plot_num, date, species_key,
         everything())

### species_code

In [0]:
# Coerce 'species_code' to a character vector.
df[["species_code"]] <- as.character(df[["species_code"]])

### cover_pct

In [72]:
# Inspect the storage type of 'cover_pct'.
typeof(df[["cover_pct"]])

In [75]:
# Show the first six rows of the restructured data frame.
head(df, n = 6L)

Unnamed: 0_level_0,plot_code,plot_loc,plot_rep,plot_num,date,species_key,species_code,cover_pct
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<date>,<chr>,<chr>,<int>
1,YVP 10,,,10,2017-06-09,,BALSAG,1
2,YVP 10,,,10,2017-06-09,,ERICOR,1
3,YVP 10,,,10,2017-06-09,,ERINAU,2
4,YVP 10,,,10,2017-06-09,,ERIPUM,1
5,YVP 10,,,10,2017-06-09,,LEWRED,1
6,YVP 10,,,10,2017-06-09,,PURVIR,10
