<a href="https://colab.research.google.com/github/samsoe/mpg_notebooks/blob/master/yvp_plant_functional_groups_WRANGLE_2020.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Documentation

[Readme fixed plot vegetation data](https://docs.google.com/document/d/16-Aq8u9Rudd78fSzfjvpCXyQgE-BstC-d2PjYfmLtcw/edit?usp=sharing)

# Security

* The user must load a `json` file containing the BigQuery API key into the local directory `/content/...`
* The user must have a Google Maps API key to enable mapping. 
   * CAUTION make sure the key is deleted from the current instance of the notebook before sharing

# Tools

In [None]:
library(tidyverse)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.2     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.4     [32m✔[39m [34mdplyr  [39m 1.0.2
[32m✔[39m [34mtidyr  [39m 1.1.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.0

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [None]:
library(lubridate)


Attaching package: ‘lubridate’


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union




* Remember that the file containing authorization keys for BigQuery must be loaded into the virtual environment manually.

In [None]:
install.packages("bigrquery")
library(bigrquery)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘bit’, ‘bit64’, ‘gargle’, ‘rapidjsonr’




# Source

## Database Connection

In [None]:
# BigQuery API Key
# Authenticate bigrquery with the JSON key file; the file must already have
# been uploaded to /content/ (see the Security section above).
bq_auth(path = "/content/mpg-data-warehouse-api_key-master.json")

In [None]:
# Set the environment variable that names the BigQuery project used below
Sys.setenv(BIGQUERY_TEST_PROJECT = "mpg-data-warehouse")

In [None]:
# Project identifier passed as the billing project to every query below
billing <- bq_test_project()

## Database Query

### yvp_vegetation_cover


In [None]:
# Query text: every column and row of the YVP fixed-plot vegetation cover table
sql_vegetation_cover <- "SELECT *
               FROM `mpg-data-warehouse.vegetation_fixed_plot_yvp.yvp_vegetation_cover`"

In [None]:
# What does this look like without removing the NULL values?
# NOTE(review): this query text is identical to the definition in the previous
# cell (no NULL filter in either), so this cell only reassigns the same value.
sql_vegetation_cover <- "SELECT *
               FROM `mpg-data-warehouse.vegetation_fixed_plot_yvp.yvp_vegetation_cover`"

In [None]:
# Run the vegetation cover query, billed to the `billing` project
bq_vegetation_cover <- bq_project_query(billing, sql_vegetation_cover)

In [None]:
# Download the query result into the R session
tb_vegetation_cover <- bq_table_download(bq_vegetation_cover)

In [None]:
# Convert the downloaded result to a plain data.frame, then preview its columns
df_vegetation_cover <- as.data.frame(tb_vegetation_cover)
glimpse(df_vegetation_cover)

Rows: 30,820
Columns: 9
$ plot_code         [3m[90m<chr>[39m[23m "YVP N348", "YVP N348", "YVP N348", "YVP N348", "YV…
$ plot_loc          [3m[90m<chr>[39m[23m "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "…
$ plot_rep          [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ grid_point        [3m[90m<int>[39m[23m 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 3…
$ date              [3m[90m<date>[39m[23m 2020-07-01, 2020-07-01, 2020-07-01, 2020-07-01, 20…
$ subplot           [3m[90m<int>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, …
$ key_plant_species [3m[90m<int>[39m[23m 3, 24, 54, 161, 167, 199, 230, 410, 414, 433, 492, …
$ key_plant_code    [3m[90m<chr>[39m[23m "ACEGLA", "AMEALN", "ARNCOR", "CRADOU", "CYNOFF", "…
$ cover_pct         [3m[90m<dbl>[39m[23m 1, 1, 5, 1, 1, 15, 1, 15, 0, 1, 4, 35, 1, 5, 1, 3, …


In [None]:
# List the distinct cover_pct values in ascending order
arrange(distinct(df_vegetation_cover, cover_pct), cover_pct)

cover_pct
<dbl>
0.0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5


### vegetation_species_metadata

In [None]:
# Query text: every column and row of the species metadata table
sql_species_metadata <- "SELECT *
                         FROM `mpg-data-warehouse.vegetation_species_metadata.vegetation_species_metadata`"

In [None]:
# Run the species metadata query, billed to the `billing` project
bq_species_metadata <- bq_project_query(billing, sql_species_metadata)

In [None]:
# Download the query result into the R session
tb_species_metadata <- bq_table_download(bq_species_metadata)

In [None]:
# Convert the downloaded result to a plain data.frame, then preview its columns
df_species_metadata <- as.data.frame(tb_species_metadata)
glimpse(df_species_metadata)

Rows: 754
Columns: 9
$ key_plant_species   [3m[90m<int>[39m[23m 360, 13, 26, 53, 738, 75, 76, 746, 83, 88, 86, 87…
$ key_plant_code      [3m[90m<chr>[39m[23m "NV", "AGRSCA", "ANDGER", "ARIPUR", "BOUCUR", "BO…
$ plant_name_sci      [3m[90m<chr>[39m[23m "no vegetation", "Agrostis scabra", "Andropogon g…
$ plant_name_syn      [3m[90m<chr>[39m[23m NA, NA, NA, "Aristida longiseta", NA, NA, NA, NA,…
$ plant_name_common   [3m[90m<chr>[39m[23m "no vegetation", "rough bentgrass", "big bluestem…
$ plant_name_family   [3m[90m<chr>[39m[23m "None", "Poaceae", "Poaceae", "Poaceae", "Poaceae…
$ plant_native_status [3m[90m<chr>[39m[23m "none", "native", "native", "native", "native", "…
$ plant_life_cycle    [3m[90m<chr>[39m[23m "unknown", "perennial", "perennial", "perennial",…
$ plant_life_form     [3m[90m<chr>[39m[23m "none", "graminoid", "graminoid", "graminoid", "g…


### location_position_classification

In [None]:
# Query text: grid_point plus the aspect, elevation, slope and cover/vegetation
# classification columns from the location_position_classification table
sql_position_classification <- "
SELECT 
  grid_point,
  aspect_mean_deg,
  elevation_mean_m,
  slope_mean_deg,
  cover_type_2016_gridVeg,
  type3_vegetation_indicators,
  type4_indicators_history
FROM
  `mpg-data-warehouse.grid_point_summaries.location_position_classification`
"

In [None]:
# Run the position classification query, billed to the `billing` project
bq_position_classification <- bq_project_query(billing, sql_position_classification)

In [None]:
# Download the query result into the R session
tb_position_classification <- bq_table_download(bq_position_classification)

In [None]:
# Convert the downloaded result to a plain data.frame, then preview its columns
df_position_classification <- as.data.frame(tb_position_classification)
glimpse(df_position_classification)

Rows: 582
Columns: 7
$ grid_point                  [3m[90m<int>[39m[23m 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13…
$ aspect_mean_deg             [3m[90m<dbl>[39m[23m 334.7050, 45.3030, 221.3340, 290.4890, 28…
$ elevation_mean_m            [3m[90m<dbl>[39m[23m 1395.64, 1456.09, 1126.90, 1166.33, 1179.…
$ slope_mean_deg              [3m[90m<dbl>[39m[23m 28.44230, 12.22630, 4.25130, 2.68361, 4.2…
$ cover_type_2016_gridVeg     [3m[90m<chr>[39m[23m "woodland/forest", "non-irrigated grassla…
$ type3_vegetation_indicators [3m[90m<chr>[39m[23m "mixed canopy conifer", "uncultivated gra…
$ type4_indicators_history    [3m[90m<chr>[39m[23m "mixed canopy conifer", "uncultivated gra…


# Wrangle

## Create 'year'

In [None]:
# Add the calendar year of each record's sampling date as a new column
df_vegetation_cover_year <- mutate(df_vegetation_cover, year = year(date))
glimpse(df_vegetation_cover_year)

Rows: 30,820
Columns: 10
$ plot_code         [3m[90m<chr>[39m[23m "YVP N348", "YVP N348", "YVP N348", "YVP N348", "YV…
$ plot_loc          [3m[90m<chr>[39m[23m "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "…
$ plot_rep          [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ grid_point        [3m[90m<int>[39m[23m 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 3…
$ date              [3m[90m<date>[39m[23m 2020-07-01, 2020-07-01, 2020-07-01, 2020-07-01, 20…
$ subplot           [3m[90m<int>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, …
$ key_plant_species [3m[90m<int>[39m[23m 3, 24, 54, 161, 167, 199, 230, 410, 414, 433, 492, …
$ key_plant_code    [3m[90m<chr>[39m[23m "ACEGLA", "AMEALN", "ARNCOR", "CRADOU", "CYNOFF", "…
$ cover_pct         [3m[90m<dbl>[39m[23m 1, 1, 5, 1, 1, 15, 1, 15, 0, 1, 4, 35, 1, 5, 1, 3, …
$ year              [3m[90m<dbl>[39m[23m 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 

## Join

### vegetation_species_metadata

In [None]:
# Attach the species metadata columns to every cover record, matching rows on
# the integer species key. Cover records without a metadata match are kept.
df_join <- left_join(
  df_vegetation_cover_year,
  df_species_metadata,
  by = "key_plant_species"
)
glimpse(df_join)

Rows: 30,820
Columns: 18
$ plot_code           [3m[90m<chr>[39m[23m "YVP N348", "YVP N348", "YVP N348", "YVP N348", "…
$ plot_loc            [3m[90m<chr>[39m[23m "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",…
$ plot_rep            [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ grid_point          [3m[90m<int>[39m[23m 348, 348, 348, 348, 348, 348, 348, 348, 348, 348,…
$ date                [3m[90m<date>[39m[23m 2020-07-01, 2020-07-01, 2020-07-01, 2020-07-01, …
$ subplot             [3m[90m<int>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2…
$ key_plant_species   [3m[90m<int>[39m[23m 3, 24, 54, 161, 167, 199, 230, 410, 414, 433, 492…
$ key_plant_code.x    [3m[90m<chr>[39m[23m "ACEGLA", "AMEALN", "ARNCOR", "CRADOU", "CYNOFF",…
$ cover_pct           [3m[90m<dbl>[39m[23m 1, 1, 5, 1, 1, 15, 1, 15, 0, 1, 4, 35, 1, 5, 1, 3…
$ year                [3m[90m<dbl>[39m[23m 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020

## Recode plant_life_cycle

In [None]:
# Recode the levels of plant_life_cycle to simplify them: every mixed
# life-cycle label collapses into the single level "multiple".
# %in% treats NA as "no match", so rows with a missing life cycle stay NA,
# and if_else() keeps the column character regardless of what matches.
mixed_life_cycles <- c("biennial perennial",
                       "annual perennial",
                       "annual biennial perennial",
                       "annual biennial")

df_recode_levels <- df_join %>%
  mutate(plant_life_cycle = if_else(plant_life_cycle %in% mixed_life_cycles,
                                    "multiple", plant_life_cycle)) %>%
  glimpse()

Rows: 30,820
Columns: 18
$ plot_code           [3m[90m<chr>[39m[23m "YVP N348", "YVP N348", "YVP N348", "YVP N348", "…
$ plot_loc            [3m[90m<chr>[39m[23m "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",…
$ plot_rep            [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ grid_point          [3m[90m<int>[39m[23m 348, 348, 348, 348, 348, 348, 348, 348, 348, 348,…
$ date                [3m[90m<date>[39m[23m 2020-07-01, 2020-07-01, 2020-07-01, 2020-07-01, …
$ subplot             [3m[90m<int>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2…
$ key_plant_species   [3m[90m<int>[39m[23m 3, 24, 54, 161, 167, 199, 230, 410, 414, 433, 492…
$ key_plant_code.x    [3m[90m<chr>[39m[23m "ACEGLA", "AMEALN", "ARNCOR", "CRADOU", "CYNOFF",…
$ cover_pct           [3m[90m<dbl>[39m[23m 1, 1, 5, 1, 1, 15, 1, 15, 0, 1, 4, 35, 1, 5, 1, 3…
$ year                [3m[90m<dbl>[39m[23m 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020

In [None]:
# Show each distinct functional-group combination (native status, life cycle,
# life form) present in the data, sorted by those three columns
df_recode_levels %>%
  select(plant_native_status, plant_life_cycle, plant_life_form) %>%
  distinct() %>%
  arrange(plant_native_status, plant_life_cycle, plant_life_form)

plant_native_status,plant_life_cycle,plant_life_form
<chr>,<chr>,<chr>
native,annual,forb
native,annual,graminoid
native,biennial,forb
native,multiple,forb
native,perennial,forb
native,perennial,graminoid
native,perennial,shrub
native,perennial,tree
native,unknown,forb
nonnative,annual,forb


## Create 'survey_code'

In [None]:
# Build survey_code as "<plot_code> <date>" so that each plot visit has a
# single identifying string
df_survey_code <- mutate(
  df_recode_levels,
  survey_code = paste(plot_code, date, sep = " ")
)
glimpse(df_survey_code)

Rows: 30,820
Columns: 19
$ plot_code           [3m[90m<chr>[39m[23m "YVP N348", "YVP N348", "YVP N348", "YVP N348", "…
$ plot_loc            [3m[90m<chr>[39m[23m "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",…
$ plot_rep            [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ grid_point          [3m[90m<int>[39m[23m 348, 348, 348, 348, 348, 348, 348, 348, 348, 348,…
$ date                [3m[90m<date>[39m[23m 2020-07-01, 2020-07-01, 2020-07-01, 2020-07-01, …
$ subplot             [3m[90m<int>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2…
$ key_plant_species   [3m[90m<int>[39m[23m 3, 24, 54, 161, 167, 199, 230, 410, 414, 433, 492…
$ key_plant_code.x    [3m[90m<chr>[39m[23m "ACEGLA", "AMEALN", "ARNCOR", "CRADOU", "CYNOFF",…
$ cover_pct           [3m[90m<dbl>[39m[23m 1, 1, 5, 1, 1, 15, 1, 15, 0, 1, 4, 35, 1, 5, 1, 3…
$ year                [3m[90m<dbl>[39m[23m 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020

In [None]:
# use df_survey_code to join _loc _rep etc back into df
# or make df with only variables

## Create 'cover_pct_avg'

In [None]:
# Calculate cover_pct_sum: total cover_pct within each survey x subplot x
# functional group (native status, life cycle, life form).
# `.groups = "drop"` returns an ungrouped result directly, so no separate
# ungroup() is needed and summarise() does not emit its regrouping message.
df_cover_sum <- df_survey_code %>%
  group_by(survey_code, subplot,
           plant_native_status, plant_life_cycle, plant_life_form) %>%
  summarise(cover_pct_sum = sum(cover_pct), .groups = "drop") %>%
  glimpse()

`summarise()` regrouping output by 'survey_code', 'subplot', 'plant_native_status', 'plant_life_cycle' (override with `.groups` argument)



Rows: 13,811
Columns: 6
$ survey_code         [3m[90m<chr>[39m[23m "YVP 10 2017-06-09", "YVP 10 2017-06-09", "YVP 10…
$ subplot             [3m[90m<int>[39m[23m 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4…
$ plant_native_status [3m[90m<chr>[39m[23m "native", "native", "native", "native", "nonnativ…
$ plant_life_cycle    [3m[90m<chr>[39m[23m "annual", "multiple", "perennial", "perennial", "…
$ plant_life_form     [3m[90m<chr>[39m[23m "forb", "forb", "forb", "graminoid", "forb", "for…
$ cover_pct_sum       [3m[90m<dbl>[39m[23m 1, 1, 49, 50, 3, 5, 29, 60, 2, 4, 5, 67, 25, 3, 3…


In [None]:
# Average each functional group's cover over the subplots of a survey.
# The divisor is a fixed subplot count rather than n(), so subplots in which
# a group was not recorded contribute zero cover to the average.
# NOTE(review): assumes every survey has exactly 10 subplots - confirm.
n_subplots <- 10

df_cover_avg <- df_cover_sum %>%
  group_by(survey_code,
           plant_native_status, plant_life_cycle, plant_life_form) %>%
  summarise(cover_pct_avg = sum(cover_pct_sum) / n_subplots,
            .groups = "drop") %>%
  glimpse()

`summarise()` regrouping output by 'survey_code', 'plant_native_status', 'plant_life_cycle' (override with `.groups` argument)



Rows: 2,088
Columns: 5
$ survey_code         [3m[90m<chr>[39m[23m "YVP 10 2017-06-09", "YVP 10 2017-06-09", "YVP 10…
$ plant_native_status [3m[90m<chr>[39m[23m "native", "native", "native", "native", "nonnativ…
$ plant_life_cycle    [3m[90m<chr>[39m[23m "annual", "multiple", "perennial", "perennial", "…
$ plant_life_form     [3m[90m<chr>[39m[23m "forb", "forb", "forb", "graminoid", "forb", "gra…
$ cover_pct_avg       [3m[90m<dbl>[39m[23m 0.8, 0.2, 32.9, 49.5, 3.7, 2.0, 1.1, 4.5, 4.1, 0.…


## Complete

After simplifying the functional groups and producing transect averages of plant cover, make sure all combinations of functional groups that are found in the data are represented in each survey_code. For those groups which were not detected at a survey_code, fill `cover_pct_avg` with 0. This makes it possible to produce meaningful averages of functional group cover within habitat polygons or years without creating a positive bias for rarer functional groups. In other words, zero cover of a functional group at a grid point is a real zero, not a missing value.

In [None]:
# Preview the per-survey averages before filling in the missing combinations
glimpse(df_cover_avg)

Rows: 2,088
Columns: 5
$ survey_code         [3m[90m<chr>[39m[23m "YVP 10 2017-06-09", "YVP 10 2017-06-09", "YVP 10…
$ plant_native_status [3m[90m<chr>[39m[23m "native", "native", "native", "native", "nonnativ…
$ plant_life_cycle    [3m[90m<chr>[39m[23m "annual", "multiple", "perennial", "perennial", "…
$ plant_life_form     [3m[90m<chr>[39m[23m "forb", "forb", "forb", "graminoid", "forb", "gra…
$ cover_pct_avg       [3m[90m<dbl>[39m[23m 0.8, 0.2, 32.9, 49.5, 3.7, 2.0, 1.1, 4.5, 4.1, 0.…


In [None]:
# Give every survey_code a row for each functional-group combination that
# occurs anywhere in the data; combinations added this way get a cover of 0.
df_complete <- complete(
  df_cover_avg,
  survey_code,
  nesting(plant_native_status, plant_life_cycle, plant_life_form),
  fill = list(cover_pct_avg = 0)
)
df_complete <- arrange(
  df_complete,
  survey_code, plant_native_status, plant_life_cycle, plant_life_form
)
glimpse(df_complete)

Rows: 5,126
Columns: 5
$ survey_code         [3m[90m<chr>[39m[23m "YVP 10 2017-06-09", "YVP 10 2017-06-09", "YVP 10…
$ plant_native_status [3m[90m<chr>[39m[23m "native", "native", "native", "native", "native",…
$ plant_life_cycle    [3m[90m<chr>[39m[23m "annual", "annual", "biennial", "multiple", "pere…
$ plant_life_form     [3m[90m<chr>[39m[23m "forb", "graminoid", "forb", "forb", "forb", "gra…
$ cover_pct_avg       [3m[90m<dbl>[39m[23m 0.8, 0.0, 0.0, 0.2, 32.9, 49.5, 0.0, 0.0, 0.0, 3.…


## year

In [None]:
# Take the four-character year from the "YYYY-MM-DD" date that ends
# survey_code (characters 10 through 7 counted from the end of the string)
df_year <- mutate(df_complete, year = str_sub(survey_code, -10, -7))
glimpse(df_year)

Rows: 5,126
Columns: 6
$ survey_code         [3m[90m<chr>[39m[23m "YVP 10 2017-06-09", "YVP 10 2017-06-09", "YVP 10…
$ plant_native_status [3m[90m<chr>[39m[23m "native", "native", "native", "native", "native",…
$ plant_life_cycle    [3m[90m<chr>[39m[23m "annual", "annual", "biennial", "multiple", "pere…
$ plant_life_form     [3m[90m<chr>[39m[23m "forb", "graminoid", "forb", "forb", "forb", "gra…
$ cover_pct_avg       [3m[90m<dbl>[39m[23m 0.8, 0.0, 0.0, 0.2, 32.9, 49.5, 0.0, 0.0, 0.0, 3.…
$ year                [3m[90m<chr>[39m[23m "2017", "2017", "2017", "2017", "2017", "2017", "…


## plot_code

In [None]:
# Bring plot_code back in by dropping the last 11 characters of survey_code
# (the separating space plus the "YYYY-MM-DD" date)
df_plot_code <- mutate(df_year, plot_code = str_sub(survey_code, 1, -12))
glimpse(df_plot_code)

Rows: 5,126
Columns: 7
$ survey_code         [3m[90m<chr>[39m[23m "YVP 10 2017-06-09", "YVP 10 2017-06-09", "YVP 10…
$ plant_native_status [3m[90m<chr>[39m[23m "native", "native", "native", "native", "native",…
$ plant_life_cycle    [3m[90m<chr>[39m[23m "annual", "annual", "biennial", "multiple", "pere…
$ plant_life_form     [3m[90m<chr>[39m[23m "forb", "graminoid", "forb", "forb", "forb", "gra…
$ cover_pct_avg       [3m[90m<dbl>[39m[23m 0.8, 0.0, 0.0, 0.2, 32.9, 49.5, 0.0, 0.0, 0.0, 3.…
$ year                [3m[90m<chr>[39m[23m "2017", "2017", "2017", "2017", "2017", "2017", "…
$ plot_code           [3m[90m<chr>[39m[23m "YVP 10", "YVP 10", "YVP 10", "YVP 10", "YVP 10",…


## plot_loc

In [None]:
# Detect "N" in 'plot_code' and write it to the new column 'plot_loc';
# plot codes without an "N" get NA.
# if_else() with NA_character_ keeps plot_loc a character column even when no
# plot code matches (ifelse(..., "N", NA) would return a logical column then).
df_plot_loc <- df_plot_code %>%
  mutate(plot_loc = if_else(str_detect(plot_code, "N"),
                            "N", NA_character_)) %>%
  glimpse()

Rows: 5,126
Columns: 8
$ survey_code         [3m[90m<chr>[39m[23m "YVP 10 2017-06-09", "YVP 10 2017-06-09", "YVP 10…
$ plant_native_status [3m[90m<chr>[39m[23m "native", "native", "native", "native", "native",…
$ plant_life_cycle    [3m[90m<chr>[39m[23m "annual", "annual", "biennial", "multiple", "pere…
$ plant_life_form     [3m[90m<chr>[39m[23m "forb", "graminoid", "forb", "forb", "forb", "gra…
$ cover_pct_avg       [3m[90m<dbl>[39m[23m 0.8, 0.0, 0.0, 0.2, 32.9, 49.5, 0.0, 0.0, 0.0, 3.…
$ year                [3m[90m<chr>[39m[23m "2017", "2017", "2017", "2017", "2017", "2017", "…
$ plot_code           [3m[90m<chr>[39m[23m "YVP 10", "YVP 10", "YVP 10", "YVP 10", "YVP 10",…
$ plot_loc            [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…


## plot_rep

In [None]:
# Set plot_rep to the first of "A", "B", "C" (tested in that order) that
# appears anywhere in plot_code; codes containing none of them get NA
df_plot_rep <- mutate(
  df_plot_loc,
  plot_rep = case_when(
    str_detect(plot_code, fixed("A")) ~ "A",
    str_detect(plot_code, fixed("B")) ~ "B",
    str_detect(plot_code, fixed("C")) ~ "C",
    TRUE ~ NA_character_
  )
)
glimpse(df_plot_rep)

Rows: 5,126
Columns: 9
$ survey_code         [3m[90m<chr>[39m[23m "YVP 10 2017-06-09", "YVP 10 2017-06-09", "YVP 10…
$ plant_native_status [3m[90m<chr>[39m[23m "native", "native", "native", "native", "native",…
$ plant_life_cycle    [3m[90m<chr>[39m[23m "annual", "annual", "biennial", "multiple", "pere…
$ plant_life_form     [3m[90m<chr>[39m[23m "forb", "graminoid", "forb", "forb", "forb", "gra…
$ cover_pct_avg       [3m[90m<dbl>[39m[23m 0.8, 0.0, 0.0, 0.2, 32.9, 49.5, 0.0, 0.0, 0.0, 3.…
$ year                [3m[90m<chr>[39m[23m "2017", "2017", "2017", "2017", "2017", "2017", "…
$ plot_code           [3m[90m<chr>[39m[23m "YVP 10", "YVP 10", "YVP 10", "YVP 10", "YVP 10",…
$ plot_loc            [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ plot_rep            [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…


## grid_point

In [None]:
# Use the first run of digits in 'plot_code' to populate 'grid_point'.
# Matching digits only ("[0-9]+") rather than everything after the first
# digit keeps as.integer() from returning NA when a plot code carries extra
# characters after the number.
df_grid_point <- df_plot_rep %>%
  mutate(grid_point = as.integer(str_extract(plot_code, "[0-9]+"))) %>%
  glimpse()

Rows: 5,126
Columns: 10
$ survey_code         [3m[90m<chr>[39m[23m "YVP 10 2017-06-09", "YVP 10 2017-06-09", "YVP 10…
$ plant_native_status [3m[90m<chr>[39m[23m "native", "native", "native", "native", "native",…
$ plant_life_cycle    [3m[90m<chr>[39m[23m "annual", "annual", "biennial", "multiple", "pere…
$ plant_life_form     [3m[90m<chr>[39m[23m "forb", "graminoid", "forb", "forb", "forb", "gra…
$ cover_pct_avg       [3m[90m<dbl>[39m[23m 0.8, 0.0, 0.0, 0.2, 32.9, 49.5, 0.0, 0.0, 0.0, 3.…
$ year                [3m[90m<chr>[39m[23m "2017", "2017", "2017", "2017", "2017", "2017", "…
$ plot_code           [3m[90m<chr>[39m[23m "YVP 10", "YVP 10", "YVP 10", "YVP 10", "YVP 10",…
$ plot_loc            [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ plot_rep            [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ grid_point          [3m[90m<int>[39m[23m 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,

## Join

### location_position_classification

In [None]:
# Preview the grid-point classification table before joining it
glimpse(df_position_classification)

Rows: 582
Columns: 7
$ grid_point                  [3m[90m<int>[39m[23m 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13…
$ aspect_mean_deg             [3m[90m<dbl>[39m[23m 334.7050, 45.3030, 221.3340, 290.4890, 28…
$ elevation_mean_m            [3m[90m<dbl>[39m[23m 1395.64, 1456.09, 1126.90, 1166.33, 1179.…
$ slope_mean_deg              [3m[90m<dbl>[39m[23m 28.44230, 12.22630, 4.25130, 2.68361, 4.2…
$ cover_type_2016_gridVeg     [3m[90m<chr>[39m[23m "woodland/forest", "non-irrigated grassla…
$ type3_vegetation_indicators [3m[90m<chr>[39m[23m "mixed canopy conifer", "uncultivated gra…
$ type4_indicators_history    [3m[90m<chr>[39m[23m "mixed canopy conifer", "uncultivated gra…


In [None]:
# Attach the grid-point position/classification columns to every row.
# The key is named explicitly so the join cannot silently start matching on
# additional columns if the two tables ever come to share another name.
df_join_position_class <- df_grid_point %>%
  left_join(df_position_classification, by = "grid_point") %>%
  glimpse()

Joining, by = "grid_point"



Rows: 5,126
Columns: 16
$ survey_code                 [3m[90m<chr>[39m[23m "YVP 10 2017-06-09", "YVP 10 2017-06-09",…
$ plant_native_status         [3m[90m<chr>[39m[23m "native", "native", "native", "native", "…
$ plant_life_cycle            [3m[90m<chr>[39m[23m "annual", "annual", "biennial", "multiple…
$ plant_life_form             [3m[90m<chr>[39m[23m "forb", "graminoid", "forb", "forb", "for…
$ cover_pct_avg               [3m[90m<dbl>[39m[23m 0.8, 0.0, 0.0, 0.2, 32.9, 49.5, 0.0, 0.0,…
$ year                        [3m[90m<chr>[39m[23m "2017", "2017", "2017", "2017", "2017", "…
$ plot_code                   [3m[90m<chr>[39m[23m "YVP 10", "YVP 10", "YVP 10", "YVP 10", "…
$ plot_loc                    [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ plot_rep                    [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ grid_point                  [3m[90m<int>[39m[23m 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,

## Reorder

In [None]:
# List the column names available for the reordering step below
names(df_join_position_class)

In [None]:
# Keep only the output columns, in their final order; the position and
# classification columns added by the join are dropped here
output_columns <- c("survey_code","plot_code","year","plot_loc","plot_rep",
                    "grid_point", "plant_native_status","plant_life_cycle","plant_life_form","cover_pct_avg")
df_yvp_plant_functional_groups <- select(df_join_position_class, all_of(output_columns))
glimpse(df_yvp_plant_functional_groups)

Rows: 5,126
Columns: 10
$ survey_code         [3m[90m<chr>[39m[23m "YVP 10 2017-06-09", "YVP 10 2017-06-09", "YVP 10…
$ plot_code           [3m[90m<chr>[39m[23m "YVP 10", "YVP 10", "YVP 10", "YVP 10", "YVP 10",…
$ year                [3m[90m<chr>[39m[23m "2017", "2017", "2017", "2017", "2017", "2017", "…
$ plot_loc            [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ plot_rep            [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ grid_point          [3m[90m<int>[39m[23m 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 1…
$ plant_native_status [3m[90m<chr>[39m[23m "native", "native", "native", "native", "native",…
$ plant_life_cycle    [3m[90m<chr>[39m[23m "annual", "annual", "biennial", "multiple", "pere…
$ plant_life_form     [3m[90m<chr>[39m[23m "forb", "graminoid", "forb", "forb", "forb", "gra…
$ cover_pct_avg       [3m[90m<dbl>[39m[23m 0.8, 0.0, 0.0, 0.2, 32.9, 49.5, 0.0, 0.0, 0.0, 

In [None]:
# Convert year from a character string to an integer
df_yvp_plant_functional_groups <- df_yvp_plant_functional_groups %>%
  mutate(year = as.integer(year))

In [None]:
# Per-column summary of the final table as a check before export
summary(df_yvp_plant_functional_groups)

 survey_code         plot_code              year        plot_loc        
 Length:5126        Length:5126        Min.   :2017   Length:5126       
 Class :character   Class :character   1st Qu.:2018   Class :character  
 Mode  :character   Mode  :character   Median :2019   Mode  :character  
                                       Mean   :2019                     
                                       3rd Qu.:2019                     
                                       Max.   :2020                     
   plot_rep           grid_point  plant_native_status plant_life_cycle  
 Length:5126        Min.   :  7   Length:5126         Length:5126       
 Class :character   1st Qu.:110   Class :character    Class :character  
 Mode  :character   Median :212   Mode  :character    Mode  :character  
                    Mean   :254                                         
                    3rd Qu.:395                                         
                    Max.   :571                    

In [None]:
# List the distinct cover_pct_avg values in ascending order
arrange(distinct(df_yvp_plant_functional_groups, cover_pct_avg), cover_pct_avg)

cover_pct_avg
<dbl>
0.00
0.05
0.10
0.20
0.30
0.40
0.50
0.60
0.70
0.80


# Output

In [None]:
# Output 2020-11-09 | ES
# Write the final functional-group table to a CSV file; the relative path
# places it in the current working directory
write_csv(df_yvp_plant_functional_groups, file="yvp_plant_functional_groups-WRANGLE-2020.csv")