<a href="https://colab.research.google.com/github/samsoe/mpg_notebooks/blob/master/gridVeg_groundCover_intercepts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Security

* The user must load a `json` file containing the BigQuery API key into the local directory `/content/...`
* The user must have a Google Maps API key to enable mapping. 
   * CAUTION: make sure the key is deleted from the current instance of the notebook before sharing.

# Tools

In [None]:
library(tidyverse)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.2     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.2     [32m✔[39m [34mdplyr  [39m 1.0.0
[32m✔[39m [34mtidyr  [39m 1.1.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.5.0

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



* Remember that the file containing the authorization key for BigQuery must be loaded into the virtual environment manually.

In [None]:
install.packages("bigrquery")
library(bigrquery)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘bit’, ‘bit64’, ‘gargle’, ‘rapidjsonr’




# Source

## Database Connection

In [None]:
# BigQuery API Key
# Authenticate bigrquery with the JSON key file. The file must already have
# been uploaded to /content/ by hand; never share the notebook with the key.
bq_auth(path = "/content/mpg-data-warehouse-api_key-master.json")

In [None]:
# Store the project ID in the environment variable that bq_test_project()
# consults when `billing` is assigned.
Sys.setenv(BIGQUERY_TEST_PROJECT = "mpg-data-warehouse")

In [None]:
# Project ID that every query in this notebook is billed to.
# NOTE(review): bq_test_project() is bigrquery's testing helper (it returns the
# BIGQUERY_TEST_PROJECT value) — consider assigning the project ID directly.
billing <- bq_test_project()

### vegetation_point_intercept_gridVeg

In [None]:
# Open a DBI connection to the gridVeg point-intercept dataset.
# dbConnect() is a DBI generic and DBI is not attached by library(tidyverse)
# or library(bigrquery), so the call is namespace-qualified to avoid a
# "could not find function" error.
con_point_intercept <- DBI::dbConnect(
  bigrquery::bigquery(),
  project = "mpg-data-warehouse",
  dataset = "vegetation_point_intercept_gridVeg",
  billing = billing
)

In [None]:
# List the tables available through the connection.
# Namespace-qualified because DBI is never attached in this notebook.
DBI::dbListTables(con_point_intercept)

In [None]:
# SQL: one row per survey_ID / grid_point / intercept_ground_code with the
# number of matching ground intercepts divided by 2.
# NOTE(review): the "/ 2" presumably converts a count out of 200 intercepts
# per grid point into a percentage — TODO confirm the sampling design.
ground_point_intercept_sql <- 
"
  SELECT
    survey_ID,
    grid_point,
    intercept_ground_code,
    COUNT(intercept_ground_code) / 2 AS intercepts_pct
  FROM 
    `mpg-data-warehouse.vegetation_point_intercept_gridVeg.gridVeg_point_intercept_ground`
  GROUP BY
    survey_ID, grid_point, intercept_ground_code
"

In [None]:
# Run the ground-intercept query, billed to the `billing` project.
bq_ground_point_intercept <- bq_project_query(
  x = billing,
  query = ground_point_intercept_sql
)

In [None]:
# Download the query result into the R session.
tb_ground_point_intercept <- bq_ground_point_intercept %>%
  bq_table_download()

In [None]:
# Convert the downloaded table to a base data frame.
df_ground_point_intercept <- tb_ground_point_intercept %>%
  as.data.frame()

In [None]:
# Preview the first four ground-intercept rows.
df_ground_point_intercept %>%
  head(n = 4)

Unnamed: 0_level_0,survey_ID,grid_point,intercept_ground_code,intercepts_pct
Unnamed: 0_level_1,<chr>,<int>,<chr>,<dbl>
1,2,373,L,27.5
2,2,373,BG,0.5
3,2,373,,71.5
4,2,373,WDT,0.5


### gridVeg_survey_metadata

In [None]:
# SQL: survey ID, year and survey sequence for every row of the gridVeg
# survey metadata table.
survey_metadata_sql <-
"
  SELECT
    survey_ID,
    year,
    survey_sequence
  FROM
    `mpg-data-warehouse.vegetation_point_intercept_gridVeg.gridVeg_survey_metadata`
"

In [None]:
# Run the survey-metadata query, billed to the `billing` project.
bq_survey_metadata <- bq_project_query(
  x = billing,
  query = survey_metadata_sql
)

In [None]:
# Download the survey-metadata result into the R session.
tb_survey_metadata <- bq_survey_metadata %>%
  bq_table_download()

In [None]:
# Convert the downloaded survey metadata to a base data frame.
df_survey_metadata <- tb_survey_metadata %>%
  as.data.frame()

In [None]:
# Preview the first four survey-metadata rows.
df_survey_metadata %>%
  head(n = 4)

Unnamed: 0_level_0,survey_ID,year,survey_sequence
Unnamed: 0_level_1,<chr>,<int>,<chr>
1,F31C56A8-912D-410C-A17D-4C2DD75F71A4,2016,2016
2,A19E87E6-A89C-4993-B550-802226730D54,2016,2016
3,6F1D71D3-9F87-4C93-B179-A12C8938D18D,2016,2016
4,9C67C9F1-1E89-4FD2-ADC0-0390E0022D62,2016,2016


### location_position_classification

In [None]:
# SQL: per-grid-point aspect, elevation, slope and cover/vegetation
# classification columns from the location_position_classification table.
sql_position_class <-
"
  SELECT
    grid_point,
    aspect_mean_deg,
    elevation_mean_m,
    slope_mean_deg,
    cover_type_2016_gridVeg,
    type3_vegetation_indicators,
    type4_indicators_history
  FROM
    `mpg-data-warehouse.grid_point_summaries.location_position_classification`
"

In [None]:
# Run the position-classification query, billed to the `billing` project.
bq_position_class <- bq_project_query(
  x = billing,
  query = sql_position_class
)

In [None]:
# Download the position-classification result into the R session.
tb_position_class <- bq_position_class %>%
  bq_table_download()

In [None]:
# Convert the downloaded position classification to a base data frame.
df_position_class <- tb_position_class %>%
  as.data.frame()

In [None]:
# Preview the first four position-classification rows.
df_position_class %>%
  head(n = 4)

Unnamed: 0_level_0,grid_point,aspect_mean_deg,elevation_mean_m,slope_mean_deg,cover_type_2016_gridVeg,type3_vegetation_indicators,type4_indicators_history
Unnamed: 0_level_1,<int>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
1,1,334.705,1395.64,28.4423,woodland/forest,mixed canopy conifer,mixed canopy conifer
2,2,45.303,1456.09,12.2263,non-irrigated grasslands,uncultivated grassland native or degraded,uncultivated grassland native or degraded
3,3,221.334,1126.9,4.2513,shrubland,uncultivated grassland native or degraded,uncultivated grassland native or degraded
4,4,290.489,1166.33,2.68361,shrubland,uncultivated grassland native or degraded,uncultivated grassland native or degraded


### ground_cover_metadata

In [None]:
# SQL: lookup of each ground-cover code and the ground_group it belongs to.
sql_ground_meta <- 
"
SELECT
  intercept_ground_code,
  ground_group
FROM
  `mpg-data-warehouse.vegetation_point_intercept_gridVeg.gridVeg_ground_cover_metadata`
"

In [None]:
# Run the ground-cover metadata query, billed to the `billing` project.
bq_ground_meta <- bq_project_query(
  x = billing,
  query = sql_ground_meta
)

In [None]:
# Download the ground-cover metadata result into the R session.
tb_ground_meta <- bq_ground_meta %>%
  bq_table_download()

In [None]:
# Convert the downloaded ground-cover metadata to a base data frame.
df_ground_meta <- tb_ground_meta %>%
  as.data.frame()

In [None]:
# Show the ground-cover metadata ordered by code.
arrange(df_ground_meta, intercept_ground_code)

intercept_ground_code,ground_group
<chr>,<chr>
BG,inorganic
BV,living vegetation
G,inorganic
L,dead vegetation
LIC,living vegetation
M,living vegetation
M/L,living vegetation
OTHER,inorganic
R,inorganic
SC,dung


# Wrangle

In [None]:
# List every distinct ground-cover code present in the intercept data,
# ordered by code.
arrange(
  distinct(df_ground_point_intercept, intercept_ground_code),
  intercept_ground_code
)

intercept_ground_code
<chr>
BG
BV
G
L
LIC
M
M/L
""
OTHER
R


## Fill missing ground code

In [None]:
# Make the ground-cover data rectangular: every survey gets one row for each
# ground code that occurs anywhere in the data, with intercepts_pct = 0 for
# codes that were not recorded in that survey.
df_ground_point_intercept <- df_ground_point_intercept %>%
  # grid_point is nested with survey_ID so that the rows added by complete()
  # carry their own survey's grid point. Completing on survey_ID alone and then
  # calling fill(grid_point) copied the value from the row above, which belongs
  # to the preceding survey whenever a survey lacks the first code in sort
  # order (and stays NA for the very first survey).
  # Assumes each survey_ID covers a single grid_point, as the fill() did.
  complete(
    nesting(survey_ID, grid_point),
    intercept_ground_code,
    fill = list(intercepts_pct = 0)
  ) %>%
  # Drop rows whose code is the string "NA"; filter() also discards rows where
  # the code is a true missing value because the comparison yields NA.
  filter(intercept_ground_code != "NA") %>%
  # Restore the column order the original pipeline produced.
  select(survey_ID, intercept_ground_code, grid_point, intercepts_pct) %>%
  glimpse()

Rows: 22,392
Columns: 4
$ survey_ID             [3m[90m<chr>[39m[23m "012C5FAD-2451-41B0-9E2F-432D1ECEB55C", "012C5F…
$ intercept_ground_code [3m[90m<chr>[39m[23m "BG", "BV", "G", "L", "LIC", "M", "M/L", "OTHER…
$ grid_point            [3m[90m<int>[39m[23m 285, 285, 285, 285, 285, 285, 285, 285, 285, 28…
$ intercepts_pct        [3m[90m<dbl>[39m[23m 15.5, 8.5, 8.5, 62.0, 0.0, 1.5, 0.0, 0.0, 1.5, …


## Join datasets

In [None]:
# Attach survey year and survey sequence to every ground-cover row, then
# report the dimensions of the joined table.
df_gridVeg_groundCover_intercepts <- left_join(
  df_ground_point_intercept,
  df_survey_metadata,
  by = "survey_ID"
)
dim(df_gridVeg_groundCover_intercepts)


In [None]:
# Attach the grid-point position/classification columns, then report the
# dimensions of the joined table.
df_gridVeg_groundCover_intercepts <- left_join(
  df_gridVeg_groundCover_intercepts,
  df_position_class,
  by = "grid_point"
)
dim(df_gridVeg_groundCover_intercepts)

In [None]:
# Attach the ground_group for each ground-cover code.
# The recorded output of this join shows the table growing from 22,392 to
# 23,636 rows, which indicates repeated intercept_ground_code rows in the
# metadata. Exact duplicate metadata rows are removed before joining so that
# intercept rows are not multiplied.
ground_meta_unique <- distinct(df_ground_meta)
if (anyDuplicated(ground_meta_unique$intercept_ground_code) > 0) {
  # Conflicting metadata cannot be resolved here; make the fan-out visible.
  warning(
    "df_ground_meta maps a ground code to more than one ground_group; ",
    "the join will duplicate intercept rows.",
    call. = FALSE
  )
}
df_gridVeg_groundCover_intercepts <- df_gridVeg_groundCover_intercepts %>%
  left_join(ground_meta_unique, by = "intercept_ground_code") %>%
  glimpse()

Rows: 23,636
Columns: 13
$ survey_ID                   [3m[90m<chr>[39m[23m "012C5FAD-2451-41B0-9E2F-432D1ECEB55C", "…
$ intercept_ground_code       [3m[90m<chr>[39m[23m "BG", "BV", "G", "L", "LIC", "M", "M/L", …
$ grid_point                  [3m[90m<int>[39m[23m 285, 285, 285, 285, 285, 285, 285, 285, 2…
$ intercepts_pct              [3m[90m<dbl>[39m[23m 15.5, 8.5, 8.5, 62.0, 0.0, 1.5, 0.0, 0.0,…
$ year                        [3m[90m<int>[39m[23m 2016, 2016, 2016, 2016, 2016, 2016, 2016,…
$ survey_sequence             [3m[90m<chr>[39m[23m "2016", "2016", "2016", "2016", "2016", "…
$ aspect_mean_deg             [3m[90m<dbl>[39m[23m 138.749, 138.749, 138.749, 138.749, 138.7…
$ elevation_mean_m            [3m[90m<dbl>[39m[23m 1352.10, 1352.10, 1352.10, 1352.10, 1352.…
$ slope_mean_deg              [3m[90m<dbl>[39m[23m 24.5141, 24.5141, 24.5141, 24.5141, 24.51…
$ cover_type_2016_gridVeg     [3m[90m<chr>[39m[23m "shrubland", "shrubland", "shrubland",

In [None]:
# Build the final table in a single pipeline: ground-cover intercepts joined
# to survey metadata, grid-point classification and ground-cover groups.
# glimpse() after each join prints the row count so fan-out is easy to spot.
#
# The recorded outputs show 22,392 rows after the first two joins but 23,636
# after joining the ground-cover metadata, which indicates repeated
# intercept_ground_code rows in that table. Exact duplicates are removed
# before joining so that intercept rows are not multiplied.
ground_meta_unique <- distinct(df_ground_meta)
if (anyDuplicated(ground_meta_unique$intercept_ground_code) > 0) {
  # Conflicting metadata cannot be resolved here; make the fan-out visible.
  warning(
    "df_ground_meta maps a ground code to more than one ground_group; ",
    "the join will duplicate intercept rows.",
    call. = FALSE
  )
}
df_gridVeg_groundCover_intercepts <-
  df_ground_point_intercept %>%
  left_join(df_survey_metadata, by = "survey_ID") %>%
  glimpse() %>%
  left_join(df_position_class, by = "grid_point") %>%
  glimpse() %>%
  left_join(ground_meta_unique, by = "intercept_ground_code") %>%
  glimpse()

Rows: 22,392
Columns: 6
$ survey_ID             [3m[90m<chr>[39m[23m "012C5FAD-2451-41B0-9E2F-432D1ECEB55C", "012C5F…
$ intercept_ground_code [3m[90m<chr>[39m[23m "BG", "BV", "G", "L", "LIC", "M", "M/L", "OTHER…
$ grid_point            [3m[90m<int>[39m[23m 285, 285, 285, 285, 285, 285, 285, 285, 285, 28…
$ intercepts_pct        [3m[90m<dbl>[39m[23m 15.5, 8.5, 8.5, 62.0, 0.0, 1.5, 0.0, 0.0, 1.5, …
$ year                  [3m[90m<int>[39m[23m 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016,…
$ survey_sequence       [3m[90m<chr>[39m[23m "2016", "2016", "2016", "2016", "2016", "2016",…
Rows: 22,392
Columns: 12
$ survey_ID                   [3m[90m<chr>[39m[23m "012C5FAD-2451-41B0-9E2F-432D1ECEB55C", "…
$ intercept_ground_code       [3m[90m<chr>[39m[23m "BG", "BV", "G", "L", "LIC", "M", "M/L", …
$ grid_point                  [3m[90m<int>[39m[23m 285, 285, 285, 285, 285, 285, 285, 285, 2…
$ intercepts_pct              [3m[90m<dbl>[39m[23m 15.5, 8.5, 8.5

# Output

In [None]:
# Write the wrangled table to a CSV file in the working directory.
# The file name is passed positionally: readr 1.4.0 renamed the `path`
# argument to `file` and deprecated `path`, so a positional argument works
# with both older and newer readr versions.
write_csv(
  df_gridVeg_groundCover_intercepts,
  "gridVeg_groundCover_intercepts_WRANGLE.csv"
)