<a href="https://colab.research.google.com/github/samsoe/mpg_notebooks/blob/master/gridVeg_plant_abundance_matrix.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Security

* The user must load a `json` file containing the BigQuery API key into the local directory `/content/...`
* The user must have a Google Maps API key to enable mapping. 
   * CAUTION: make sure the key is deleted from the current instance of the notebook before sharing.

# Tools

* Remember that the file containing authorization keys for BigQuery must be loaded into the virtual environment manually.

In [None]:
# Install bigrquery only when it is missing, so re-running this cell does not
# reinstall the package every time; then attach it.
if (!requireNamespace("bigrquery", quietly = TRUE)) {
  install.packages("bigrquery")
}
library(bigrquery)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



In [None]:
library(tidyverse)

# Source

## Database Connection

In [None]:
# Authenticate to BigQuery with the JSON API key file loaded into `/content`.
bigrquery::bq_auth(path = "/content/mpg-data-warehouse-api_key-master.json")

In [None]:
# Store the project ID in the BIGQUERY_TEST_PROJECT environment variable.
Sys.setenv("BIGQUERY_TEST_PROJECT" = "mpg-data-warehouse")

In [None]:
# Billing project used by the queries in this notebook. Per the bigrquery
# docs, bq_test_project() returns the value of the BIGQUERY_TEST_PROJECT
# environment variable.
billing <- bq_test_project()

### Survey Effort

In [None]:
# DBI connection to the `vegetation_gridVeg_summaries` dataset, billed to
# `billing`.
con_survey_effort <- dbConnect(
  drv = bigrquery::bigquery(),
  billing = billing,
  project = "mpg-data-warehouse",
  dataset = "vegetation_gridVeg_summaries"
)

In [None]:
# List the tables visible through the connection.
con_survey_effort %>%
  dbListTables()

In [None]:
# Query text: per survey and grid point, the intercept percentage recorded for
# each plant code, read from the gridVeg_plant_intercepts table.
intercepts_sql <- "SELECT survey_ID, grid_point, key_plant_code, intercepts_pct
                   FROM `mpg-data-warehouse.vegetation_gridVeg_summaries.gridVeg_plant_intercepts`"

In [None]:
# Run the intercepts query, billed to the `billing` project.
bq_intercepts <- bq_project_query(x = billing, query = intercepts_sql)

In [None]:
# Download the query result into the R session.
tb_intercepts <- bq_table_download(x = bq_intercepts)

In [None]:
# Convert the downloaded result to a base data frame.
df_intercepts <- tb_intercepts %>%
  as.data.frame()

In [None]:
# Preview column names, types and leading values.
df_intercepts %>%
  glimpse()

Rows: 25,089
Columns: 4
$ survey_ID      <chr> "436", "436", "436", "436", "436", "436", "436", "436"…
$ grid_point     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ key_plant_code <chr> "HEUCYL", "ALLCER", "GEUTRI", "ERIG_SP", "ARESER", "GA…
$ intercepts_pct <dbl> 2.5, 0.5, 1.0, 0.5, 1.0, 1.5, 1.0, 2.5, 0.5, 1.0, 14.5…


### Survey Metadata

In [None]:
# DBI connection to the `vegetation_point_intercept_gridVeg` dataset, billed
# to `billing`.
con_survey_meta <- dbConnect(
  drv = bigrquery::bigquery(),
  billing = billing,
  project = "mpg-data-warehouse",
  dataset = "vegetation_point_intercept_gridVeg"
)

In [None]:
# List the tables visible through the connection.
con_survey_meta %>%
  dbListTables()

In [None]:
# Query text: survey ID, year and survey sequence from the
# gridVeg_survey_metadata table.
meta_sql <- "SELECT survey_ID, year, survey_sequence FROM `mpg-data-warehouse.vegetation_point_intercept_gridVeg.gridVeg_survey_metadata`"

In [None]:
# Run the survey-metadata query, billed to the `billing` project.
bq_meta <- bq_project_query(x = billing, query = meta_sql)

In [None]:
# Download the query result into the R session.
tb_meta <- bq_table_download(x = bq_meta)

In [None]:
# Convert the downloaded result to a base data frame.
df_meta <- tb_meta %>%
  as.data.frame()

In [None]:
# Preview column names, types and leading values.
df_meta %>%
  glimpse()

Rows: 1,472
Columns: 3
$ survey_ID       <chr> "F31C56A8-912D-410C-A17D-4C2DD75F71A4", "A19E87E6-A89…
$ year            <int> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016,…
$ survey_sequence <chr> "2016", "2016", "2016", "2016", "2016", "2016", "2016…


### Position Classification

In [None]:
# DBI connection to the `grid_point_summaries` dataset, billed to `billing`.
con_position_class <- dbConnect(
  drv = bigrquery::bigquery(),
  billing = billing,
  project = "mpg-data-warehouse",
  dataset = "grid_point_summaries"
)

In [None]:
# List the tables visible through the connection.
con_position_class %>%
  dbListTables()

In [None]:
# Query text: per grid point, the aspect, elevation, slope and cover/vegetation
# type columns from the location_position_classification table.
position_sql <- "SELECT 
                grid_point, aspect_mean_deg, elevation_mean_m, slope_mean_deg, 
                cover_type_2016_gridVeg, type3_vegetation_indicators, type4_indicators_history
             FROM `mpg-data-warehouse.grid_point_summaries.location_position_classification`"

In [None]:
# Run the position-classification query, billed to the `billing` project.
bq_position <- bq_project_query(x = billing, query = position_sql)

In [None]:
# Download the query result into the R session.
tb_position <- bq_table_download(x = bq_position)

In [None]:
# Convert the downloaded result to a base data frame.
df_position <- tb_position %>%
  as.data.frame()

In [None]:
# Preview column names, types and leading values.
df_position %>%
  glimpse()

Rows: 582
Columns: 7
$ grid_point                  <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13…
$ aspect_mean_deg             <dbl> 334.7050, 45.3030, 221.3340, 290.4890, 28…
$ elevation_mean_m            <dbl> 1395.64, 1456.09, 1126.90, 1166.33, 1179.…
$ slope_mean_deg              <dbl> 28.44230, 12.22630, 4.25130, 2.68361, 4.2…
$ cover_type_2016_gridVeg     <chr> "woodland/forest", "non-irrigated grassla…
$ type3_vegetation_indicators <chr> "mixed canopy conifer", "uncultivated gra…
$ type4_indicators_history    <chr> "mixed canopy conifer", "uncultivated gra…


# Wrangle

## Reshape Intercepts/Effort

In [None]:
# Reshape to wide form: one row per survey_ID / grid_point and one column per
# plant code holding intercepts_pct; combinations with no record become 0.
# pivot_wider() replaces the superseded spread(). names_sort = TRUE and
# arrange() keep the alphabetical column order and sorted row order that
# spread() produced, and as.data.frame() keeps the result a base data frame.
df_intercepts <- df_intercepts %>%
  pivot_wider(
    names_from = key_plant_code,
    values_from = intercepts_pct,
    values_fill = 0,
    names_sort = TRUE
  ) %>%
  arrange(survey_ID, grid_point) %>%
  as.data.frame()

## Join Tables

In [None]:
# Attach survey year and sequence to each survey row. The key is stated
# explicitly so the join cannot silently pick up any other column that happens
# to share a name between the two tables.
df_abundance_matrix <- df_intercepts %>%
  left_join(df_meta, by = "survey_ID")

Joining, by = "survey_ID"



In [None]:
# Attach the position-classification columns to each row. The key is stated
# explicitly so the join cannot silently pick up any other column that happens
# to share a name between the two tables.
df_abundance_matrix <- df_abundance_matrix %>%
  left_join(df_position, by = "grid_point")

Joining, by = "grid_point"



## Reorder Columns

In [None]:
# Show the first four rows before reordering the columns.
df_abundance_matrix %>%
  head(n = 4)

Unnamed: 0_level_0,survey_ID,grid_point,ABIGRA,ABILAS,ACEGLA,ACHMIL,ACTRUB,AGAURT,AGOAUR,AGOGLA,⋯,XERTEN,ZEAMAY,year,survey_sequence,aspect_mean_deg,elevation_mean_m,slope_mean_deg,cover_type_2016_gridVeg,type3_vegetation_indicators,type4_indicators_history
Unnamed: 0_level_1,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<int>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
1,012C5FAD-2451-41B0-9E2F-432D1ECEB55C,285,0,0,0,0.0,0,0,0,0,⋯,0,0,2016,2016,138.749,1352.1,24.5141,shrubland,mixed sage and bitterbrush,mixed sage and bitterbrush
2,0133805F-8237-4190-B125-14E883158664,505,0,0,8,0.0,0,0,0,0,⋯,0,0,2016,2016,319.161,1194.89,16.505,woody draw,wooded draw,wooded draw
3,0155DA4F-B744-4B92-BAE3-326ABC3C586F,401,0,0,0,6.5,0,0,0,0,⋯,0,0,2017,2017,178.871,1588.6,8.31327,woodland/forest,open canopy conifer,open canopy conifer
4,0159CEF8-3C0B-42D1-B961-03DA47A2C274,23,0,0,0,0.0,0,0,0,0,⋯,0,0,2016,2016,264.245,1224.44,9.72424,shrubland,bitterbrush,bitterbrush


In [None]:
# Put the survey and grid-point descriptor columns first and the plant-code
# columns after them. Columns are chosen by name rather than by position, so
# the reorder keeps working when the number of plant codes changes.
df_abundance_matrix <- df_abundance_matrix %>%
  select(
    survey_ID, year, survey_sequence, grid_point,
    aspect_mean_deg, elevation_mean_m, slope_mean_deg,
    cover_type_2016_gridVeg, type3_vegetation_indicators,
    type4_indicators_history,
    everything()
  )

In [None]:
# Show the first four rows after reordering the columns.
df_abundance_matrix %>%
  head(n = 4)

Unnamed: 0_level_0,survey_ID,year,survey_sequence,grid_point,aspect_mean_deg,elevation_mean_m,slope_mean_deg,cover_type_2016_gridVeg,type3_vegetation_indicators,type4_indicators_history,⋯,VERVIR,VICAME,VICSAT,VIOCAN,VIOL_SP,VIONUT,VULOCT,WOOORE,XERTEN,ZEAMAY
Unnamed: 0_level_1,<chr>,<int>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,012C5FAD-2451-41B0-9E2F-432D1ECEB55C,2016,2016,285,138.749,1352.1,24.5141,shrubland,mixed sage and bitterbrush,mixed sage and bitterbrush,⋯,0,0,0,0,0,0,0,0,0,0
2,0133805F-8237-4190-B125-14E883158664,2016,2016,505,319.161,1194.89,16.505,woody draw,wooded draw,wooded draw,⋯,0,0,0,0,0,0,0,0,0,0
3,0155DA4F-B744-4B92-BAE3-326ABC3C586F,2017,2017,401,178.871,1588.6,8.31327,woodland/forest,open canopy conifer,open canopy conifer,⋯,0,0,0,0,0,0,0,0,0,0
4,0159CEF8-3C0B-42D1-B961-03DA47A2C274,2016,2016,23,264.245,1224.44,9.72424,shrubland,bitterbrush,bitterbrush,⋯,0,0,0,0,0,0,0,0,0,0


# Output

In [None]:
# Write the abundance matrix to a CSV file in the working directory. The file
# name is passed positionally because the `path` argument is deprecated in
# newer readr releases (renamed to `file`); the positional form works on both.
write_csv(df_abundance_matrix, "gridVeg_plant_abundance_matrix_WRANGLE.csv")