<a href="https://colab.research.google.com/github/samsoe/mpg_notebooks/blob/master/abundance_order_habitat_wrangle.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Documentation: [Readme bird point count monitoring](https://docs.google.com/document/d/1PRryJzGOUtfr-fKXzb3tbr48xiaTuAMVk18XFXlvxcM/edit?usp=sharing)

# Tools

In [0]:
library(tidyverse)

In [0]:
# Install bigrquery only when it is missing, so re-running this cell does not
# reinstall the package every time.
if (!requireNamespace("bigrquery", quietly = TRUE)) {
  install.packages("bigrquery")
}
library(bigrquery)

# Source

## BigQuery

### Database Connection

In [0]:
# BigQuery API Key
# Authenticate bigrquery with the key file at this path.
# NOTE(review): the file name suggests a service-account JSON key uploaded to
# the Colab /content directory — confirm, and keep it out of version control.
bq_auth(path = "/content/mpg-data-warehouse-34434e1a9914.json")

In [0]:
# Set the BIGQUERY_TEST_PROJECT environment variable for this R session.
# Presumably this is the value bq_test_project() picks up when `billing` is
# assigned further down — confirm against the bigrquery documentation.
Sys.setenv(BIGQUERY_TEST_PROJECT = "mpg-data-warehouse")

In [0]:
# Project identifier passed as `billing` to the connections and queries in
# this file.
# NOTE(review): bq_test_project() appears to return the BIGQUERY_TEST_PROJECT
# value set earlier — confirm.
billing <- bq_test_project()

In [0]:
# DBI connection to the `bird_point_counts` dataset of the
# `mpg-data-warehouse` project, using `billing` as the billing project.
con_bird_point_counts <- dbConnect(
  drv = bigrquery::bigquery(),
  billing = billing,
  project = "mpg-data-warehouse",
  dataset = "bird_point_counts"
)

In [0]:
# DBI connection to the `grid_point_summaries` dataset of the
# `mpg-data-warehouse` project, using `billing` as the billing project.
con_location_position_classification <- dbConnect(
  drv = bigrquery::bigquery(),
  billing = billing,
  project = "mpg-data-warehouse",
  dataset = "grid_point_summaries"
)

In [11]:
# List the tables reachable through the bird_point_counts connection.
dbListTables(con_bird_point_counts)

In [15]:
# List the tables reachable through the grid_point_summaries connection.
dbListTables(con_location_position_classification)

### Query Database

#### bird_point_counts

In [0]:
# Query text: every column from bird_surveyInfo_records_function in the
# bird_point_counts dataset.
bird_sql <- "SELECT *
             FROM `mpg-data-warehouse.bird_point_counts.bird_surveyInfo_records_function`"

In [0]:
# result bird table
# Run bird_sql with `billing` as the project; bq_bird is what
# bq_table_download() is given in the next cell.
bq_bird <- bq_project_query(billing, bird_sql)

In [0]:
# Download the query result referenced by bq_bird into the R session.
tb_bird <- bq_table_download(bq_bird)

In [25]:
# Check which class bq_table_download() returned.
class(tb_bird)

In [0]:
# Convert the downloaded result to a plain data.frame.
df_bird <- as.data.frame(tb_bird)

In [33]:
# Inspect the column names and types of the bird records.
str(df_bird)

'data.frame':	117810 obs. of  31 variables:
 $ survey_ID                         : int  5195 8598 8599 4294 4295 4296 4297 4302 10 22 ...
 $ survey_year                       : int  2013 2015 2015 2013 2013 2013 2013 2013 2010 2010 ...
 $ survey_date                       : Date, format: "2013-05-29" "2015-06-09" ...
 $ survey_visit_calc                 : int  1 1 1 1 1 1 1 1 1 1 ...
 $ survey_time_start_MDT             : 'hms' num  09:38:00 06:43:00 07:02:00 06:13:00 ...
  ..- attr(*, "units")= chr "secs"
 $ survey_time_end_MDT               : 'hms' num  09:48:00 06:53:00 07:12:00 06:23:00 ...
  ..- attr(*, "units")= chr "secs"
 $ survey_grid_point                 : int  163 482 480 193 194 195 181 166 477 484 ...
 $ survey_observer                   : chr  "Katharine Stone" "Eric Rasmussen" "Eric Rasmussen" "Eric Rasmussen" ...
 $ survey_wind_code                  : int  2 1 0 1 2 1 1 2 1 2 ...
 $ survey_sky_code                   : int  1 0 0 0 0 0 0 1 2 2 ...
 $ survey_noise_code  

#### location_position_classification

In [0]:
# Query text: every column from location_position_classification in the
# grid_point_summaries dataset.
location_sql <- "SELECT *
                 FROM `mpg-data-warehouse.grid_point_summaries.location_position_classification`"

Leave the following error for now. To move forward in the meantime, the table `mpg-data-warehouse.grid_point_summaries.location_position_classification` is exported to CSV and read from that file instead (see the CSV section below).

In [29]:
# Run location_sql with `billing` as the project.
# NOTE: this call raised an error in the recorded run, so bq_location was not
# created; df_location is read from a CSV file further down instead.
bq_location <- bq_project_query(billing, location_sql)

ERROR: ignored

## CSV

### location_position_classification

In [30]:
# MPG Ranch Datasets > Bird point count monitoring > Data - Processed > location_position_classification.csv
# CSV copy of the location_position_classification table, read from a Google
# Drive download URL because the BigQuery query for that table currently errors.
src_location <- "https://drive.google.com/uc?id=1z1nFdsk0vZXhw78rB3ijrFcLX2mF4VBQ"
df_location <- read_csv(src_location)

Parsed with column specification:
cols(
  grid_point = col_double(),
  lat = col_double(),
  long = col_double(),
  aspect_mean_deg = col_double(),
  elevation_mean_m = col_double(),
  slope_mean_deg = col_double(),
  cover_type_2016_gridVeg = col_character(),
  biomass_habitat_type = col_character(),
  type1_biome = col_character(),
  type2_vegetation_community = col_character(),
  type3_vegetation_indicators = col_character(),
  type4_indicators_history = col_character()
)



# Structure

## bird_point_counts

In [0]:
# Average abundance per taxonomic order at each grid point in each year.
#
# Steps:
#   1. keep records with records_detect_distance_min_meters <= 100 and
#      records_interval 1 or 2;
#   2. sum records_abundance within year x visit x grid point x order;
#   3. average those per-visit totals within year x grid point x order.
#
# NOTE(review): a visit in which an order was not recorded at a point
# contributes no row in step 2, so it is not counted as a zero in the step 3
# mean — confirm this is the intended definition of the average.
bird_abundance <- df_bird %>%
  # The conditions are row-wise, so filter before grouping.
  filter(
    records_detect_distance_min_meters <= 100,
    records_interval %in% c(1, 2)
  ) %>%
  group_by(
    survey_year,
    survey_visit_calc,
    survey_grid_point,
    function_taxa_order
  ) %>%
  summarize(bird_abundance = sum(records_abundance)) %>%
  group_by(survey_year, survey_grid_point, function_taxa_order) %>%
  summarize(bird_abundance_avg = mean(bird_abundance)) %>%
  # summarize() only removes the last grouping level; drop the remaining
  # groups so later joins and summaries do not silently run per group.
  ungroup()

# Join

In [51]:
# Column names of the abundance table, to pick the join key.
names(bird_abundance)

In [52]:
# Column names of the location table, to pick the join key.
names(df_location)

In [56]:
# Preview the first rows of the abundance table with the location attributes
# attached, matching survey_grid_point to grid_point. The result is only
# printed, not stored.
bird_abundance %>%
  left_join(df_location, by = c(survey_grid_point = "grid_point")) %>%
  head(n = 6L)

survey_year,survey_grid_point,function_taxa_order,bird_abundance_avg,lat,long,aspect_mean_deg,elevation_mean_m,slope_mean_deg,cover_type_2016_gridVeg,biomass_habitat_type,type1_biome,type2_vegetation_community,type3_vegetation_indicators,type4_indicators_history
<int>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
2010,1,Passeriformes,1.0,46.73193,-114.0017,334.705,1395.64,28.4423,woodland/forest,,forest,upland,mixed canopy conifer,mixed canopy conifer
2010,2,Passeriformes,1.0,46.72972,-114.001,45.303,1456.09,12.2263,non-irrigated grasslands,Range,rangeland,grassland,uncultivated grassland native or degraded,uncultivated grassland native or degraded
2010,3,Passeriformes,1.0,46.72443,-114.0227,221.334,1126.9,4.2513,shrubland,Range,rangeland,grassland,uncultivated grassland native or degraded,uncultivated grassland native or degraded
2010,4,Passeriformes,1.0,46.72487,-114.0195,290.489,1166.33,2.68361,shrubland,Range,rangeland,grassland,uncultivated grassland native or degraded,uncultivated grassland native or degraded
2010,7,Accipitriformes,1.0,46.72707,-114.0036,302.818,1387.8,26.1757,non-irrigated grasslands,Range,rangeland,grassland,uncultivated grassland native or degraded,uncultivated grassland native or degraded
2010,7,Passeriformes,1.5,46.72707,-114.0036,302.818,1387.8,26.1757,non-irrigated grasslands,Range,rangeland,grassland,uncultivated grassland native or degraded,uncultivated grassland native or degraded
