<a href="https://colab.research.google.com/github/samsoe/mpg_notebooks/blob/master/gridVeg_point_intercept_wrangle.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Documentation

[Readme - vegetation point transect survey](https://docs.google.com/document/d/1JWnhxNjeSQZkSnGhtHP68i_l1mDj4vPFMBdUvGqN0TA/edit?usp=sharing)

# Tools

In [1]:
library(tidyverse)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.1     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.1     [32m✔[39m [34mdplyr  [39m 1.0.0
[32m✔[39m [34mtidyr  [39m 1.1.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.5.0

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



# Source

In [2]:
# Google Drive direct-download URL for the point-intercept source export:
# 2020-06-17_gridVeg_point_intercepts_SOURCE.csv
src_intercept <- "https://drive.google.com/uc?export=download&id=1Uj8Nvo0GtFueZt6_Yli2D8jWiqACNssv"

In [3]:
# Load the source CSV. The survey key and the hit 2-4 serial columns are
# pinned to character; every other column type is left for readr to guess.
df_pt <- read_csv(
  file = src_intercept,
  col_types = cols(
    `Survey Data::__kp_Survey` = col_character(),
    `_kf_Hit2_serial` = col_character(),
    `_kf_Hit3_serial` = col_character(),
    `_kf_Hit4_serial` = col_character()
  )
)

In [4]:
# Preview the first rows of the freshly imported table.
df_pt %>% head()

Survey Data::__kp_Survey,Survey Data::_kf_Site,Survey Data::SurveyDate,Survey Data::SurveyYear,PointTrans,_kf_Hit1_serial,Height,_kf_Hit2_serial,_kf_Hit3_serial,_kf_Hit4_serial,GroundCover
<chr>,<dbl>,<chr>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
5,108,11/15/2010,2010,N1,406,20,,,,
5,108,11/15/2010,2010,N2,12,27,,,,
5,108,11/15/2010,2010,N3,12,16,,,,
5,108,11/15/2010,2010,N4,12,17,,,,
5,108,11/15/2010,2010,N5,12,16,,,,
5,108,11/15/2010,2010,N6,12,16,,,,


# Wrangle

## Rename

In [5]:
# List the source column names before renaming.
colnames(df_pt)

In [6]:
# Column names as they arrive in the source export.
oldnames <- c(
  "Survey Data::__kp_Survey", "Survey Data::_kf_Site",
  "Survey Data::SurveyDate", "Survey Data::SurveyYear",
  "PointTrans", "_kf_Hit1_serial", "Height",
  "_kf_Hit2_serial", "_kf_Hit3_serial", "_kf_Hit4_serial", "GroundCover"
)

# Replacement names, in the same order as `oldnames`.
newnames <- c(
  "survey_ID", "grid_point", "date", "year",
  "transect_point", "intercept_1", "height_intercept_1", "intercept_2",
  "intercept_3", "intercept_4", "intercept_ground_code"
)

# Guard against the two vectors drifting out of step: setNames() would
# otherwise silently produce a malformed lookup.
stopifnot(length(oldnames) == length(newnames))

# Build an explicit new = old lookup so each rename is tied to a specific
# source column rather than to its position. all_of() makes a missing source
# column an error instead of a silent no-op.
column_lookup <- setNames(oldnames, newnames)

df_pt <- df_pt %>% rename(all_of(column_lookup))

In [7]:
# Preview the table after the columns have been renamed.
df_pt %>% head()

survey_ID,grid_point,date,year,transect_point,intercept_1,height_intercept_1,intercept_2,intercept_3,intercept_4,intercept_ground_code
<chr>,<dbl>,<chr>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
5,108,11/15/2010,2010,N1,406,20,,,,
5,108,11/15/2010,2010,N2,12,27,,,,
5,108,11/15/2010,2010,N3,12,16,,,,
5,108,11/15/2010,2010,N4,12,17,,,,
5,108,11/15/2010,2010,N5,12,16,,,,
5,108,11/15/2010,2010,N6,12,16,,,,


## Filter

In [8]:
# Keep only surveys from after 2010 (rows with year <= 2010 are dropped;
# filter() also drops rows where the comparison is NA).
df_pt <- filter(df_pt, year > 2010)

## Explore

### grid_point

In [9]:
# Sorted list of the unique grid_point values present in the data.
grid_points <- distinct(df_pt, grid_point)
arrange(grid_points, grid_point)

grid_point
<dbl>
1
2
3
4
5
6
7
8
9
10


### transect_point

In [None]:
# Sorted list of the unique transect_point labels, used to spot odd values.
df_pt %>%
  select(transect_point) %>%
  distinct() %>%
  arrange(transect_point)

In [13]:
# Transect point labels treated as outliers; they are recoded to NA.
outliers <- c("se", "T", "W21E")

# Use the vector above as the single source of truth rather than repeating
# each literal. %in% never yields NA, and replace() keeps the column's
# character type, so existing NA values simply stay NA.
df_pt <- df_pt %>%
  mutate(transect_point = replace(transect_point,
                                  transect_point %in% outliers,
                                  NA))

### intercept_ground_code

In [14]:
# Sorted list of the unique ground cover codes before cleaning.
arrange(distinct(df_pt, intercept_ground_code), intercept_ground_code)

intercept_ground_code
<chr>
BG
BV
G
L
LIC
M
M/L
NDC
OTHER
R


In [15]:
# Ground cover codes that are not accepted; they are recoded to NA.
not_in_list <- c("NDC", "WDSTUMP")

# Use the vector above as the single source of truth rather than repeating
# each literal. %in% never yields NA, and replace() keeps the column's
# character type, so existing NA values simply stay NA.
df_pt <- df_pt %>%
  mutate(intercept_ground_code = replace(intercept_ground_code,
                                         intercept_ground_code %in% not_in_list,
                                         NA))

In [16]:
# Re-list the unique ground cover codes to confirm the recode took effect.
df_pt %>%
  select(intercept_ground_code) %>%
  distinct() %>%
  arrange(intercept_ground_code)

intercept_ground_code
<chr>
BG
BV
G
L
LIC
M
M/L
OTHER
R
SC


## Data Types

In [17]:
# Inspect column types before coercion.
df_pt %>% glimpse()

Rows: 248,645
Columns: 11
$ survey_ID             [3m[90m<chr>[39m[23m "69", "69", "69", "69", "69", "69", "69", "69",…
$ grid_point            [3m[90m<dbl>[39m[23m 329, 329, 329, 329, 329, 329, 329, 329, 329, 32…
$ date                  [3m[90m<chr>[39m[23m "07/18/2011", "07/18/2011", "07/18/2011", "07/1…
$ year                  [3m[90m<dbl>[39m[23m 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011,…
$ transect_point        [3m[90m<chr>[39m[23m "N1", "N2", "N3", "N4", "N5", "N6", "N7", "N8",…
$ intercept_1           [3m[90m<dbl>[39m[23m 497, 497, 497, 497, 497, 497, 497, 12, 82, 82, …
$ height_intercept_1    [3m[90m<dbl>[39m[23m 60.0, 67.0, 55.0, 62.5, 65.0, 70.0, 40.0, 40.0,…
$ intercept_2           [3m[90m<chr>[39m[23m "82", "82", "82", "82", "82", "82", "82", "82",…
$ intercept_3           [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, "12", NA, NA, NA, NA, NA, N…
$ intercept_4           [3m[90m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,

In [18]:
# Parse the survey date from its month/day/year text form.
df_pt$date <- as.Date(df_pt$date, "%m/%d/%Y")

# Columns stored as whole numbers in the output tables.
# NOTE(review): as.integer() truncates fractional values, so a
# height_intercept_1 of 62.5 becomes 62 -- confirm this loss is intended.
integer_columns <- c(
  "grid_point", "year", "intercept_1", "height_intercept_1",
  "intercept_2", "intercept_3", "intercept_4"
)
df_pt[integer_columns] <- lapply(df_pt[integer_columns], as.integer)

In [19]:
# Inspect column types after coercion.
df_pt %>% glimpse()

Rows: 248,645
Columns: 11
$ survey_ID             [3m[90m<chr>[39m[23m "69", "69", "69", "69", "69", "69", "69", "69",…
$ grid_point            [3m[90m<int>[39m[23m 329, 329, 329, 329, 329, 329, 329, 329, 329, 32…
$ date                  [3m[90m<date>[39m[23m 2011-07-18, 2011-07-18, 2011-07-18, 2011-07-18…
$ year                  [3m[90m<int>[39m[23m 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011,…
$ transect_point        [3m[90m<chr>[39m[23m "N1", "N2", "N3", "N4", "N5", "N6", "N7", "N8",…
$ intercept_1           [3m[90m<int>[39m[23m 497, 497, 497, 497, 497, 497, 497, 12, 82, 82, …
$ height_intercept_1    [3m[90m<int>[39m[23m 60, 67, 55, 62, 65, 70, 40, 40, 5, 15, 15, 1, 7…
$ intercept_2           [3m[90m<int>[39m[23m 82, 82, 82, 82, 82, 82, 82, 82, NA, NA, NA, NA,…
$ intercept_3           [3m[90m<int>[39m[23m NA, NA, NA, NA, NA, 12, NA, NA, NA, NA, NA, NA,…
$ intercept_4           [3m[90m<int>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,

# Table Structure

## gridVeg_point_intercept_vegetation

In [20]:
# Vegetation table: survey identifiers plus the first-hit height and the
# four intercept columns, in output column order.
df_veg <- df_pt %>%
  select(
    survey_ID, grid_point, date, year,
    transect_point, height_intercept_1,
    intercept_1, intercept_2, intercept_3, intercept_4
  )

In [21]:
# Check the structure of the vegetation table before writing it out.
df_veg %>% glimpse()

Rows: 248,645
Columns: 10
$ survey_ID          [3m[90m<chr>[39m[23m "69", "69", "69", "69", "69", "69", "69", "69", "6…
$ grid_point         [3m[90m<int>[39m[23m 329, 329, 329, 329, 329, 329, 329, 329, 329, 329, …
$ date               [3m[90m<date>[39m[23m 2011-07-18, 2011-07-18, 2011-07-18, 2011-07-18, 2…
$ year               [3m[90m<int>[39m[23m 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 20…
$ transect_point     [3m[90m<chr>[39m[23m "N1", "N2", "N3", "N4", "N5", "N6", "N7", "N8", "N…
$ height_intercept_1 [3m[90m<int>[39m[23m 60, 67, 55, 62, 65, 70, 40, 40, 5, 15, 15, 1, 7, 5…
$ intercept_1        [3m[90m<int>[39m[23m 497, 497, 497, 497, 497, 497, 497, 12, 82, 82, 82,…
$ intercept_2        [3m[90m<int>[39m[23m 82, 82, 82, 82, 82, 82, 82, 82, NA, NA, NA, NA, NA…
$ intercept_3        [3m[90m<int>[39m[23m NA, NA, NA, NA, NA, 12, NA, NA, NA, NA, NA, NA, NA…
$ intercept_4        [3m[90m<int>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA

### Output

In [22]:
# Write the vegetation table to the working directory.
# The output file is passed positionally: readr renamed this argument from
# `path` to `file` (deprecating `path` in 1.4.0), and the positional form
# works on both older and newer readr versions without a warning.
write_csv(df_veg, "gridVeg_point_intercept_vegetation_WRANGLE.csv")

## gridVeg_point_intercept_ground

In [23]:
# Ground cover table: survey identifiers plus the ground cover code.
df_gnd <- df_pt %>%
  select(
    survey_ID, grid_point, date, year,
    transect_point, intercept_ground_code
  )

In [24]:
# Check the structure of the ground cover table before writing it out.
df_gnd %>% glimpse()

Rows: 248,645
Columns: 6
$ survey_ID             [3m[90m<chr>[39m[23m "69", "69", "69", "69", "69", "69", "69", "69",…
$ grid_point            [3m[90m<int>[39m[23m 329, 329, 329, 329, 329, 329, 329, 329, 329, 32…
$ date                  [3m[90m<date>[39m[23m 2011-07-18, 2011-07-18, 2011-07-18, 2011-07-18…
$ year                  [3m[90m<int>[39m[23m 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011,…
$ transect_point        [3m[90m<chr>[39m[23m "N1", "N2", "N3", "N4", "N5", "N6", "N7", "N8",…
$ intercept_ground_code [3m[90m<chr>[39m[23m "L", "L", "L", "L", "L", "BV", "L", "BV", "BV",…


### Output

In [25]:
# Write the ground cover table to the working directory.
# The output file is passed positionally: readr renamed this argument from
# `path` to `file` (deprecating `path` in 1.4.0), and the positional form
# works on both older and newer readr versions without a warning.
write_csv(df_gnd, "gridVeg_point_intercept_ground_WRANGLE.csv")