-
Notifications
You must be signed in to change notification settings - Fork 0
/
dust_analysis.R
153 lines (111 loc) · 6.32 KB
/
dust_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
## Script name: dust_analysis.R
##
## Purpose of script: Compute the fraction dust concentration per land use type across coccidiomycosis endemic regions in California
##
## Date Created: 12-20-2021
## Last Modified:
## - Added flexibility for using script for multiple years (01-18-2022)
##
## Author: Nicole Keeney
## Email: nicolejkeeney@gmail.com
## GitHub: nicolejkeeney
# ------------------ Dependencies ------------------
library(tidyverse)
library(ncdf4)
library(raster)
library(sf)
library(lubridate)
library(exactextractr)
library(plyr)
library(dplyr)
library(parallel)
source("utils.R") # Helper functions
# ------------------ USER INPUTS ------------------
args <- (commandArgs(TRUE)) # Input year from BASH file (see Rscript.txt)
if (length(args) == 0) { # This condition is TRUE if running code interactively in RStudio
year <- 2016 # Supply default value for year
} else {
year <- eval( parse(text=args[1]) ) # Read in command line argument
year <- as.integer(year) # Convert string input to integer
}
months <- 1:12 # Months to run analysis for
# ------------------ Define filepaths, create outfile & output directory ------------------
# Set locations to data and check that paths exits
WUSTL_FOLDER <- paste("data/SOIL", as.character(year), sep="/") %>% check_path # Path to WUSTL data
CROPSCAPE_FOLDER <- "data/cropscape" %>% check_path # Path to folder containing cropscape rasters
SHAPEFILE_PATH <- "data/CA_Counties" %>% check_path # Path to counties shapefile
OUTPUT_DIR <- paste("data/results", as.character(year), sep="/") # Where to save csv results
# Create outfile for storing info about code
outfile <- paste0(as.character(year), "_log", ".txt")
if (file.exists(outfile)) { unlink(outfile) } # Remove file if it already exists
cat("Created outfile", outfile, file = outfile, append = TRUE)
# Create output directory
dir.create(OUTPUT_DIR, showWarnings = FALSE, recursive = TRUE)
# Start timer
cat("\nPerforming analysis for", year,"...", file = outfile, append = TRUE)
start.time = Sys.time()
# ------------------ Read in cropscape raster & CA counties shapefile ------------------
# Read in cropscape raster & shapefile
cat("\nReading in cropscape raster & central valley shapefile (time invariant)...", file = outfile, append = TRUE)
counties <- read_centralValley(SHAPEFILE_PATH) # Read in shapefile of Central Valley counties of interest
cropscape_raster <- read_cropscape(CROPSCAPE_FOLDER=CROPSCAPE_FOLDER, # Read in CropScape raster
year=as.character(year),
geom=counties)
cat("complete.\n", file = outfile, append = TRUE)
# ------------------ Set up cluster ------------------
# Make cluster
ncores <- as.numeric(Sys.getenv('SLURM_CPUS_ON_NODE')) # Detect number of cores. This should be set in Rscript.txt if running in savio
if (is.na(ncores)) { ncores <- 4 } # Set to 4 cores if no SLURM_CPUS_ON_NODE environment variable detected (i.e running on personal laptop)
cl <- makeCluster(ncores, outfile=outfile) # Make cluster using outfile defined earlier
cat("\nMade cluster with", ncores,"cores.", file = outfile, append = TRUE)
# Load desired packages into each cluster
invisible(clusterEvalQ(cl, c(library(ncdf4),
library(tidyverse),
library(raster),
library(sf),
library(lubridate),
library(exactextractr),
library(plyr),
library(dplyr),
source("utils.R"))))
# Export time-invariant variables to each cluster
clusterExport(cl=cl, varlist=c("WUSTL_FOLDER","counties", "OUTPUT_DIR","cropscape_raster"), envir=environment())
# ------------------ PARALLELIZED ANALYSIS ------------------
# Analysis is performed for each month in parallel using parSapply from R parallel package
parSapply(cl, months, FUN=function(month, year, cropscape_raster, WUSTL_FOLDER, counties) {
start.time.month = Sys.time()
date <- paste0(as.character(year),"-", as.character(month),"-01") %>% # Get year-mon of analysis as date
as.Date("%Y-%m-%d")
cat("\n", format(date,"%B %Y"),": Starting analysis...")
# ------------------ Read in WUSTL raster ------------------
cat("\n", format(date,"%B %Y"),": Reading in WUSTL raster...")
wustl_raster <- read_wustl(WUSTL_FOLDER=WUSTL_FOLDER,
year=as.character(year),
month=format(date,"%m"),
geom=counties)
cat("complete.")
# ------------------ Perform pixel analysis on WUSTL raster ------------------
# Convert raster to data frame of points and determine which county each point (pixel) is in
cat("\n", format(date,"%B %Y"),": Converting WUSTL raster to points & determining county of each point...")
wustl_results <- wuslt_raster_analysis(wustl_raster=wustl_raster, counties=counties)
cat("complete.")
# ------------------ Perform land type analysis ------------------
# Pixel ID is assigned by looping through each polygon
# See the function cov_frac_df for more details on the code
cat("\n", format(date,"%B %Y"),": Computing fraction land use type in each WUSTL pixel...")
crop_results <- crop_extraction_wustl_polys(wustl_raster=wustl_raster, cropscape_raster=cropscape_raster)
cat("complete.")
# ------------------ Combine results and save as csv file ------------------
# Combine results
cat("\n", format(date,"%B %Y"),": Combining WUSTL and cropscape analyses...")
results_all <- left_join(wustl_results, crop_results, on=c("pixel_ID","SOIL")) %>%
dplyr::rename("dust(ug/m3)" = SOIL) # Rename column
# Save data frame as csv
output_filepath = paste0(OUTPUT_DIR,"/", format(date, "%Y%m"),".csv")
cat("\n", format(date,"%B %Y"),": Saving results as a csv file to ", output_filepath, "...")
write.csv(results_all, output_filepath, row.names=FALSE)
cat("\n", format(date,"%B %Y"),": COMPLETED ANALYSIS. Total time elapsed:", time_elapsed_pretty(start.time.month, Sys.time()), "\n")
}, year=year, cropscape_raster=cropscape_raster, WUSTL_FOLDER=WUSTL_FOLDER, counties=counties)
# ------------------ End of script. Stop cluster ------------------
cat("Completed analysis for", year, "\nTotal time elapsed:", time_elapsed_pretty(start.time, Sys.time())) # Print total time elapsed
stopCluster(cl) # Stop cluster