Most of the required files are now included. Still need some work to put plots in a new file and QA/QC.
brianburkenoaa committed May 4, 2024
1 parent 830b537 commit cf3d2d9
Showing 8 changed files with 449 additions and 209 deletions.
176 changes: 0 additions & 176 deletions R/createStoredObjects.R
@@ -2,13 +2,6 @@
# This code creates those datasets and saves them as RData files
# This only has to be run to update the data, not each time you want to use the code

library(reshape2)
library(sf)
library(ncdf4)

source('R/get_index.R') # This is not the same script as the app version
#source('scripts/dam_counts/run_fetch_FPC_counts_single_species.R')

#********************************************************
# get the land for plotting (wrap across antimeridian)
#********************************************************
@@ -45,172 +38,3 @@ oceanData_ERSST<-getOceanData(dataSet=dataSet,
save(x = "oceanData_ERSST", file = 'data/oceanSSTData.RData')
load('data/oceanSSTData.RData')

# SSH
dataSet='SSH'
# Full globe
# SSH longitude goes from 0.5 to 359.5, by 1
min.lon=0
max.lon=360
# SSH latitude goes from -74 to 65, by 1/3?
min.lat=-90
max.lat=90
years=seq(1980, 2022, 1)
months=seq(1,12,1)
removeBering=FALSE
returnDataType='anom'
returnObjectType='array'

oceanData_SSH<-getOceanData(dataSet=dataSet,
returnDataType=returnDataType, returnObjectType=returnObjectType,
min.lon=min.lon, max.lon=max.lon,
min.lat=min.lat, max.lat=max.lat,
years = years, months = months,
removeBering = removeBering)
save(x = "oceanData_SSH", file = 'data/oceanSSHData.RData')
load('data/oceanSSHData.RData')


# Code to extract dam counts and save to a file
# The data this retrieves has errors in 2023, so I'm moving to DART as the source
# spCK <- fetch_FPC_counts_single_species(my_species="spCK", my_age="adult", Year_start = 1970, Year_end = 2023)
# spCK$val <- spCK$BON[[1]]
# spCK <- spCK[,c("Year","val")]
# colnames(spCK) <- c("year","spCK")
# faCK <- fetch_FPC_counts_single_species(my_species="faCK", my_age="adult", Year_start = 1970, Year_end = 2023)
# faCK$val <- faCK$BON[[1]]
# faCK <- faCK[,c("Year","val")]
# colnames(faCK) <- c("year","faCK")
# steel <- fetch_FPC_counts_single_species(my_species="steel", my_age="adult", Year_start = 1970, Year_end = 2023)
# steel$val <- steel$BON[[1]]
# steel <- steel[,c("Year","val")]
# colnames(steel) <- c("year","steel")
# response <- merge(merge(spCK, faCK), steel)
response <- read.csv("data/response/response.csv")

# lag response (this is now done in the app)
#response$year <- response$year - 2
response <- response[response$year %in% c(1970:2023), ]
save(x = "response", file = 'CMISSTapp/data/responseData.RData')
load(file = 'CMISSTapp/data/responseData.RData')

# Use MCN data as a test for user-defined files
# (requires fetch_FPC_counts_single_species(); the source() call for the FPC script is commented out above)
spCK <- fetch_FPC_counts_single_species(my_species="spCK", my_age="adult")
spCK$val <- spCK$MCN[[1]]
response <- spCK[,c("Year","val")]
colnames(response) <- c("year","val")
# lag response
#response$year <- response$year - 2
response <- response[response$year %in% c(1981:2023), ]
write.csv(x = response, file = 'CMISSTapp/data/user_response.csv', row.names = FALSE)



#*******************************
#* PDO, NPGO, ONI, SSTarc, and PC1
#*******************************

pdo<-read.csv("https://oceanview.pfeg.noaa.gov/erddap/tabledap/cciea_OC_PDO.csv?time,PDO", header = TRUE, stringsAsFactors = FALSE)
pdo<-pdo[-1,]
pdo$PDO<-as.numeric(pdo$PDO)
pdo$year<-as.numeric(substr(pdo$time,1,4))
pdo$month <- as.numeric(substr(pdo$time,6,7))
pdo<-pdo[,c(3,4,2)]
colnames(pdo)[3]<-"index"

# Make the seasonal indices
pdo.win<-pdo
pdo.win<-pdo.win[pdo.win$month %in% 1:3,]
pdo.win<-aggregate(pdo.win$index, by=list(pdo.win$year), mean, na.rm=TRUE)
colnames(pdo.win)<-c("year","pdo.win")
pdo.spr<-pdo[pdo$month %in% 4:6,]
pdo.spr<-aggregate(pdo.spr$index, by=list(pdo.spr$year), mean, na.rm=TRUE)
colnames(pdo.spr)<-c("year","pdo.spr")
pdo.sum<-pdo[pdo$month %in% 7:9,]
pdo.sum<-aggregate(pdo.sum$index, by=list(pdo.sum$year), mean, na.rm=TRUE)
colnames(pdo.sum)<-c("year","pdo.sum")
pdo.aut<-pdo[pdo$month %in% 10:12,]
pdo.aut<-aggregate(pdo.aut$index, by=list(pdo.aut$year), mean, na.rm=TRUE)
colnames(pdo.aut)<-c("year","pdo.aut")

otherIndicators<-merge(merge(merge(pdo.win, pdo.spr), pdo.sum), pdo.aut)

# NPGO
npgo<-read.csv("https://oceanview.pfeg.noaa.gov/erddap/tabledap/cciea_OC_NPGO.csv?time,NPGO", header = TRUE, stringsAsFactors = FALSE)
npgo<-npgo[-1,]
npgo$NPGO<-as.numeric(npgo$NPGO)
npgo$year<-as.numeric(substr(npgo$time,1,4))
npgo$month <- as.numeric(substr(npgo$time,6,7))
npgo<-npgo[,c(3,4,2)]
colnames(npgo)[3]<-"index"
# Make the seasonal indices
npgo.win<-npgo
npgo.win<-npgo.win[npgo.win$month %in% 1:3,]
npgo.win<-aggregate(npgo.win$index, by=list(npgo.win$year), mean, na.rm=TRUE)
colnames(npgo.win)<-c("year","npgo.win")
npgo.spr<-npgo[npgo$month %in% 4:6,]
npgo.spr<-aggregate(npgo.spr$index, by=list(npgo.spr$year), mean, na.rm=TRUE)
colnames(npgo.spr)<-c("year","npgo.spr")
npgo.sum<-npgo[npgo$month %in% 7:9,]
npgo.sum<-aggregate(npgo.sum$index, by=list(npgo.sum$year), mean, na.rm=TRUE)
colnames(npgo.sum)<-c("year","npgo.sum")
npgo.aut<-npgo[npgo$month %in% 10:12,]
npgo.aut<-aggregate(npgo.aut$index, by=list(npgo.aut$year), mean, na.rm=TRUE)
colnames(npgo.aut)<-c("year","npgo.aut")

otherIndicators<-merge(merge(merge(merge(otherIndicators, npgo.win), npgo.spr), npgo.sum), npgo.aut)

# ONI
oni<-read.csv("https://oceanview.pfeg.noaa.gov/erddap/tabledap/cciea_OC_ONI.csv?time,ONI", header = TRUE, stringsAsFactors = FALSE)
oni<-oni[-1,]
oni$ONI<-as.numeric(oni$ONI)
oni$year<-as.numeric(substr(oni$time,1,4))
oni$month <- as.numeric(substr(oni$time,6,7))
oni<-oni[,c(3,4,2)]
colnames(oni)[3]<-"index"
# Make the seasonal indices
oni.win<-oni
oni.win<-oni.win[oni.win$month %in% 1:3,]
oni.win<-aggregate(oni.win$index, by=list(oni.win$year), mean, na.rm=TRUE)
colnames(oni.win)<-c("year","oni.win")
oni.spr<-oni[oni$month %in% 4:6,]
oni.spr<-aggregate(oni.spr$index, by=list(oni.spr$year), mean, na.rm=TRUE)
colnames(oni.spr)<-c("year","oni.spr")
oni.sum<-oni[oni$month %in% 7:9,]
oni.sum<-aggregate(oni.sum$index, by=list(oni.sum$year), mean, na.rm=TRUE)
colnames(oni.sum)<-c("year","oni.sum")
oni.aut<-oni[oni$month %in% 10:12,]
oni.aut<-aggregate(oni.aut$index, by=list(oni.aut$year), mean, na.rm=TRUE)
colnames(oni.aut)<-c("year","oni.aut")

otherIndicators<-merge(merge(merge(merge(otherIndicators, oni.win), oni.spr), oni.sum), oni.aut)

# SSTarc
arc<-read.csv("../OceanTeamData/scripts/indicators/ersstArc/ersstArc.anom.csv", header = TRUE, stringsAsFactors = FALSE)
colnames(arc)[3]<-"index"
# Make the seasonal indices
arc.win<-arc
arc.win<-arc.win[arc.win$month %in% 1:3,]
arc.win<-aggregate(arc.win$index, by=list(arc.win$year), mean, na.rm=TRUE)
colnames(arc.win)<-c("year","arc.win")
arc.spr<-arc[arc$month %in% 4:6,]
arc.spr<-aggregate(arc.spr$index, by=list(arc.spr$year), mean, na.rm=TRUE)
colnames(arc.spr)<-c("year","arc.spr")
arc.sum<-arc[arc$month %in% 7:9,]
arc.sum<-aggregate(arc.sum$index, by=list(arc.sum$year), mean, na.rm=TRUE)
colnames(arc.sum)<-c("year","arc.sum")
arc.aut<-arc[arc$month %in% 10:12,]
arc.aut<-aggregate(arc.aut$index, by=list(arc.aut$year), mean, na.rm=TRUE)
colnames(arc.aut)<-c("year","arc.aut")

otherIndicators<-merge(merge(merge(merge(otherIndicators, arc.win), arc.spr), arc.sum), arc.aut)
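# The four seasonal-mean blocks above (PDO, NPGO, ONI, SSTarc) repeat the same
# pattern; a small helper like the hypothetical seasonal_means() below could
# replace them. This is a sketch only and is not used by the current script.
seasonal_means <- function(df, prefix) {
  # df: data frame with columns year, month, index (as built above)
  # prefix: short name used in the output column names, e.g. "pdo"
  seasons <- list(win = 1:3, spr = 4:6, sum = 7:9, aut = 10:12)
  out <- data.frame(year = sort(unique(df$year)))
  for (s in names(seasons)) {
    sub <- df[df$month %in% seasons[[s]], ]
    agg <- aggregate(sub$index, by = list(sub$year), mean, na.rm = TRUE)
    colnames(agg) <- c("year", paste(prefix, s, sep = "."))
    out <- merge(out, agg, all.x = TRUE)
  }
  out
}
# otherIndicators could then be built in one step, e.g.:
# otherIndicators <- Reduce(merge, list(seasonal_means(pdo, "pdo"), seasonal_means(npgo, "npgo"),
#                                       seasonal_means(oni, "oni"), seasonal_means(arc, "arc")))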

save(otherIndicators, file = 'CMISSTapp/data/otherIndicators.RData')

# If we add PC1 from the stoplight, we're going to have lots of NAs,
# so let's save it in a different file
# PC1 from the stoplight file
pc1<-read.csv("../../../indicators/2024/Stoplight_for_forecasts.csv", header = TRUE, stringsAsFactors = FALSE)
pc1<-pc1[,c("Year","PC1")]; colnames(pc1)[1]<-"year"

otherIndicators_pc1<-merge(otherIndicators, pc1)
save(otherIndicators_pc1, file = 'CMISSTapp/data/otherIndicators_pc1.RData')
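
# A hypothetical quick check of the stored objects (a sketch only, not yet the
# QA/QC this script still needs); the file paths are the ones saved above.
if (FALSE) {
  load('data/oceanSSTData.RData')              # oceanData_ERSST
  load('data/oceanSSHData.RData')              # oceanData_SSH
  load('CMISSTapp/data/responseData.RData')    # response
  load('CMISSTapp/data/otherIndicators.RData') # otherIndicators
  dim(oceanData_ERSST); dim(oceanData_SSH)     # lon x lat x year_month
  range(response$year); colSums(is.na(response))
  summary(otherIndicators)
}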
10 changes: 10 additions & 0 deletions R/create_OceanData_Object.R
@@ -1,6 +1,16 @@
# Extract data from global data sets
# User sets extent and return object type


#*****************************************
#*****************************************
# Not needed by most users. The resulting data
# have been stored in RData files, which will
# be updated annually by Brian Burke
#*****************************************
#*****************************************


# SST data are from ERSST (https://www.ncei.noaa.gov/pub/data/cmb/ersst/v5/netcdf/)
# ERSST is a 2x2-degree global dataset
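
# A minimal sketch of reading a single monthly ERSST v5 file with ncdf4
# (the file name pattern and the "sst"/"lon"/"lat" variable names are
#  assumptions based on the ERSST v5 archive, not taken from this repo).
# library(ncdf4)
# nc  <- nc_open("ersst.v5.202001.nc")  # one month of data
# sst <- ncvar_get(nc, "sst")           # lon x lat matrix of SST (deg C)
# lon <- ncvar_get(nc, "lon")
# lat <- ncvar_get(nc, "lat")
# nc_close(nc)
# dim(sst)  # roughly 180 x 89 for the 2x2-degree grid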

128 changes: 128 additions & 0 deletions R/crossValidation.R
@@ -0,0 +1,128 @@
# Run LOO cross-validation, removing each year sequentially and predicting that year

# library(ncdf4)
# library(RColorBrewer)
# library(sp)
# library(maptools)
# library(reshape2)
# library(ggplot2)
# library(sdmTMB)
# library(ggeffects)
# library(visreg)
# library(doBy)


#*************************************************************
# Create a large loop right here for cross-validation
#*************************************************************
LOO_CV <- function(response = response,
oceanData = oceanData, loocvYears = 5,
min.lon = min.lon, max.lon = max.lon,
min.lat = min.lat, max.lat = max.lat,
years = years, months = months,
includePDO = FALSE, includePC1 = FALSE) {

# Verify that the response is what we expect
if (ncol(response)!=2) { print("incorrect data - requires a 2-column data frame with year and the response"); return(NA) }
colnames(response)<-c("year","val")

year_mo<-data.frame(year=rep(years, each=length(months)), month=rep(months, length(years)),
label=paste(rep(years, each=length(months)), rep(months, length(years)), sep = "_"))

# Index the locations in the file
lons <- as.numeric(dimnames(oceanData)[[1]])
lats <- as.numeric(dimnames(oceanData)[[2]])
yr_mo <- dimnames(oceanData)[[3]]
lon.index<-which(lons >= min.lon & lons <= max.lon)
lat.index<-which(lats >= min.lat & lats <= max.lat)
yr_mo.index<-which(yr_mo %in% year_mo$label)
oceanData <- oceanData[lon.index, lat.index, yr_mo.index]

# Container for results
mae<-data.frame(model=as.character(), season=as.character(), year=as.numeric(),
response=as.numeric(), pred=as.numeric(),
mae=as.numeric(), stringsAsFactors = FALSE)
n<-length(years)
for (this_year in years[(n-(loocvYears-1)):n]) { # just the last "loocvYears" years
years.fit<-years[!years %in% this_year]
years.pred<-years[years %in% this_year]


#************************************************
# Create seasonal datasets
#************************************************

createSeasonalData_LOOCV<-function(oceanData, years = years, months = months, year_mo=year_mo, season=1) {

seasonal<-array(NA, c(dim(oceanData)[1], dim(oceanData)[2], length(years)), dimnames = list(dimnames(oceanData)[[1]], dimnames(oceanData)[[2]], years))
for (yy in 1:length(years)) {
if (season==1) seasonal[,,yy]<-(oceanData[,,year_mo$month == 1 & year_mo$year==years[yy]]+oceanData[,,year_mo$month == 2 & year_mo$year==years[yy]]+oceanData[,,year_mo$month == 3 & year_mo$year==years[yy]])/3
if (season==2) seasonal[,,yy]<-(oceanData[,,year_mo$month == 4 & year_mo$year==years[yy]]+oceanData[,,year_mo$month == 5 & year_mo$year==years[yy]]+oceanData[,,year_mo$month == 6 & year_mo$year==years[yy]])/3
if (season==3) seasonal[,,yy]<-(oceanData[,,year_mo$month == 7 & year_mo$year==years[yy]]+oceanData[,,year_mo$month == 8 & year_mo$year==years[yy]]+oceanData[,,year_mo$month == 9 & year_mo$year==years[yy]])/3
if (season==4) seasonal[,,yy]<-(oceanData[,,year_mo$month == 10 & year_mo$year==years[yy]]+oceanData[,,year_mo$month == 11 & year_mo$year==years[yy]]+oceanData[,,year_mo$month == 12 & year_mo$year==years[yy]])/3
}
# This scales (Z-score) the data cell-wise
# aperm() is needed because apply() over MARGIN = 1:2 returns the scaled time series as the first dimension, so we permute time back to the third dimension
oceanData.scl <- aperm(apply(seasonal, 1:2, scale), c(2,3,1))
dimnames(oceanData.scl)[[3]]<-years
return(oceanData.scl)
}

# Create the data by calling our function (returns scaled sst array and full dataset with fish)
oceanData.s1.scl <- createSeasonalData_LOOCV(oceanData = oceanData, years = years, months = months, year_mo=year_mo, season = 1)
oceanData.s2.scl <- createSeasonalData_LOOCV(oceanData = oceanData, years = years, months = months, year_mo=year_mo, season = 2)
oceanData.s3.scl <- createSeasonalData_LOOCV(oceanData = oceanData, years = years, months = months, year_mo=year_mo, season = 3)
oceanData.s4.scl <- createSeasonalData_LOOCV(oceanData = oceanData, years = years, months = months, year_mo=year_mo, season = 4)

# Get covariance between each cell's temperature and survival
covs1<-apply(oceanData.s1.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[years %in% years.fit], use="pairwise.complete.obs"))
covs2<-apply(oceanData.s2.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[years %in% years.fit], use="pairwise.complete.obs"))
covs3<-apply(oceanData.s3.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[years %in% years.fit], use="pairwise.complete.obs"))
covs4<-apply(oceanData.s4.scl[,,as.character(eval(years.fit))], 1:2, function(x) cov(x,response$val[years %in% years.fit], use="pairwise.complete.obs"))

#********************************************************************
# Create the index (how similar is each year to the covariance map)
# The covariance maps were created without the holdout year
# but we want to create the index value for all years, so we can forecast
#********************************************************************
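# (i.e., each year's index is the slope from regressing that year's scaled field on the covariance map, with no intercept)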
coefs_cov<-NULL
options(na.action="na.omit")
for (tt in 1:dim(oceanData.s1.scl)[3])
coefs_cov<-rbind(coefs_cov, c(lm(as.vector(oceanData.s1.scl[,,tt]) ~ -1 + as.vector(covs1))$coef,
lm(as.vector(oceanData.s2.scl[,,tt]) ~ -1 + as.vector(covs2))$coef,
lm(as.vector(oceanData.s3.scl[,,tt]) ~ -1 + as.vector(covs3))$coef,
lm(as.vector(oceanData.s4.scl[,,tt]) ~ -1 + as.vector(covs4))$coef))
coefs_cov<-data.frame(coefs_cov)
coefs_cov$year<-years
index_cov<-cbind(coefs_cov,response$val)
colnames(index_cov)<-c("win.cov","spr.cov","sum.cov","aut.cov","year","val")

#*****************************************
# Calculate MAE
#*****************************************

index.fit<-index_cov[index_cov$year %in% years.fit,]
index.pred<-index_cov[index_cov$year %in% years.pred,]

# Need to loop over seasons
for(season in c("win","spr","sum","aut")) {

index.fit$var <- index.fit[,paste0(season,".cov")]
index.pred$var <- index.pred[,paste0(season,".cov")]
mdl<-lm(val~var, data=index.fit)
pred<-predict(mdl, newdata = index.pred)
#cov_mae<-sqrt(mean((pred - index.pred$val)^2)) # This is RMSEP
cov_mae<-mean(abs(pred - index.pred$val)) # This is MAE
# Assign it to the data.frame
mae<-rbind(mae, data.frame(model="cmisst", season=season, year=this_year,
response=index.pred$val, pred=pred,
mae=cov_mae, stringsAsFactors = FALSE))
} # End looping over season
} # End looping over years

se <- function(x) { return(sd(x)/sqrt(length(x))) }
mae$season<-factor(mae$season, levels = c("win","spr","sum","aut"))
# If pred_out is FALSE, collapse to mean MAE (and SE) by model and season; otherwise keep the individual yearly predictions
if(exists("pred_out") && !pred_out) mae<-doBy::summaryBy(mae ~ model+season, data = mae, FUN = c(mean, se))
return(list(mae))
}
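
# A minimal usage sketch, guarded by if (FALSE) so sourcing this file does not
# run it. The spatial extent and year range are illustrative assumptions, and
# the response columns (year, spCK, ...) are those built in createStoredObjects.R.
if (FALSE) {
  load('data/oceanSSTData.RData')            # provides oceanData_ERSST
  load('CMISSTapp/data/responseData.RData')  # provides response
  yrs  <- 1981:2020                          # must be covered by both datasets
  resp <- response[response$year %in% yrs, c("year", "spCK")]
  pred_out <- FALSE  # collapse to per-season mean MAE (uses doBy::summaryBy)
  cv <- LOO_CV(response = resp, oceanData = oceanData_ERSST, loocvYears = 5,
               min.lon = 158, max.lon = 246, min.lat = 10, max.lat = 62,
               years = yrs, months = 1:12)
  cv[[1]]  # MAE by model and season (or individual predictions)
}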
22 changes: 9 additions & 13 deletions R/get_index.R
@@ -26,25 +26,17 @@ get_CMISST_index <- function(response, oceanData=oceanData_ERSST,
# Extract and scale the spatial data
#***************************************************************

# The original script loaded individual .nc files as needed
# The revised version, for shiny, loads a saved version of the
# full globe and subsets it.
# Data that needs to be loaded is in createStoredObjects.R

# if (dataSet == "ERSST") oceanData <- oceanData_ERSST
# if (dataSet == "SSH") oceanData <- oceanData_SSH

# Index the locations in the file
lons <- as.numeric(dimnames(oceanData)[[1]])
lats <- as.numeric(dimnames(oceanData)[[2]])
yr_mo <- dimnames(oceanData)[[3]]
lon.index<-which(lons >= min.lon & lons <= max.lon)
lat.index<-which(lats >= min.lat & lats <= max.lat)
yr_mo.index<-which(yr_mo %in% year_mo$label)
# Subset the ocean data with user-defined extent
oceanData <- oceanData[lon.index, lat.index, yr_mo.index]



# Create the function to calculate seasonal averages
createSeasonalData<-function(oceanData,
years = years, months = months, year_mo=year_mo, season=1) {
seasonal<-array(NA, c(dim(oceanData)[1], dim(oceanData)[2], length(years)), dimnames = list(dimnames(oceanData)[[1]], dimnames(oceanData)[[2]], years))
@@ -75,8 +67,6 @@ get_CMISST_index <- function(response, oceanData=oceanData_ERSST,

#********************************************************************
# Create the index (how similar is each year to the covariance map)
# For the PDO, I think they regress each year onto the pdo pattern
# But are there other methods of comparing similarity of matrices that we should think about?
#********************************************************************
coefs_cov<-NULL
options(na.action="na.omit")
@@ -87,9 +77,15 @@ get_CMISST_index <- function(response, oceanData=oceanData_ERSST,
lm(as.vector(oceanData.s4.scl[,,tt]) ~ -1 + as.vector(covs4))$coef))
coefs_cov<-data.frame(coefs_cov)
coefs_cov$year<-years
#index_cov<-cbind(coefs_cov,response$val)
index_cov<-merge(coefs_cov, response[response$year %in% years.fit,], all.x=TRUE)
colnames(index_cov)<-c("year","win.cov","spr.cov","sum.cov","aut.cov","val")

# Returns index as a list
# cmisst[[1]] is a 6-column data frame (one list item): year, the 4 seasonal indices, and the response
# cmisst[[2]] winter spatial covariance values (for maps)
# cmisst[[3]] spring spatial covariance values (for maps)
# cmisst[[4]] summer spatial covariance values (for maps)
# cmisst[[5]] autumn spatial covariance values (for maps)
# cmisst[[6]] lat, long min and max, 1 list item
return(list(index_cov, covs1, covs2, covs3, covs4, c(min.lat, max.lat, min.lon, max.lon)))
}
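
# A minimal sketch of calling get_CMISST_index() and unpacking its return list.
# The arguments beyond response/oceanData are not shown in this diff, so the
# extent and years below are assumptions (mirroring LOO_CV); item names follow
# the comments above. Guarded by if (FALSE) so sourcing this file does not run it.
if (FALSE) {
  load('data/oceanSSTData.RData')            # provides oceanData_ERSST
  load('CMISSTapp/data/responseData.RData')  # provides response
  yrs  <- 1981:2020
  resp <- response[response$year %in% yrs, c("year", "spCK")]
  cmisst <- get_CMISST_index(response = resp, oceanData = oceanData_ERSST,
                             min.lon = 158, max.lon = 246,
                             min.lat = 10, max.lat = 62,
                             years = yrs, months = 1:12)
  index    <- cmisst[[1]]   # year, win/spr/sum/aut covariance indices, response (val)
  covs.win <- cmisst[[2]]   # winter covariance map, for plotting
  extent   <- cmisst[[6]]   # c(min.lat, max.lat, min.lon, max.lon)
  plot(index$year, index$spr.cov, type = "b",
       xlab = "Year", ylab = "Spring CMISST index")
}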