In [51]:
Packages <- c("dplyr",  "nleqslv", "broom","cubature", "geosphere", "data.table",  "ggplot2", "bbmle", "stringr",  "lubridate", "RColorBrewer")

invisible(suppressPackageStartupMessages(lapply(Packages, library, character.only = TRUE)))

setwd('/local/home/katrinac/oceanography')
"%!in%" <- function(x,table) match(x,table, nomatch = 0) == 0
source("~/parentage/kernel_fitting/1340_loci/functions/ll_kt_both_bbmle.R")
source("~/parentage/kernel_fitting/1340_loci/functions/ll_kt_both_grid_search.R")
source("~/oceanography/scripts/neg_LL_biophys.R")
source("~/parentage/kernel_fitting/1340_loci/functions/GenGausKernInt_sum0.5.R") #integrate_kernel_sum1
source("~/parentage/kernel_fitting/1340_loci/functions/GenGausKernInt_sum1.R")
source("~/parentage/kernel_fitting/1340_loci/functions/cdf_solve.R") #median
source("~/parentage/kernel_fitting/1340_loci/functions/cdf_solve90.R") #dist 90% retained

#source("~/oceanography/scripts/PredictedProportions.R")

#read in the kernel fitting summary
kernels <- fread(file="~/parentage/kernel_fitting/1340_loci/final_results/tables/kernel_fitting_summary.csv")
kernel2012_14 <- fread(file="~/oceanography/empirical_data/genetics/GenKernelsForROMSComp2012-14.csv")

#read in the centroids adjusted for the simulation, so the Magbangons combined 
#centroids <- fread(file="~/oceanography/script_output/SurveyData/SimulationCentroids.csv")
Centroids <- fread(file="~/oceanography/empirical_data/site_centroids_SimTest.csv") 
Centroids$site <- gsub("_full", "", Centroids$site, fixed=TRUE)
Centroids$site <- gsub("_ten_per_cover", "", Centroids$site, fixed=TRUE)

Centroids <- Centroids %>%
    arrange(site)

#setorder(Centroids, site)#warning! This sets order based on site, and then lat/lon. So the table is not alphabetical by site, but that's fine as long as all of the "sampled_reef" vectors reflect this, so that reef_sizes, distance, and sampled_reefs match up by row/col index 
#read in the table with number of recruits sampled at each site for each year
AnnualRecsSamp <- fread(file="~/oceanography/script_output/SurveyData/AnnualRecruitsSampled.csv")
#read in the table of the proportion of anemones sampled at each site for each year
PropSamp <- unique(fread(file="~/oceanography/script_output/SurveyData/ProportionHabitatSampled.csv")[
    , .(site, year=end_year, prop_anem_samp=total_prop_hab_sampled_anems_tidied)][ #select and rename columns with the tideied data to use
    site %like% "Magbangon", site := "Magbangon"][ #collapse Magbangon values
    , prop_anem_samp := sum(prop_anem_samp), by=c("site", "year")], by=c("site", "year"))[ #collapse magbangons to match ROMS data
    site=="Sitio Lonas" & year %in% c(2012, 2013, 2014), prop_anem_samp :=1][site=="Caridad Proper" & year %in% c(2013, 2014), prop_anem_samp :=1]

### List of source locations
SitesSource <- Centroids

### List of destination locations
SitesDest <- Centroids

DistMatm <- distm(SitesSource[,c('lon','lat')], SitesSource[,c('lon','lat')], fun=distVincentyEllipsoid)
Distances <- DistMatm*10^-3
#read in the reef areas for the kernel fitting
Area <- fread("~/oceanography/empirical_data/site_area_header_nonsurveyed_simulation_kernels_test.csv") %>%
    arrange(site) %>%
    filter(site %!in% c("near_north_full1", "near_north_full2", "near_north_full3", "near_south_full1", "near_south_full2", "near_south_full3")) %>%
    mutate(kmsq=msq*10^-6)# %>%
    #select(kmsq) #need to uncomment for functions to work
Area$site <- gsub("_ten_per_cover", "", Area$site, fixed=TRUE)

reef_sizes <- as.matrix(Area$kmsq)

#make a site index table, use this for Sampled_reefs input in kernel fitting
SiteIndex <- unique(Centroids %>% arrange(site), by="site")[, index := .I] #add the row number as the unique site index, leave CAI in if fitting a kernel 
SiteIndexBioPhys <- unique(Centroids %>% arrange(site), by="site")[site != "CAI" ][, index := .I] #add the row number as the unique site index, take CAI out for biophysical likelihood function

#make a table with the survey information for each site (how many fish sampled, prop anems sampled, total number of anems at site)
SurveyData <- AnnualRecsSamp[PropSamp, on=.(year, site)][#join the sampling tables together
    is.na(n_offs_gen), n_offs_gen := 0]#change NA's to 0
#setnames(SurveyData, c("PropAnemSamp", "TotalAnems"), c("prop_anem_samp", "total_anems"))
#setkey(SurveyData, site)
#check all sites are represented in centroids and area (and indirectly distances, which comes from centroids)
#Area[site %!in% centroids$site] #should be nothing

#Allison's abundance time series data 
#download.file(url = "https://github.com/pinskylab/Clownfish_persistence/blob/master/Data/Script_outputs/females_df_F.RData?raw=true", destfile = "~/oceanography/empirical_data/genetics/females_df_F.RData")
load("~/oceanography/empirical_data/genetics/females_df_F.RData")
Abundance <- as.data.table(females_df_F)
setnames(Abundance, "nF", "num_females")
Abundance <- unique(Abundance[site %like% "Magbangon", site := "Magbangon"][ #collapse Magbangon values
            , num_females := sum(num_females), by=c("site", "year")], by=c("site", "year"))
#join the survey sampling tables together
SurveyData <- AnnualRecsSamp[PropSamp, on=.(year, site)][
    is.na(n_offs_gen), n_offs_gen := 0]#change NA's to 0


SurveyData <- Abundance[, c("year", "site", "num_females")][SurveyData, on=.(year, site)]#join in Allison's estimate of female abundance. There are NA values, but that's okay we can figure those out when we start thinking about incorporating uncertainty in this
#quick check that all components are in the same, alphabetical order
sum(which(SiteIndex$site==Area$site)==FALSE) #needs to be 0!! sites have to be in the same order
sum(which(Area$site==Centroids$site)==FALSE) #needs to be 0!! sites have to be in the same order

#read in genetic parentage matrices for each time frame MATCHING DIMENSIONS WITH BIOPHYSICAL FOR BIOPHYSICAL LIKELIHOOD- NOT KERNEL FITTING
GenMat2012_4 <- as.matrix(fread(file="~/oceanography/script_output/SurveyData/20210625_ParentageMatrix2012-14ForROMSComp.csv"))
GenMat2012 <- as.matrix(fread(file="~/oceanography/script_output/SurveyData/20210625_ParentageMatrix2012ForROMSComp.csv"))
GenMat2013 <- as.matrix(fread(file="~/oceanography/script_output/SurveyData/20210625_ParentageMatrix2013ForROMSComp.csv"))
GenMat2014 <- as.matrix(fread(file="~/oceanography/script_output/SurveyData/20210625_ParentageMatrix2014ForROMSComp.csv"))
GenMatNEM <- as.matrix(fread(file="~/oceanography/script_output/SurveyData/20210625_ParentageMatrixNEM2012-14ForROMSComp.csv"))
GenMatSWM <- as.matrix(fread(file="~/oceanography/script_output/SurveyData/20210625_ParentageMatrixSWM2012-14ForROMSComp.csv"))

#read in genetic parentage matrices for each time frame FOR KERNEL FITTING
KernelGenMat2012_4 <- as.matrix(fread(file="~/oceanography/script_output/SurveyData/20210701_KernelParentageMatrix2012-14ForROMSComp.csv"))
KernelGenMat2012 <- as.matrix(fread(file="~/oceanography/script_output/SurveyData/20210701_KernelParentageMatrix2012ForROMSComp.csv"))
KernelGenMat2013 <- as.matrix(fread(file="~/oceanography/script_output/SurveyData/20210701_KernelParentageMatrix2013ForROMSComp.csv"))
KernelGenMat2014 <- as.matrix(fread(file="~/oceanography/script_output/SurveyData/20210701_KernelParentageMatrix2014ForROMSComp.csv"))
KernelGenMatNEM <- as.matrix(fread(file="~/oceanography/script_output/SurveyData/20210701_KernelParentageMatrixNEM2012-14ForROMSComp.csv"))
KernelGenMatSWM <- as.matrix(fread(file="~/oceanography/script_output/SurveyData/20210701_KernelParentageMatrixSWM2012-14ForROMSComp.csv"))

##read in biophysical data for biophysical likelihood
FullBiophysMatNorm <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210701_BioPhysNormConnMatrix2012-14ForROMSComp.csv"))
AnnualBiophysMatNorm2012 <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210701_BioPhysNormConnMatrix2012ForROMSComp.csv"))
AnnualBiophysMatNorm2013 <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210701_BioPhysNormConnMatrix2013ForROMSComp.csv"))
AnnualBiophysMatNorm2014 <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210701_BioPhysNormConnMatrix2014ForROMSComp.csv"))
MonsoonBiophysMatNormNEM <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210701_BioPhysNormConnMatrixNEM2012-14ForROMSComp.csv"))
MonsoonBiophysMatNormSWM <-as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210701_BioPhysNormConnMatrixSWM2012-14ForROMSComp.csv"))

##read in biophysical data for KERNEL FITTING
#FullBiophysMat <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210701_BioPhysParentageMatrix2012-14ForROMSComp.csv"))
#AnnualBiophysMat2012 <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210701_BioPhysParentageMatrix2012ForROMSComp.csv"))
#AnnualBiophysMat2013 <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210701_BioPhysParentageMatrix2013ForROMSComp.csv"))
#AnnualBiophysMat2014 <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210701_BioPhysParentageMatrix2014ForROMSComp.csv"))
#MonsoonBiophysMatNEM <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210701_BioPhysParentageMatrixNEM2012-14ForROMSComp.csv"))
#MonsoonBiophysMatSWM <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210701_BioPhysParentageMatrixSWM2012-14ForROMSComp.csv"))

##15 DAY PLD SIMULATIONS
##read in biophysical data for biophysical likelihood
#FullBiophysMatNorm <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysNormConnMatrix2012-14ForROMSComp15DayPLD.csv"))
#AnnualBiophysMatNorm2012 <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysNormConnMatrix2012ForROMSComp15DayPLD.csv"))
#AnnualBiophysMatNorm2013 <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysNormConnMatrix2013ForROMSComp15DayPLD.csv"))
#AnnualBiophysMatNorm2014 <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysNormConnMatrix2014ForROMSComp15DayPLD.csv"))
#MonsoonBiophysMatNormNEM <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysNormConnMatrixNEM2012-14ForROMSComp15DayPLD.csv"))
#MonsoonBiophysMatNormSWM <-as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysNormConnMatrixSWM2012-14ForROMSComp15DayPLD.csv"))

#read in biophysical data for KERNEL FITTING
FullBiophysMat <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysParentageMatrix2012-14ForROMSComp15DayPLD.csv"))
AnnualBiophysMat2012 <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysParentageMatrix2012ForROMSComp15DayPLD.csv"))
AnnualBiophysMat2013 <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysParentageMatrix2013ForROMSComp15DayPLD.csv"))
AnnualBiophysMat2014 <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysParentageMatrix2014ForROMSComp15DayPLD.csv"))
MonsoonBiophysMatNEM <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysParentageMatrixNEM2012-14ForROMSComp15DayPLD.csv"))
MonsoonBiophysMatSWM <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysParentageMatrixSWM2012-14ForROMSComp15DayPLD.csv"))

In [6]:
mean(Distances[2,]) #avg distance to CAI


In [2]:


#fit the kernels, get the biophysical data together
biophys_par_data2012 <- list(Distances=Distances, Assignments=AnnualBiophysMat2012, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2012 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(AnnualBiophysMat2012), ncol=1, 1))
Sim2012Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_data2012, control=list(maxit=500)))

biophys_par_data2013 <- list(Distances=Distances, Assignments=AnnualBiophysMat2013, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2013 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(AnnualBiophysMat2013), ncol=1, 1))
Sim2013Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_data2013, control=list(maxit=500)))

biophys_par_data2014 <- list(Distances=Distances, Assignments=AnnualBiophysMat2014, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(AnnualBiophysMat2014), ncol=1, 1))
Sim2014Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_data2014, control=list(maxit=500)))

biophys_par_data2012_4 <- list(Distances=Distances, Assignments=FullBiophysMat, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(FullBiophysMat), ncol=1, 1))
Sim2012_4Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_data2012_4, control=list(maxit=500)))

biophys_par_dataNEM <- list(Distances=Distances, Assignments=MonsoonBiophysMatNEM, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(MonsoonBiophysMatNEM), ncol=1, 1))
SimNEMFit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_dataNEM, control=list(maxit=500)))

biophys_par_dataSWM <- list(Distances=Distances, Assignments=MonsoonBiophysMatSWM, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(MonsoonBiophysMatSWM), ncol=1, 1))
SimSWMFit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_dataSWM, control=list(maxit=500)))



In [8]:
summary(Sim2012Fit)
summary(Gen2012Fit)

Maximum likelihood estimation

Call:
mle2(minuslogl = LL_kt_bbmle, start = list(k = -3, theta = 1), 
    method = "L-BFGS-B", data = biophys_par_data2012, lower = c(-10, 
        0.15), upper = c(10, 8), control = list(maxit = 500))

Coefficients:
        Estimate Std. Error z value     Pr(z)    
k     -2.1643785  0.0027103 -798.58 < 2.2e-16 ***
theta  4.9907350  0.0474163  105.25 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

-2 log L: 297166.3 

Maximum likelihood estimation

Call:
mle2(minuslogl = LL_kt_bbmle, start = list(k = -3, theta = 1), 
    method = "L-BFGS-B", data = par_data2012, lower = c(-10, 
        0.15), upper = c(10, 8), control = list(maxit = 500))

Coefficients:
      Estimate Std. Error z value  Pr(z)    
k     -3.27091    0.33591 -9.7375 <2e-16 ***
theta  3.52877    3.36407  1.0490 0.2942    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

-2 log L: 34.52999 

In [40]:
Area

site,msq,kmsq
Cabatoan,53004,0.053004
CAI,61575000,61.575
Caridad Cemetery,7963,0.007963
Caridad Proper,7829,0.007829
cuatras_islas_central,2000000,2.0
cuatras_islas_northeast,1500000,1.5
cuatras_islas_northwest,2500000,2.5
cuatras_islas_south,2500000,2.5
Elementary School,14214,0.014214
Gabas,3015,0.003015


In [5]:

#make a summary table for all time frames of simulation data
SimKernels <- as.data.frame(matrix(NA, nrow = 6, ncol = 8))
setDT(SimKernels)
setnames(SimKernels, c("time_scale", "time_id", "k", "theta", "mdd", "med", "dist90", "log_like"))
#SimKernels <- data.table(time_scale=character(), time_id=character(), k=numeric(), theta=numeric(), mdd=numeric(), med=numeric(), dist90=numeric())


BestK2012 <- as.numeric(coef(Sim2012Fit)[1])
BestTheta2012 <- as.numeric(coef(Sim2012Fit)[2])
MDD2012 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestK2012, theta=BestTheta2012, , method = "pcubature")$integral)
k_eval <- BestK2012
theta_eval <- BestTheta2012
Med2012  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_2012 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

SimKernels$time_scale[1] <- "annual"
SimKernels$time_id[1] <- "2012"
SimKernels$k[1] <-  BestK2012
SimKernels$theta[1] <- BestTheta2012
SimKernels$mdd[1] <- MDD2012
SimKernels$med[1] <- Med2012
SimKernels$dist90[1] <- Dist90_2012
SimKernels$log_like[1] <- logLik(Sim2012Fit)[1]

BestK2013 <- as.numeric(coef(Sim2013Fit)[1])
BestTheta2013 <- as.numeric(coef(Sim2013Fit)[2])
MDD2013 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestK2013, theta=BestTheta2013, , method = "pcubature")$integral)
k_eval <- BestK2013
theta_eval <- BestTheta2013
Med2013  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_2013 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

SimKernels$time_scale[2] <- "annual"
SimKernels$time_id[2] <- "2013"
SimKernels$k[2] <-  BestK2013
SimKernels$theta[2] <- BestTheta2013
SimKernels$mdd[2] <- MDD2013
SimKernels$med[2] <- Med2013
SimKernels$dist90[2] <- Dist90_2013
SimKernels$log_like[2] <- logLik(Sim2013Fit)[1]


BestK2014 <- as.numeric(coef(Sim2014Fit)[1])
BestTheta2014 <- as.numeric(coef(Sim2014Fit)[2])
MDD2014 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestK2014, theta=BestTheta2014, , method = "pcubature")$integral)
k_eval <- BestK2014
theta_eval <- BestTheta2014
Med2014  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_2014 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

SimKernels$time_scale[3] <- "annual"
SimKernels$time_id[3] <- "2014"
SimKernels$k[3] <-  BestK2014
SimKernels$theta[3] <- BestTheta2014
SimKernels$mdd[3] <- MDD2014
SimKernels$med[3] <- Med2014
SimKernels$dist90[3] <- Dist90_2014
SimKernels$log_like[3] <- logLik(Sim2014Fit)[1]

BestK2012_4 <- as.numeric(coef(Sim2012_4Fit)[1])
BestTheta2012_4 <- as.numeric(coef(Sim2012_4Fit)[2])
MDD2012_4 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestK2012_4, theta=BestTheta2012_4, , method = "pcubature")$integral)
k_eval <- BestK2012_4
theta_eval <- BestTheta2012_4
Med2012_4  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_2012_4 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

SimKernels$time_scale[4] <- "interannual"
SimKernels$time_id[4] <- "2012-4"
SimKernels$k[4] <-  BestK2012_4
SimKernels$theta[4] <- BestTheta2012_4
SimKernels$mdd[4] <- MDD2012_4
SimKernels$med[4] <- Med2012_4
SimKernels$dist90[4] <- Dist90_2012_4
SimKernels$log_like[4] <- logLik(Sim2012_4Fit)[1]

BestKNEM <- as.numeric(coef(SimNEMFit)[1])
BestThetaNEM <- as.numeric(coef(SimNEMFit)[2])
MDDNEM <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestKNEM, theta=BestThetaNEM, , method = "pcubature")$integral)
k_eval <- BestKNEM
theta_eval <- BestThetaNEM
MedNEM  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_NEM <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

SimKernels$time_scale[5] <- "season"
SimKernels$time_id[5] <- "NEM"
SimKernels$k[5] <-  BestKNEM
SimKernels$theta[5] <- BestThetaNEM
SimKernels$mdd[5] <- MDDNEM
SimKernels$med[5] <- MedNEM
SimKernels$dist90[5] <- Dist90_NEM
SimKernels$log_like[5] <- logLik(SimNEMFit)[1]

BestKSWM <- as.numeric(coef(SimSWMFit)[1])
BestThetaSWM <- as.numeric(coef(SimSWMFit)[2])
MDDSWM <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestKSWM, theta=BestThetaSWM, , method = "pcubature")$integral)
k_eval <- BestKSWM
theta_eval <- BestThetaSWM
MedSWM  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_SWM <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

SimKernels$time_scale[6] <- "season"
SimKernels$time_id[6] <- "SWM"
SimKernels$k[6] <-  BestKSWM
SimKernels$theta[6] <- BestThetaSWM
SimKernels$mdd[6] <- MDDSWM
SimKernels$med[6] <- MedSWM
SimKernels$dist90[6] <- Dist90_SWM
SimKernels$log_like[6] <- logLik(SimSWMFit)[1]

SimKernels[, `:=`(k=round(k, 5), theta=round(theta, 5), mdd=round(mdd, 2), med=round(med, 2), dist90=round(dist90, 2), data_source="simulation"), by="time_id"]

#fwrite(SimKernels, file="~/oceanography/script_output/KernelFits/SimulationKernelFits15DayPLD.csv")


SimKernels


time_scale,time_id,k,theta,mdd,med,dist90,log_like,data_source
annual,2012,-2.05116,0.94557,8.45,5.73,19.64,-50575.5,simulation
annual,2013,-1.53911,0.75976,7.63,4.67,18.34,-180189.17,simulation
annual,2014,-0.69775,0.63176,5.28,2.91,13.02,-194862.15,simulation
interannual,2012-4,-1.21595,0.70093,6.68,3.92,16.25,-593694.0,simulation
season,NEM,-1.26172,0.72426,6.45,3.85,15.62,-560379.23,simulation
season,SWM,-1.4255,0.6099,12.16,6.57,30.1,-28908.76,simulation


In [3]:
#genetic data with same reef metadata structure as model

par_data2012 <- list(Distances=Distances, Assignments=KernelGenMat2012, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2012 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=as.matrix(SurveyData[year==2012 & prop_anem_samp >0, .(prop_anem_samp)]))
Gen2012Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=par_data2012, control=list(maxit=500)))

par_data2013 <- list(Distances=Distances, Assignments=KernelGenMat2013, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2013 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=as.matrix(SurveyData[year==2013 & prop_anem_samp >0, .(prop_anem_samp)]))
Gen2013Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=par_data2013, control=list(maxit=500)))

par_data2014 <- list(Distances=Distances, Assignments=KernelGenMat2014, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=as.matrix(SurveyData[year==2014 & prop_anem_samp >0, .(prop_anem_samp)]))
Gen2014Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=par_data2014, control=list(maxit=500)))

par_data2012_4 <- list(Distances=Distances, Assignments=KernelGenMat2012_4, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=as.matrix(SurveyData[year==2014 & prop_anem_samp >0, .(prop_anem_samp)]))
Gen2012_4Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=par_data2012_4, control=list(maxit=500)))

par_dataNEM <- list(Distances=Distances, Assignments=KernelGenMatNEM, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=as.matrix(SurveyData[year==2014 & prop_anem_samp >0, .(prop_anem_samp)]))
GenNEMFit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=par_dataNEM, control=list(maxit=500)))

par_dataSWM <- list(Distances=Distances, Assignments=KernelGenMatSWM, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=as.matrix(SurveyData[year==2014 & prop_anem_samp >0, .(prop_anem_samp)]))
GenSWMFit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=par_dataSWM, control=list(maxit=500)))



In [17]:
#make a summary table for all time frames of genetic data
GenKernels <- as.data.frame(matrix(NA, nrow = 6, ncol = 8))
setDT(GenKernels)
setnames(GenKernels, c("time_scale", "time_id", "k", "theta", "mdd", "med", "dist90", "log_like"))
#GenKernels <- data.table(time_scale=character(), time_id=character(), k=numeric(), theta=numeric(), mdd=numeric(), med=numeric(), dist90=numeric())


BestK2012 <- as.numeric(coef(Gen2012Fit)[1])
BestTheta2012 <- as.numeric(coef(Gen2012Fit)[2])
MDD2012 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestK2012, theta=BestTheta2012, , method = "pcubature")$integral)
k_eval <- BestK2012
theta_eval <- BestTheta2012
Med2012  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_2012 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

GenKernels$time_scale[1] <- "annual"
GenKernels$time_id[1] <- "2012"
GenKernels$k[1] <-  BestK2012
GenKernels$theta[1] <- BestTheta2012
GenKernels$mdd[1] <- MDD2012
GenKernels$med[1] <- Med2012
GenKernels$dist90[1] <- Dist90_2012
GenKernels$log_like[1] <- logLik(Gen2012Fit)[1]

BestK2013 <- as.numeric(coef(Gen2013Fit)[1])
BestTheta2013 <- as.numeric(coef(Gen2013Fit)[2])
MDD2013 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestK2013, theta=BestTheta2013, , method = "pcubature")$integral)
k_eval <- BestK2013
theta_eval <- BestTheta2013
Med2013  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_2013 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

GenKernels$time_scale[2] <- "annual"
GenKernels$time_id[2] <- "2013"
GenKernels$k[2] <-  BestK2013
GenKernels$theta[2] <- BestTheta2013
GenKernels$mdd[2] <- MDD2013
GenKernels$med[2] <- Med2013
GenKernels$dist90[2] <- Dist90_2013
GenKernels$log_like[2] <- logLik(Gen2013Fit)[1]

BestK2014 <- as.numeric(coef(Gen2014Fit)[1])
BestTheta2014 <- as.numeric(coef(Gen2014Fit)[2])
MDD2014 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestK2014, theta=BestTheta2014, , method = "pcubature")$integral)
k_eval <- BestK2014
theta_eval <- BestTheta2014
Med2014  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_2014 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

GenKernels$time_scale[3] <- "annual"
GenKernels$time_id[3] <- "2014"
GenKernels$k[3] <-  BestK2014
GenKernels$theta[3] <- BestTheta2014
GenKernels$mdd[3] <- MDD2014
GenKernels$med[3] <- Med2014
GenKernels$dist90[3] <- Dist90_2014
GenKernels$log_like[3] <- logLik(Gen2014Fit)[1]

BestK2012_4 <- as.numeric(coef(Gen2012_4Fit)[1])
BestTheta2012_4 <- as.numeric(coef(Gen2012_4Fit)[2])
MDD2012_4 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestK2012_4, theta=BestTheta2012_4, , method = "pcubature")$integral)
k_eval <- BestK2012_4
theta_eval <- BestTheta2012_4
Med2012_4  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_2012_4 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

GenKernels$time_scale[4] <- "interannual"
GenKernels$time_id[4] <- "2012-4"
GenKernels$k[4] <-  BestK2012_4
GenKernels$theta[4] <- BestTheta2012_4
GenKernels$mdd[4] <- MDD2012_4
GenKernels$med[4] <- Med2012_4
GenKernels$dist90[4] <- Dist90_2012_4
GenKernels$log_like[4] <- logLik(Gen2012_4Fit)[1]

BestKNEM <- as.numeric(coef(GenNEMFit)[1])
BestThetaNEM <- as.numeric(coef(GenNEMFit)[2])
MDDNEM <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestKNEM, theta=BestThetaNEM, , method = "pcubature")$integral)
k_eval <- BestKNEM
theta_eval <- BestThetaNEM
MedNEM  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_NEM <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

GenKernels$time_scale[5] <- "season"
GenKernels$time_id[5] <- "NEM"
GenKernels$k[5] <-  BestKNEM
GenKernels$theta[5] <- BestThetaNEM
GenKernels$mdd[5] <- MDDNEM
GenKernels$med[5] <- MedNEM
GenKernels$dist90[5] <- Dist90_NEM
GenKernels$log_like[5] <- logLik(GenNEMFit)[1]

BestKSWM <- as.numeric(coef(GenSWMFit)[1])
BestThetaSWM <- as.numeric(coef(GenSWMFit)[2])
MDDSWM <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestKSWM, theta=BestThetaSWM, , method = "pcubature")$integral)
k_eval <- BestKSWM
theta_eval <- BestThetaSWM
MedSWM  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_SWM <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

GenKernels$time_scale[6] <- "season"
GenKernels$time_id[6] <- "SWM"
GenKernels$k[6] <-  BestKSWM
GenKernels$theta[6] <- BestThetaSWM
GenKernels$mdd[6] <- MDDSWM
GenKernels$med[6] <- MedSWM
GenKernels$dist90[6] <- Dist90_SWM
GenKernels$log_like[6] <- logLik(GenSWMFit)[1]

GenKernels[, `:=`(k=round(k, 5), theta=round(theta, 5), mdd=round(mdd, 2), med=round(med, 2), dist90=round(dist90, 2), data_source="genetics"), by="time_id"]

#fwrite(GenKernels, file="~/oceanography/script_output/KernelFits/GeneticKernelFits.csv")


GenKernels



time_scale,time_id,k,theta,mdd,med,dist90,log_like,data_source
annual,2012,-3.27091,3.52876,13.02,12.02,25.08,-17.26499,genetics
annual,2013,-3.27484,5.0,12.77,12.18,23.91,-112.75593,genetics
annual,2014,1.97231,0.26016,76.0,17.47,182.37,-78.4862,genetics
interannual,2012-4,-0.22643,0.439,13.69,5.77,34.74,-218.61429,genetics
season,NEM,0.76361,0.37294,12.87,4.64,32.61,-27.26112,genetics
season,SWM,-3.38408,5.0,14.25,13.59,26.67,-71.81265,genetics


In [20]:
#ROMS simulations

# use a grid search to find k that minimizes the log likelihood
LogLikeSim2012 <- setDT(expand.grid(k_eval=seq(from=-10, to=10, length.out = 2000), theta_eval=seq(from=0.1, to=5, length.out = 2000)))
LogLikeSim2012[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=biophys_par_data2012$Distances, Assignments=biophys_par_data2012$Assignments, Sampled_reefs=biophys_par_data2012$Sampled_reefs, Reef_sizes=biophys_par_data2012$Reef_sizes, Adult_sample_proportions=biophys_par_data2012$Adult_sample_proportions), by=.(k_eval, theta_eval)]   #write profile results
fwrite(LogLikeSim2012, file="~/oceanography/script_output/KernelFits/LikelihoodProfileBiophysical2012_15DayPLD.csv")

#LogLikeSim2013 <- setDT(expand.grid(k_eval=seq(from=-10, to=10, length.out = 2000), theta_eval=seq(from=0.1, to=5, length.out = 2000)))
#LogLikeSim2013[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=biophys_par_data2013$Distances, Assignments=biophys_par_data2013$Assignments, Sampled_reefs=biophys_par_data2013$Sampled_reefs, Reef_sizes=biophys_par_data2013$Reef_sizes, Adult_sample_proportions=biophys_par_data2013$Adult_sample_proportions), by=.(k_eval, theta_eval)] 
##write profile results
#fwrite(LogLikeSim2013, file="~/oceanography/script_output/KernelFits/LikelihoodProfileBiophysical2013_15DayPLD.csv")
#
#LogLikeSim2014 <- setDT(expand.grid(k_eval=seq(from=-10, to=10, length.out = 2000), theta_eval=seq(from=0.1, to=5, length.out = 2000)))
#LogLikeSim2014[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=biophys_par_data2014$Distances, Assignments=biophys_par_data2014$Assignments, Sampled_reefs=biophys_par_data2014$Sampled_reefs, Reef_sizes=biophys_par_data2014$Reef_sizes, Adult_sample_proportions=biophys_par_data2014$Adult_sample_proportions), by=.(k_eval, theta_eval)] 
##write profile results
#fwrite(LogLikeSim2014, file="~/oceanography/script_output/KernelFits/LikelihoodProfileBiophysical2014_15DayPLD.csv")
#
#LogLikeSim2012_4 <- setDT(expand.grid(k_eval=seq(from=-10, to=10, length.out = 2000), theta_eval=seq(from=0.1, to=5, length.out = 2000)))
#LogLikeSim2012_4[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=biophys_par_data2012_4$Distances, Assignments=biophys_par_data2012_4$Assignments, Sampled_reefs=biophys_par_data2012_4$Sampled_reefs, Reef_sizes=biophys_par_data2012_4$Reef_sizes, Adult_sample_proportions=biophys_par_data2012_4$Adult_sample_proportions), by=.(k_eval, theta_eval)] 
##write profile results
#fwrite(LogLikeSim2012_4, file="~/oceanography/script_output/KernelFits/LikelihoodProfileBiophysical2012_4_15DayPLD.csv")
#
#LogLikeSimNEM <- setDT(expand.grid(k_eval=seq(from=-10, to=10, length.out = 2000), theta_eval=seq(from=0.1, to=5, length.out = 2000)))
#LogLikeSimNEM[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=biophys_par_dataNEM$Distances, Assignments=biophys_par_dataNEM$Assignments, Sampled_reefs=biophys_par_dataNEM$Sampled_reefs, Reef_sizes=biophys_par_dataNEM$Reef_sizes, Adult_sample_proportions=biophys_par_dataNEM$Adult_sample_proportions), by=.(k_eval, theta_eval)] 
##write profile results
#fwrite(LogLikeSimNEM, file="~/oceanography/script_output/KernelFits/LikelihoodProfileBiophysicalNEM_15DayPLD.csv")
#
#LogLikeSimSWM <- setDT(expand.grid(k_eval=seq(from=-10, to=10, length.out = 2000), theta_eval=seq(from=0.1, to=5, length.out = 2000)))
#LogLikeSimSWM[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=biophys_par_dataSWM$Distances, Assignments=biophys_par_dataSWM$Assignments, Sampled_reefs=biophys_par_dataSWM$Sampled_reefs, Reef_sizes=biophys_par_dataSWM$Reef_sizes, Adult_sample_proportions=biophys_par_dataSWM$Adult_sample_proportions), by=.(k_eval, theta_eval)] 
##write profile results
#fwrite(LogLikeSimSWM, file="~/oceanography/script_output/KernelFits/LikelihoodProfileBiophysicalSWM_15DayPLD.csv")
#
##also for genetics
##LogLikeGen2012 <- setDT(expand.grid(k_eval=seq(from=-10, to=10, length.out = 2000), theta_eval=seq(from=0.1, to=5, length.out = 200)))
##LogLikeGen2012[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=par_data2012$Distances, Assignments=par_data2012$Assignments, Sampled_reefs=par_data2012$Sampled_reefs, Reef_sizes=par_data2012$Reef_sizes, Adult_sample_proportions=par_data2012$Adult_sample_proportions), by=.(k_eval, theta_eval)] 
##write profile results
##fwrite(LogLikeGen2012, file="~/oceanography/script_output/KernelFits/LikelihoodProfileGenetic2012.csv")
#
##LogLikeGen2013 <- setDT(expand.grid(k_eval=seq(from=-10, to=10, length.out = 2000), theta_eval=seq(from=0.1, to=5, length.out = 200)))
##LogLikeGen2013[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=par_data2013$Distances, Assignments=par_data2013$Assignments, Sampled_reefs=par_data2013$Sampled_reefs, Reef_sizes=par_data2013$Reef_sizes, Adult_sample_proportions=par_data2013$Adult_sample_proportions), by=.(k_eval, theta_eval)] 
##write profile results
##fwrite(LogLikeGen2013, file="~/oceanography/script_output/KernelFits/LikelihoodProfileGenetic2013.csv")
#
##LogLikeGen2014 <- setDT(expand.grid(k_eval=seq(from=-10, to=10, length.out = 2000), theta_eval=seq(from=0.1, to=5, length.out = 200)))
##LogLikeGen2014[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=par_data2014$Distances, Assignments=par_data2014$Assignments, Sampled_reefs=par_data2014$Sampled_reefs, Reef_sizes=par_data2014$Reef_sizes, Adult_sample_proportions=par_data2014$Adult_sample_proportions), by=.(k_eval, theta_eval)] 
##write profile results
##fwrite(LogLikeGen2014, file="~/oceanography/script_output/KernelFits/LikelihoodProfileGenetic2014.csv")
#
##LogLikeGen2012_4 <- setDT(expand.grid(k_eval=seq(from=-10, to=10, length.out = 2000), theta_eval=seq(from=0.1, to=5, length.out = 200)))
##LogLikeGen2012_4[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=par_data2012_4$Distances, Assignments=par_data2012_4$Assignments, Sampled_reefs=par_data2012_4$Sampled_reefs, Reef_sizes=par_data2012_4$Reef_sizes, Adult_sample_proportions=par_data2012_4$Adult_sample_proportions), by=.(k_eval, theta_eval)] 
##write profile results
##fwrite(LogLikeGen2012_4, file="~/oceanography/script_output/KernelFits/LikelihoodProfileGenetic2012_4.csv")
#
##LogLikeGenNEM <- setDT(expand.grid(k_eval=seq(from=-10, to=10, length.out = 2000), theta_eval=seq(from=0.1, to=5, length.out = 200)))
##LogLikeGenNEM[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=par_dataNEM$Distances, Assignments=par_dataNEM$Assignments, Sampled_reefs=par_dataNEM$Sampled_reefs, Reef_sizes=par_dataNEM$Reef_sizes, Adult_sample_proportions=par_dataNEM$Adult_sample_proportions), by=.(k_eval, theta_eval)] 
##write profile results
##fwrite(LogLikeGenNEM, file="~/oceanography/script_output/KernelFits/LikelihoodProfileGeneticNEM.csv")
#
##LogLikeGenSWM <- setDT(expand.grid(k_eval=seq(from=-10, to=10, length.out = 2000), theta_eval=seq(from=0.1, to=5, length.out = 200)))
##LogLikeGenSWM[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=par_dataSWM$Distances, Assignments=par_dataSWM$Assignments, Sampled_reefs=par_dataSWM$Sampled_reefs, Reef_sizes=par_dataSWM$Reef_sizes, Adult_sample_proportions=par_dataSWM$Adult_sample_proportions), by=.(k_eval, theta_eval)] 
##write profile results
##fwrite(LogLikeGenSWM, file="~/oceanography/script_output/KernelFits/LikelihoodProfileGeneticSWM.csv")

In [19]:
head(LogLikeSim2012)

k_eval,theta_eval,log_like
-10.0,0.1,69479.04
-9.989995,0.1,69477.49
-9.97999,0.1,69475.93
-9.969985,0.1,69474.36
-9.95998,0.1,69472.8
-9.949975,0.1,69471.24


In [4]:
#on 09/14/2021-fitting kernels to annual monsoons and 2011 biophysical data

#read in parentage matrix
BiophysMat2011NEM <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysParentageMatrixNEM2011ForROMSComp08DayPLD.csv"))
BiophysMat2012NEM <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysParentageMatrixNEM2012ForROMSComp08DayPLD.csv"))
BiophysMat2013NEM <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysParentageMatrixNEM2013ForROMSComp08DayPLD.csv"))
BiophysMat2014NEM <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysParentageMatrixNEM2014ForROMSComp08DayPLD.csv"))

BiophysMat2011SWM <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysParentageMatrixSWM2011ForROMSComp08DayPLD.csv"))
BiophysMat2012SWM <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysParentageMatrixSWM2012ForROMSComp08DayPLD.csv"))
BiophysMat2013SWM <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysParentageMatrixSWM2013ForROMSComp08DayPLD.csv"))
BiophysMat2014SWM <- as.matrix(fread(file="~/oceanography/script_output/ROMSDataTables/20210917_BioPhysParentageMatrixSWM2014ForROMSComp08DayPLD.csv"))


#collect data into list
#NEM
biophys_par_data2011NEM <- list(Distances=Distances, Assignments=BiophysMat2011NEM, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(BiophysMat2011NEM), ncol=1, 1))
SimNEM2011Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_data2011NEM, control=list(maxit=500)))

biophys_par_data2012NEM <- list(Distances=Distances, Assignments=BiophysMat2012NEM, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(BiophysMat2012NEM), ncol=1, 1))
SimNEM2012Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_data2012NEM, control=list(maxit=500)))

biophys_par_data2013NEM <- list(Distances=Distances, Assignments=BiophysMat2013NEM, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(BiophysMat2013NEM), ncol=1, 1))
SimNEM2013Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_data2013NEM, control=list(maxit=500)))

biophys_par_data2014NEM <- list(Distances=Distances, Assignments=BiophysMat2014NEM, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(BiophysMat2014NEM), ncol=1, 1))
SimNEM2014Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_data2014NEM, control=list(maxit=500)))

#SWM
biophys_par_data2011SWM <- list(Distances=Distances, Assignments=BiophysMat2011SWM, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(BiophysMat2011SWM), ncol=1, 1))
SimSWM2011Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_data2011SWM, control=list(maxit=500)))

biophys_par_data2012SWM <- list(Distances=Distances, Assignments=BiophysMat2012SWM, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(BiophysMat2012SWM), ncol=1, 1))
SimSWM2012Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_data2012SWM, control=list(maxit=500)))

biophys_par_data2013SWM <- list(Distances=Distances, Assignments=BiophysMat2013SWM, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(BiophysMat2013SWM), ncol=1, 1))
SimSWM2013Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_data2013SWM, control=list(maxit=500)))

biophys_par_data2014SWM <- list(Distances=Distances, Assignments=BiophysMat2014SWM, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(BiophysMat2014SWM), ncol=1, 1))
SimSWM2014Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_data2014SWM, control=list(maxit=500)))


##2011
#biophys_par_data2011 <- list(Distances=Distances, Assignments=BiophysMat2011, Sampled_reefs=t(as.matrix(SiteIndex[site %in% SurveyData[year==2014 & prop_anem_samp >0 , site], .(index)])), 
#                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(BiophysMat2011), ncol=1, 1))
#Sim2011Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 5), method="L-BFGS-B", data=biophys_par_data2011, control=list(maxit=500)))
#

In [12]:
#make a summary table for all time frames of simulation data
SimKernelsAnnualMonsoon <- as.data.frame(matrix(NA, nrow = 8, ncol = 8))
setDT(SimKernelsAnnualMonsoon)
setnames(SimKernelsAnnualMonsoon, c("time_scale", "time_id", "k", "theta", "mdd", "med", "dist90", "log_like"))
#SimKernelsAnnualMonsoon <- data.table(time_scale=character(), time_id=character(), k=numeric(), theta=numeric(), mdd=numeric(), med=numeric(), dist90=numeric())

BestKNEM2011 <- as.numeric(coef(SimNEM2011Fit)[1])
BestThetaNEM2011 <- as.numeric(coef(SimNEM2011Fit)[2])
MDDNEM2011 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestKNEM2011, theta=BestThetaNEM2011, , method = "pcubature")$integral)
k_eval <- BestKNEM2011
theta_eval <- BestThetaNEM2011
MedNEM2011  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_NEM2011 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

SimKernelsAnnualMonsoon$time_scale[1] <- "annual_monsoon"
SimKernelsAnnualMonsoon$time_id[1] <- "NEM2011"
SimKernelsAnnualMonsoon$k[1] <-  BestKNEM2011
SimKernelsAnnualMonsoon$theta[1] <- BestThetaNEM2011
SimKernelsAnnualMonsoon$mdd[1] <- MDDNEM2011
SimKernelsAnnualMonsoon$med[1] <- MedNEM2011
SimKernelsAnnualMonsoon$dist90[1] <- Dist90_NEM2011
SimKernelsAnnualMonsoon$log_like[1] <- logLik(SimNEM2011Fit)[1]

BestKNEM2012 <- as.numeric(coef(SimNEM2012Fit)[1]) 
BestThetaNEM2012 <- as.numeric(coef(SimNEM2012Fit)[2])
MDDNEM2012 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestKNEM2012, theta=BestThetaNEM2012, , method = "pcubature")$integral)
k_eval <- BestKNEM2012
theta_eval <- BestThetaNEM2012
MedNEM2012  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_NEM2012 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

SimKernelsAnnualMonsoon$time_scale[2] <- "annual_monsoon"
SimKernelsAnnualMonsoon$time_id[2] <- "NEM2012"
SimKernelsAnnualMonsoon$k[2] <-  BestKNEM2012
SimKernelsAnnualMonsoon$theta[2] <- BestThetaNEM2012
SimKernelsAnnualMonsoon$mdd[2] <- MDDNEM2012
SimKernelsAnnualMonsoon$med[2] <- MedNEM2012
SimKernelsAnnualMonsoon$dist90[2] <- Dist90_NEM2012
SimKernelsAnnualMonsoon$log_like[2] <- logLik(SimNEM2012Fit)[1]

BestKNEM2013 <- as.numeric(coef(SimNEM2013Fit)[1])
BestThetaNEM2013 <- as.numeric(coef(SimNEM2013Fit)[2])
MDDNEM2013 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestKNEM2013, theta=BestThetaNEM2013, , method = "pcubature")$integral)
k_eval <- BestKNEM2013
theta_eval <- BestThetaNEM2013
MedNEM2013  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_NEM2013 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

SimKernelsAnnualMonsoon$time_scale[3] <- "annual_monsoon"
SimKernelsAnnualMonsoon$time_id[3] <- "NEM2013"
SimKernelsAnnualMonsoon$k[3] <-  BestKNEM2013
SimKernelsAnnualMonsoon$theta[3] <- BestThetaNEM2013
SimKernelsAnnualMonsoon$mdd[3] <- MDDNEM2013
SimKernelsAnnualMonsoon$med[3] <- MedNEM2013
SimKernelsAnnualMonsoon$dist90[3] <- Dist90_NEM2013
SimKernelsAnnualMonsoon$log_like[3] <- logLik(SimNEM2013Fit)[1]


BestKNEM2014 <- as.numeric(coef(SimNEM2014Fit)[1])
BestThetaNEM2014 <- as.numeric(coef(SimNEM2014Fit)[2])
MDDNEM2014 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestKNEM2014, theta=BestThetaNEM2014, , method = "pcubature")$integral)
k_eval <- BestKNEM2014
theta_eval <- BestThetaNEM2014
MedNEM2014  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_NEM2014 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

SimKernelsAnnualMonsoon$time_scale[4] <- "annual_monsoon"
SimKernelsAnnualMonsoon$time_id[4] <- "NEM2014"
SimKernelsAnnualMonsoon$k[4] <-  BestKNEM2014
SimKernelsAnnualMonsoon$theta[4] <- BestThetaNEM2014
SimKernelsAnnualMonsoon$mdd[4] <- MDDNEM2014
SimKernelsAnnualMonsoon$med[4] <- MedNEM2014
SimKernelsAnnualMonsoon$dist90[4] <- Dist90_NEM2014
SimKernelsAnnualMonsoon$log_like[4] <- logLik(SimNEM2014Fit)[1]

BestKSWM2011 <- as.numeric(coef(SimSWM2011Fit)[1])
BestThetaSWM2011 <- as.numeric(coef(SimSWM2011Fit)[2])
MDDSWM2011 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestKSWM2011, theta=BestThetaSWM2011, , method = "pcubature")$integral)
k_eval <- BestKSWM2011
theta_eval <- BestThetaSWM2011
MedSWM2011  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_SWM2011 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

SimKernelsAnnualMonsoon$time_scale[5] <- "annual_monsoon"
SimKernelsAnnualMonsoon$time_id[5] <- "SWM2011"
SimKernelsAnnualMonsoon$k[5] <-  BestKSWM2011
SimKernelsAnnualMonsoon$theta[5] <- BestThetaSWM2011
SimKernelsAnnualMonsoon$mdd[5] <- MDDSWM2011
SimKernelsAnnualMonsoon$med[5] <- MedSWM2011
SimKernelsAnnualMonsoon$dist90[5] <- Dist90_SWM2011
SimKernelsAnnualMonsoon$log_like[5] <- logLik(SimSWM2011Fit)[1]

BestKSWM2012 <- as.numeric(coef(SimSWM2012Fit)[1])
BestThetaSWM2012 <- as.numeric(coef(SimSWM2012Fit)[2])
MDDSWM2012 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestKSWM2012, theta=BestThetaSWM2012, , method = "pcubature")$integral)
k_eval <- BestKSWM2012
theta_eval <- BestThetaSWM2012
MedSWM2012  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_SWM2012 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

SimKernelsAnnualMonsoon$time_scale[6] <- "annual_monsoon"
SimKernelsAnnualMonsoon$time_id[6] <- "SWM2012"
SimKernelsAnnualMonsoon$k[6] <-  BestKSWM2012
SimKernelsAnnualMonsoon$theta[6] <- BestThetaSWM2012
SimKernelsAnnualMonsoon$mdd[6] <- MDDSWM2012
SimKernelsAnnualMonsoon$med[6] <- MedSWM2012
SimKernelsAnnualMonsoon$dist90[6] <- Dist90_SWM2012
SimKernelsAnnualMonsoon$log_like[6] <- logLik(SimSWM2012Fit)[1]

BestKSWM2013 <- as.numeric(coef(SimSWM2013Fit)[1])
BestThetaSWM2013 <- as.numeric(coef(SimSWM2013Fit)[2])
MDDSWM2013 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestKSWM2013, theta=BestThetaSWM2013, , method = "pcubature")$integral)
k_eval <- BestKSWM2013
theta_eval <- BestThetaSWM2013
MedSWM2013  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_SWM2013 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

SimKernelsAnnualMonsoon$time_scale[7] <- "annual_monsoon"
SimKernelsAnnualMonsoon$time_id[7] <- "SWM2013"
SimKernelsAnnualMonsoon$k[7] <-  BestKSWM2013
SimKernelsAnnualMonsoon$theta[7] <- BestThetaSWM2013
SimKernelsAnnualMonsoon$mdd[7] <- MDDSWM2013
SimKernelsAnnualMonsoon$med[7] <- MedSWM2013
SimKernelsAnnualMonsoon$dist90[7] <- Dist90_SWM2013
SimKernelsAnnualMonsoon$log_like[7] <- logLik(SimSWM2013Fit)[1]


BestKSWM2014 <- as.numeric(coef(SimSWM2014Fit)[1])
BestThetaSWM2014 <- as.numeric(coef(SimSWM2014Fit)[2])
MDDSWM2014 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestKSWM2014, theta=BestThetaSWM2014, , method = "pcubature")$integral)
k_eval <- BestKSWM2014
theta_eval <- BestThetaSWM2014
MedSWM2014  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
Dist90_SWM2014 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)

SimKernelsAnnualMonsoon$time_scale[8] <- "annual_monsoon"
SimKernelsAnnualMonsoon$time_id[8] <- "SWM2014"
SimKernelsAnnualMonsoon$k[8] <-  BestKSWM2014
SimKernelsAnnualMonsoon$theta[8] <- BestThetaSWM2014
SimKernelsAnnualMonsoon$mdd[8] <- MDDSWM2014
SimKernelsAnnualMonsoon$med[8] <- MedSWM2014
SimKernelsAnnualMonsoon$dist90[8] <- Dist90_SWM2014
SimKernelsAnnualMonsoon$log_like[8] <- logLik(SimSWM2014Fit)[1]

#BestK2011 <- as.numeric(coef(Sim2011Fit)[1])
#BestTheta2011 <- as.numeric(coef(Sim2011Fit)[2])
#MDD2011 <- as.numeric(cubintegrate(integrate_kernel_sum1, lower = 0, upper = Inf, k=BestK2011, theta=BestTheta2011, , method = "pcubature")$integral)
#k_eval <- BestK2011
#theta_eval <- BestTheta2011
#Med2011  <- round(nleqslv(x = 7, fn = cdf_solve)$x, 2) 
#Dist90_2011 <- round(nleqslv(x = 7, fn = cdf_solve90)$x, 2)
#
#SimKernelsAnnualMonsoon$time_scale[9] <- "annual"
#SimKernelsAnnualMonsoon$time_id[9] <- "2011"
#SimKernelsAnnualMonsoon$k[9] <-  BestK2011
#SimKernelsAnnualMonsoon$theta[9] <- BestTheta2011
#SimKernelsAnnualMonsoon$mdd[9] <- MDD2011
#SimKernelsAnnualMonsoon$med[9] <- Med2011
#SimKernelsAnnualMonsoon$dist90[9] <- Dist90_2011
#SimKernelsAnnualMonsoon$log_like[9] <- logLik(Sim2011Fit)[1]
#
SimKernelsAnnualMonsoon[, `:=`(k=round(k, 5), theta=round(theta, 5), mdd=round(mdd, 3), med=round(med, 2), dist90=round(dist90, 2), data_source="simulation_08PLD"), by="time_id"]

fwrite(SimKernelsAnnualMonsoon, file="~/oceanography/script_output/KernelFits/summary_tables/SimulationKernelFitsAnnualMonsoon08DayPLD.csv")


SimKernelsAnnualMonsoon


time_scale,time_id,k,theta,mdd,med,dist90,log_like,data_source
annual_monsoon,NEM2011,-1.75309,1.30265,4.262,3.24,9.39,-1045881.8,simulation_08PLD
annual_monsoon,NEM2012,-1.73326,1.26304,4.3,3.24,9.52,-1120211.9,simulation_08PLD
annual_monsoon,NEM2013,-1.85889,1.35246,4.584,3.52,10.04,-1028977.7,simulation_08PLD
annual_monsoon,NEM2014,-1.50649,1.11461,3.917,2.83,8.86,-901086.4,simulation_08PLD
annual_monsoon,SWM2011,-2.14126,1.60872,5.359,4.31,11.41,-371065.6,simulation_08PLD
annual_monsoon,SWM2012,-1.92231,1.47343,4.566,3.59,9.86,-141712.1,simulation_08PLD
annual_monsoon,SWM2013,-1.92912,1.4063,4.763,3.7,10.37,-195363.6,simulation_08PLD
annual_monsoon,SWM2014,-2.08679,1.56256,5.17,4.13,11.06,-197547.8,simulation_08PLD


In [6]:
##calculate the 95% likelihood surface cutoffs. Use the log_like from the bbmle function because that's more precise. in the grid search function used for the surfaces, k/theta are limited to 2 decimal places because it's less computation time, but in some cases that means the MLE is from an artificial local optimum that only exists because decimal places are truncated
##genetics
#cutoff_Gen2012 <- min(LogLikeGen2012$log_like) + qchisq(.90, 2)/2 #use the 95% quantile of the chi-sq distribution with two df because it's bivariate
#
#cutoff_Gen2013 <- min(LogLikeGen2013$log_like) + qchisq(.90, 2)/2 #use the 95% quantile of the chi-sq distribution with two df because it's bivariate
#
#cutoff_Gen2014 <- min(LogLikeGen2014$log_like) + qchisq(.90, 2)/2 #use the 95% quantile of the chi-sq distribution with two df because it's bivariate
#
#cutoff_Gen2012_4 <- min(LogLikeGen2012_4$log_like) + qchisq(.90, 2)/2 #use the 95% quantile of the chi-sq distribution with two df because it's bivariate
#
#cutoff_GenNEM <- min(LogLikeGenNEM$log_like) + qchisq(.90, 2)/2 #use the 95% quantile of the chi-sq distribution with two df because it's bivariate
#
#cutoff_GenSWM <- min(LogLikeGenSWM$log_like) + qchisq(.90, 2)/2 #use the 95% quantile of the chi-sq distribution with two df because it's bivariate

LogLikeSim2012 <- fread(file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileBiophysical2012_15DayPLD.csv", header = T)
LogLikeSim2013 <- fread(file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileBiophysical2013_15DayPLD.csv", header = T)
LogLikeSim2014 <- fread(file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileBiophysical2014_15DayPLD.csv", header = T)
LogLikeSim2012_4 <- fread(file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileBiophysical2012_4_15DayPLD.csv", header = T)
LogLikeSimNEM <- fread(file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileBiophysicalNEM_15DayPLD.csv", header = T)
LogLikeSimSWM <- fread(file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileBiophysicalSWM_15DayPLD.csv", header = T)

#biophysical simulation
cutoff_Sim2012 <- min(LogLikeSim2012$log_like) + qchisq(.90, 2)/2 #use the 95% quantile of the chi-sq distribution with two df because it's bivariate

cutoff_Sim2013 <- min(LogLikeSim2013$log_like) + qchisq(.90, 2)/2 #use the 95% quantile of the chi-sq distribution with two df because it's bivariate

cutoff_Sim2014 <- min(LogLikeSim2014$log_like) + qchisq(.90, 2)/2 #use the 95% quantile of the chi-sq distribution with two df because it's bivariate

cutoff_Sim2012_4 <- min(LogLikeSim2012_4$log_like) + qchisq(.90, 2)/2 #use the 95% quantile of the chi-sq distribution with two df because it's bivariate

cutoff_SimNEM <- min(LogLikeSimNEM$log_like) + qchisq(.90, 2)/2 #use the 95% quantile of the chi-sq distribution with two df because it's bivariate

cutoff_SimSWM <- min(LogLikeSimSWM$log_like) + qchisq(.90, 2)/2 #use the 95% quantile of the chi-sq distribution with two df because it's bivariate



In [7]:
head(LogLikeSim2012_4)

k_eval,theta_eval,log_like
-10.0,0.1,862884.3
-9.989995,0.1,862862.8
-9.97999,0.1,862841.3
-9.969985,0.1,862819.8
-9.95998,0.1,862798.2
-9.949975,0.1,862776.6


In [8]:
LogLikeSim2012 <- setDT(expand.grid(k_eval=seq(from=unique(LogLikeSim2012[log_like < cutoff_Sim2012][k_eval==min(k_eval), k_eval]), to=unique(LogLikeSim2012[log_like < cutoff_Sim2012][k_eval==max(k_eval), k_eval]), length.out = 2000), theta_eval=seq(from=unique(LogLikeSim2012[log_like < cutoff_Sim2012][theta_eval==min(theta_eval), theta_eval]), to=unique(LogLikeSim2012[log_like < cutoff_Sim2012][theta_eval==max(theta_eval), theta_eval]), length.out = 500)))
LogLikeSim2012[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=biophys_par_data2012$Distances, Assignments=biophys_par_data2012$Assignments, Sampled_reefs=biophys_par_data2012$Sampled_reefs, Reef_sizes=biophys_par_data2012$Reef_sizes, Adult_sample_proportions=biophys_par_data2012$Adult_sample_proportions), by=.(k_eval, theta_eval)]   #write profile results
#fwrite(LogLikeSim2012, file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileBiophysical2012NarrowSearch15DayPLD.csv")

LogLikeSim2013 <- setDT(expand.grid(k_eval=seq(from=unique(LogLikeSim2013[log_like < cutoff_Sim2013][k_eval==min(k_eval), k_eval]), to=unique(LogLikeSim2013[log_like < cutoff_Sim2013][k_eval==max(k_eval), k_eval]), length.out = 2000), theta_eval=seq(from=unique(LogLikeSim2013[log_like < cutoff_Sim2013][theta_eval==min(theta_eval), theta_eval]), to=unique(LogLikeSim2013[log_like < cutoff_Sim2013][theta_eval==max(theta_eval), theta_eval]), length.out = 500)))
LogLikeSim2013[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=biophys_par_data2013$Distances, Assignments=biophys_par_data2013$Assignments, Sampled_reefs=biophys_par_data2013$Sampled_reefs, Reef_sizes=biophys_par_data2013$Reef_sizes, Adult_sample_proportions=biophys_par_data2013$Adult_sample_proportions), by=.(k_eval, theta_eval)]   #write profile results
#fwrite(LogLikeSim2013, file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileBiophysical2013NarrowSearch15DayPLD.csv")

LogLikeSim2014 <- setDT(expand.grid(k_eval=seq(from=unique(LogLikeSim2014[log_like < cutoff_Sim2014][k_eval==min(k_eval), k_eval]), to=unique(LogLikeSim2014[log_like < cutoff_Sim2014][k_eval==max(k_eval), k_eval]), length.out = 2000), theta_eval=seq(from=unique(LogLikeSim2014[log_like < cutoff_Sim2014][theta_eval==min(theta_eval), theta_eval]), to=unique(LogLikeSim2014[log_like < cutoff_Sim2014][theta_eval==max(theta_eval), theta_eval]), length.out = 500)))
LogLikeSim2014[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=biophys_par_data2014$Distances, Assignments=biophys_par_data2014$Assignments, Sampled_reefs=biophys_par_data2014$Sampled_reefs, Reef_sizes=biophys_par_data2014$Reef_sizes, Adult_sample_proportions=biophys_par_data2014$Adult_sample_proportions), by=.(k_eval, theta_eval)]   #write profile results
#fwrite(LogLikeSim2014, file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileBiophysical2014NarrowSearch15DayPLD.csv")

LogLikeSim2012_4 <- setDT(expand.grid(k_eval=seq(from=unique(LogLikeSim2012_4[log_like < cutoff_Sim2012_4][k_eval==min(k_eval), k_eval]), to=unique(LogLikeSim2012_4[log_like < cutoff_Sim2012_4][k_eval==max(k_eval), k_eval]), length.out = 2000), theta_eval=seq(from=unique(LogLikeSim2012_4[log_like < cutoff_Sim2012_4][theta_eval==min(theta_eval), theta_eval]), to=unique(LogLikeSim2012_4[log_like < cutoff_Sim2012_4][theta_eval==max(theta_eval), theta_eval]), length.out = 500)))
LogLikeSim2012_4[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=biophys_par_data2012_4$Distances, Assignments=biophys_par_data2012_4$Assignments, Sampled_reefs=biophys_par_data2012_4$Sampled_reefs, Reef_sizes=biophys_par_data2012_4$Reef_sizes, Adult_sample_proportions=biophys_par_data2012_4$Adult_sample_proportions), by=.(k_eval, theta_eval)]   #write profile results
#fwrite(LogLikeSim2012_4, file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileBiophysical2012_4NarrowSearch15DayPLD.csv")

LogLikeSimNEM <- setDT(expand.grid(k_eval=seq(from=unique(LogLikeSimNEM[log_like < cutoff_SimNEM][k_eval==min(k_eval), k_eval]), to=unique(LogLikeSimNEM[log_like < cutoff_SimNEM][k_eval==max(k_eval), k_eval]), length.out = 2000), theta_eval=seq(from=unique(LogLikeSimNEM[log_like < cutoff_SimNEM][theta_eval==min(theta_eval), theta_eval]), to=unique(LogLikeSimNEM[log_like < cutoff_SimNEM][theta_eval==max(theta_eval), theta_eval]), length.out = 500)))
LogLikeSimNEM[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=biophys_par_dataNEM$Distances, Assignments=biophys_par_dataNEM$Assignments, Sampled_reefs=biophys_par_dataNEM$Sampled_reefs, Reef_sizes=biophys_par_dataNEM$Reef_sizes, Adult_sample_proportions=biophys_par_dataNEM$Adult_sample_proportions), by=.(k_eval, theta_eval)]   #write profile results
#fwrite(LogLikeSimNEM, file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileBiophysicalNEMNarrowSearch15DayPLD.csv")

LogLikeSimSWM <- setDT(expand.grid(k_eval=seq(from=unique(LogLikeSimSWM[log_like < cutoff_SimSWM][k_eval==min(k_eval), k_eval]), to=unique(LogLikeSimSWM[log_like < cutoff_SimSWM][k_eval==max(k_eval), k_eval]), length.out = 2000), theta_eval=seq(from=unique(LogLikeSimSWM[log_like < cutoff_SimSWM][theta_eval==min(theta_eval), theta_eval]), to=unique(LogLikeSimSWM[log_like < cutoff_SimSWM][theta_eval==max(theta_eval), theta_eval]), length.out = 500)))
LogLikeSimSWM[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=biophys_par_dataSWM$Distances, Assignments=biophys_par_dataSWM$Assignments, Sampled_reefs=biophys_par_dataSWM$Sampled_reefs, Reef_sizes=biophys_par_dataSWM$Reef_sizes, Adult_sample_proportions=biophys_par_dataSWM$Adult_sample_proportions), by=.(k_eval, theta_eval)]   #write profile results
#fwrite(LogLikeSimSWM, file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileBiophysicalSWMNarrowSearch15DayPLD.csv")

#genetics
#LogLikeGen2012 <- setDT(expand.grid(k_eval=seq(from=unique(LogLikeGen2012[log_like < cutoff_Gen2012][k_eval==min(k_eval), k_eval]), to=unique(LogLikeGen2012[log_like < cutoff_Gen2012][k_eval==max(k_eval), k_eval]), length.out = 2000), theta_eval=seq(from=unique(LogLikeGen2012[log_like < cutoff_Gen2012][theta_eval==min(theta_eval), theta_eval]), to=unique(LogLikeGen2012[log_like < cutoff_Gen2012][theta_eval==max(theta_eval), theta_eval]), length.out = 500)))
#LogLikeGen2012[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=par_data2012$Distances, Assignments=par_data2012$Assignments, Sampled_reefs=par_data2012$Sampled_reefs, Reef_sizes=par_data2012$Reef_sizes, Adult_sample_proportions=par_data2012$Adult_sample_proportions), by=.(k_eval, theta_eval)]   #write profile results
#fwrite(LogLikeGen2012, file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileGenetic2012NarrowSearch.csv")
#
#LogLikeGen2013 <- setDT(expand.grid(k_eval=seq(from=unique(LogLikeGen2013[log_like < cutoff_Gen2013][k_eval==min(k_eval), k_eval]), to=unique(LogLikeGen2013[log_like < cutoff_Gen2013][k_eval==max(k_eval), k_eval]), length.out = 2000), theta_eval=seq(from=unique(LogLikeGen2013[log_like < cutoff_Gen2013][theta_eval==min(theta_eval), theta_eval]), to=unique(LogLikeGen2013[log_like < cutoff_Gen2013][theta_eval==max(theta_eval), theta_eval]), length.out = 500)))
#LogLikeGen2013[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=par_data2013$Distances, Assignments=par_data2013$Assignments, Sampled_reefs=par_data2013$Sampled_reefs, Reef_sizes=par_data2013$Reef_sizes, Adult_sample_proportions=par_data2013$Adult_sample_proportions), by=.(k_eval, theta_eval)]   #write profile results
#fwrite(LogLikeGen2013, file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileGenetic2013NarrowSearch.csv")

#LogLikeGen2014 <- setDT(expand.grid(k_eval=seq(from=unique(LogLikeGen2014[log_like < cutoff_Gen2014][k_eval==min(k_eval), k_eval]), to=unique(LogLikeGen2014[log_like < cutoff_Gen2014][k_eval==max(k_eval), k_eval]), length.out = 2000), theta_eval=seq(from=unique(LogLikeGen2014[log_like < cutoff_Gen2014][theta_eval==min(theta_eval), theta_eval]), to=unique(LogLikeGen2014[log_like < cutoff_Gen2014][theta_eval==max(theta_eval), theta_eval]), length.out = 500)))
#LogLikeGen2014[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=par_data2014$Distances, Assignments=par_data2014$Assignments, Sampled_reefs=par_data2014$Sampled_reefs, Reef_sizes=par_data2014$Reef_sizes, Adult_sample_proportions=par_data2014$Adult_sample_proportions), by=.(k_eval, theta_eval)]   #write profile results
#fwrite(LogLikeGen2014, file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileGenetic2014NarrowSearch.csv")
#
#LogLikeGen2012_4 <- setDT(expand.grid(k_eval=seq(from=unique(LogLikeGen2012_4[log_like < cutoff_Gen2012_4][k_eval==min(k_eval), k_eval]), to=unique(LogLikeGen2012_4[log_like < cutoff_Gen2012_4][k_eval==max(k_eval), k_eval]), length.out = 2000), theta_eval=seq(from=unique(LogLikeGen2012_4[log_like < cutoff_Gen2012_4][theta_eval==min(theta_eval), theta_eval]), to=unique(LogLikeGen2012_4[log_like < cutoff_Gen2012_4][theta_eval==max(theta_eval), theta_eval]), length.out = 500)))
#LogLikeGen2012_4[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=par_data2012_4$Distances, Assignments=par_data2012_4$Assignments, Sampled_reefs=par_data2012_4$Sampled_reefs, Reef_sizes=par_data2012_4$Reef_sizes, Adult_sample_proportions=par_data2012_4$Adult_sample_proportions), by=.(k_eval, theta_eval)]   #write profile results
#fwrite(LogLikeGen2012_4, file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileGenetic2012_4NarrowSearch.csv")

#LogLikeGenNEM <- setDT(expand.grid(k_eval=seq(from=unique(LogLikeGenNEM[log_like < cutoff_GenNEM][k_eval==min(k_eval), k_eval]), to=unique(LogLikeGenNEM[log_like < cutoff_GenNEM][k_eval==max(k_eval), k_eval]), length.out = 2000), theta_eval=seq(from=unique(LogLikeGenNEM[log_like < cutoff_GenNEM][theta_eval==min(theta_eval), theta_eval]), to=unique(LogLikeGenNEM[log_like < cutoff_GenNEM][theta_eval==max(theta_eval), theta_eval]), length.out = 500)))
#LogLikeGenNEM[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=par_dataNEM$Distances, Assignments=par_dataNEM$Assignments, Sampled_reefs=par_dataNEM$Sampled_reefs, Reef_sizes=par_dataNEM$Reef_sizes, Adult_sample_proportions=par_dataNEM$Adult_sample_proportions), by=.(k_eval, theta_eval)]   #write profile results
#fwrite(LogLikeGenNEM, file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileGeneticNEMNarrowSearch.csv")

#LogLikeGenSWM <- setDT(expand.grid(k_eval=seq(from=unique(LogLikeGenSWM[log_like < cutoff_GenSWM][k_eval==min(k_eval), k_eval]), to=unique(LogLikeGenSWM[log_like < cutoff_GenSWM][k_eval==max(k_eval), k_eval]), length.out = 2000), theta_eval=seq(from=unique(LogLikeGenSWM[log_like < cutoff_GenSWM][theta_eval==min(theta_eval), theta_eval]), to=unique(LogLikeGenSWM[log_like < cutoff_GenSWM][theta_eval==max(theta_eval), theta_eval]), length.out = 500)))
#LogLikeGenSWM[, log_like := LL_kt_grid(k=k_eval, theta=theta_eval, Distances=par_dataSWM$Distances, Assignments=par_dataSWM$Assignments, Sampled_reefs=par_dataSWM$Sampled_reefs, Reef_sizes=par_dataSWM$Reef_sizes, Adult_sample_proportions=par_dataSWM$Adult_sample_proportions), by=.(k_eval, theta_eval)]   #write profile results
#fwrite(LogLikeGenSWM, file="~/oceanography/script_output/KernelFits/profiles/LikelihoodProfileGeneticSWMNarrowSearch.csv")
#


“no non-missing arguments to min; returning Inf”

In [8]:
SimKernels

time_scale,time_id,k,theta,mdd,med,dist90,log_like,data_source
annual,2012,-2.05116,0.94557,8.45,5.73,19.64,-50575.5,simulation
annual,2013,-1.53911,0.75976,7.63,4.67,18.34,-180189.17,simulation
annual,2014,-0.69775,0.63176,5.28,2.91,13.02,-194862.15,simulation
interannual,2012-4,-1.21595,0.70093,6.68,3.92,16.25,-593694.0,simulation
season,NEM,-1.26172,0.72426,6.45,3.85,15.62,-560379.23,simulation
season,SWM,-1.4255,0.6099,12.16,6.57,30.1,-28908.76,simulation


In [38]:
##lay out all the pieces
#sampled_reefs_vec <- as.matrix(SiteIndexBioPhys[site %in% SurveyData[, site], .(index)])
#pop_size_vec <- as.matrix(SurveyData[,.(avg_num_females=mean(num_females, na.rm = TRUE)), by=site][order(site)][, .(avg_num_females)]) #vector of pop sizes for all reefs (a). This term is also used in parentage kernel fitting, but reef sizes are substituted as a proxy for pop size. This is should be bootstrapped to account for uncertainty.
#BioPhysMat <- as.matrix(FullBiophysMatNorm[1:nrow(sampled_reefs_vec),]) #source normalized biophysical connectivity matrix. In Eqn. S3.4, this is m ajt/r a (*should it be r at? As in all particles released in time period t?)
#prop_samp_vec <- as.matrix(SurveyData[year == 2014,  .(prop_anem_samp)])#vector of proportion of habitat sampled for all reefs in time period t- I think that I should be coming up with a different value for this, but for now this will help me code the function
#unassigned_vec <- as.matrix(GenMat2012_4[nrow(GenMat2012_4),])#from genetic parentage data- a vector of the number of unassigned recruits at each destination reef in the system- we only have this for all sampled reefs.... what should the dimensions be?*
#Assignments <- GenMat2012_4[1:nrow(GenMat2012_4)-1,]

In [62]:
#lay out all the pieces
sampled_reefs_vec <- as.matrix(SiteIndexBioPhys[site %in% SurveyData[, site], .(index)]) #***I think these index values need to match dimensions of parentage matrix- filter out sand flats and other unsampled sites***
pop_size_vec <- as.matrix(SurveyData[,.(avg_num_females=mean(num_females, na.rm = TRUE)), by=site][order(site)][, .(avg_num_females)]) #vector of pop sizes for all reefs (a). This term is also used in parentage kernel fitting, but reef sizes are substituted as a proxy for pop size. This is should be bootstrapped to account for uncertainty.
#BioPhysMat <- as.matrix(FullBiophysMatNorm[1:nrow(sampled_reefs_vec),]) #source normalized biophysical connectivity matrix. In Eqn. S3.4, this is m ajt/r a (*should it be r at? As in all particles released in time period t?)
prop_samp_vec <- as.matrix(SurveyData[year == 2014,  .(prop_anem_samp)])#vector of proportion of habitat sampled for all reefs in time period t- I think that I should be coming up with a different value for this, but for now this will help me code the function
#unassigned_vec <- as.matrix(GenMat2012_4[nrow(GenMat2012_4),])#from genetic parentage data- a vector of the number of unassigned recruits at each destination reef in the system- we only have this for all sampled reefs.... what should the dimensions be?*
#Assignments <- GenMat2012_4[1:nrow(GenMat2012_4)-1,]

Data2012_4 <- list(BioPhysMat=as.matrix(FullBiophysMatNorm[1:nrow(sampled_reefs_vec),]),
                    Assignments=GenMat2012_4[1:nrow(GenMat2012_4)-1,], 
                   pop_size_vec=as.matrix(SurveyData[,.(avg_num_females=mean(num_females, na.rm = TRUE)), by=site][order(site)][, .(avg_num_females)]), 
                   sampled_reefs_vec=as.matrix(SiteIndexBioPhys[site %in% SurveyData[, site], .(index)]),
                   prop_samp_vec=as.matrix(SurveyData[year == 2014,  .(prop_anem_samp)]), 
                   unassigned_vec=as.matrix(GenMat2012_4[nrow(GenMat2012_4),]))

Data2012 <- list(BioPhysMat=as.matrix(FullBiophysMatNorm[1:nrow(sampled_reefs_vec),]),
                    Assignments=GenMat2012[1:nrow(GenMat2012)-1,], 
                   pop_size_vec=as.matrix(SurveyData[,.(avg_num_females=mean(num_females, na.rm = TRUE)), by=site][order(site)][, .(avg_num_females)]), 
                   sampled_reefs_vec=as.matrix(SiteIndexBioPhys[site %in% SurveyData[, site], .(index)]),
                   prop_samp_vec=as.matrix(SurveyData[year == 2012,  .(prop_anem_samp)]), 
                   unassigned_vec=as.matrix(GenMat2012[nrow(GenMat2012),]))
Data2013 <- list(BioPhysMat=as.matrix(FullBiophysMatNorm[1:nrow(sampled_reefs_vec),]),
                    Assignments=GenMat2013[1:nrow(GenMat2013)-1,], 
                   pop_size_vec=as.matrix(SurveyData[,.(avg_num_females=mean(num_females, na.rm = TRUE)), by=site][order(site)][, .(avg_num_females)]), 
                   sampled_reefs_vec=as.matrix(SiteIndexBioPhys[site %in% SurveyData[, site], .(index)]),
                   prop_samp_vec=as.matrix(SurveyData[year == 2013,  .(prop_anem_samp)]), 
                   unassigned_vec=as.matrix(GenMat2013[nrow(GenMat2013),]))
Data2014 <- list(BioPhysMat=as.matrix(FullBiophysMatNorm[1:nrow(sampled_reefs_vec),]),
                    Assignments=GenMat2014[1:nrow(GenMat2014)-1,], 
                   pop_size_vec=as.matrix(SurveyData[,.(avg_num_females=mean(num_females, na.rm = TRUE)), by=site][order(site)][, .(avg_num_females)]), 
                   sampled_reefs_vec=as.matrix(SiteIndexBioPhys[site %in% SurveyData[, site], .(index)]),
                   prop_samp_vec=as.matrix(SurveyData[year == 2014,  .(prop_anem_samp)]), 
                   unassigned_vec=as.matrix(GenMat2014[nrow(GenMat2014),]))

#neg_LL_biophys(Data2012)+neg_LL_biophys(Data2013)+neg_LL_biophys(Data2014)

 DataNEM <- list(BioPhysMat=as.matrix(FullBiophysMatNorm[1:nrow(sampled_reefs_vec),]),
                    Assignments=GenMatNEM[1:nrow(GenMatNEM)-1,], 
                   pop_size_vec=as.matrix(SurveyData[,.(avg_num_females=mean(num_females, na.rm = TRUE)), by=site][order(site)][, .(avg_num_females)]), 
                   sampled_reefs_vec=as.matrix(SiteIndexBioPhys[site %in% SurveyData[, site], .(index)]),
                   prop_samp_vec=as.matrix(SurveyData[year == 2014,  .(prop_anem_samp)]), 
                   unassigned_vec=as.matrix(GenMatNEM[nrow(GenMatNEM),]))

 DataSWM <- list(BioPhysMat=as.matrix(FullBiophysMatNorm[1:nrow(sampled_reefs_vec),]),
                    Assignments=GenMatSWM[1:nrow(GenMatSWM)-1,], 
                   pop_size_vec=as.matrix(SurveyData[,.(avg_num_females=mean(num_females, na.rm = TRUE)), by=site][order(site)][, .(avg_num_females)]), 
                   sampled_reefs_vec=as.matrix(SiteIndexBioPhys[site %in% SurveyData[, site], .(index)]),
                   prop_samp_vec=as.matrix(SurveyData[year == 2014,  .(prop_anem_samp)]), 
                   unassigned_vec=as.matrix(GenMatSWM[nrow(GenMatSWM),]))
#neg_LL_biophys(DataNEM)+neg_LL_biophys( DataSWM)

#Data2012NEM <- list(BioPhysMat=as.matrix(FullBiophysMatNorm[1:nrow(sampled_reefs_vec),]),
#                    Assignments=GenMat2012[1:nrow(GenMat2012)-1,], 
#                   pop_size_vec=as.matrix(SurveyData[,.(avg_num_females=mean(num_females, na.rm = TRUE)), by=site][order(site)][, .(avg_num_females)]), 
#                   sampled_reefs_vec=as.matrix(SiteIndexBioPhys[site %in% SurveyData[, site], .(index)]),
#                   prop_samp_vec=as.matrix(SurveyData[year == 2012,  .(prop_anem_samp)]), 
#                   unassigned_vec=as.matrix(GenMat2012[nrow(GenMat2012),]))
#Data2013NEM <- list(BioPhysMat=as.matrix(FullBiophysMatNorm[1:nrow(sampled_reefs_vec),]),
#                    Assignments=GenMat2013[1:nrow(GenMat2013)-1,], 
#                   pop_size_vec=as.matrix(SurveyData[,.(avg_num_females=mean(num_females, na.rm = TRUE)), by=site][order(site)][, .(avg_num_females)]), 
#                   sampled_reefs_vec=as.matrix(SiteIndexBioPhys[site %in% SurveyData[, site], .(index)]),
#                   prop_samp_vec=as.matrix(SurveyData[year == 2013,  .(prop_anem_samp)]), 
#                   unassigned_vec=as.matrix(GenMat2013[nrow(GenMat2013),]))
#Data2014NEM <- list(BioPhysMat=as.matrix(FullBiophysMatNorm[1:nrow(sampled_reefs_vec),]),
#                    Assignments=GenMat2014[1:nrow(GenMat2014)-1,], 
#                   pop_size_vec=as.matrix(SurveyData[,.(avg_num_females=mean(num_females, na.rm = TRUE)), by=site][order(site)][, .(avg_num_females)]), 
#                   sampled_reefs_vec=as.matrix(SiteIndexBioPhys[site %in% SurveyData[, site], .(index)]),
#                   prop_samp_vec=as.matrix(SurveyData[year == 2014,  .(prop_anem_samp)]), 
#                   unassigned_vec=as.matrix(GenMat2014[nrow(GenMat2014),]))
#Data2012SWM <- list(BioPhysMat=as.matrix(FullBiophysMatNorm[1:nrow(sampled_reefs_vec),]),
#                    Assignments=GenMat2012[1:nrow(GenMat2012)-1,], 
#                   pop_size_vec=as.matrix(SurveyData[,.(avg_num_females=mean(num_females, na.rm = TRUE)), by=site][order(site)][, .(avg_num_females)]), 
#                   sampled_reefs_vec=as.matrix(SiteIndexBioPhys[site %in% SurveyData[, site], .(index)]),
#                   prop_samp_vec=as.matrix(SurveyData[year == 2012,  .(prop_anem_samp)]), 
#                   unassigned_vec=as.matrix(GenMat2012[nrow(GenMat2012),]))
#Data2013SWM <- list(BioPhysMat=as.matrix(FullBiophysMatNorm[1:nrow(sampled_reefs_vec),]),
#                    Assignments=GenMat2013[1:nrow(GenMat2013)-1,], 
#                   pop_size_vec=as.matrix(SurveyData[,.(avg_num_females=mean(num_females, na.rm = TRUE)), by=site][order(site)][, .(avg_num_females)]), 
#                   sampled_reefs_vec=as.matrix(SiteIndexBioPhys[site %in% SurveyData[, site], .(index)]),
#                   prop_samp_vec=as.matrix(SurveyData[year == 2013,  .(prop_anem_samp)]), 
#                   unassigned_vec=as.matrix(GenMat2013[nrow(GenMat2013),]))
#Data2014SWM <- list(BioPhysMat=as.matrix(FullBiophysMatNorm[1:nrow(sampled_reefs_vec),]),
#                    Assignments=GenMat2014[1:nrow(GenMat2014)-1,], 
#                   pop_size_vec=as.matrix(SurveyData[,.(avg_num_females=mean(num_females, na.rm = TRUE)), by=site][order(site)][, .(avg_num_females)]), 
#                   sampled_reefs_vec=as.matrix(SiteIndexBioPhys[site %in% SurveyData[, site], .(index)]),
#                   prop_samp_vec=as.matrix(SurveyData[year == 2014,  .(prop_anem_samp)]), 
#                   unassigned_vec=as.matrix(GenMat2014[nrow(GenMat2014),]))
#calculate likelihood of our observed data given the biophysical simulation data- we want the highest likelihood, which is the smallest negative log likelihood
data.table(time_frame= c("interannual", "annual", "avg_monsoonal"), neg_LL=c(neg_LL_biophys(Data2012_4), neg_LL_biophys(Data2012) + neg_LL_biophys(Data2013) + neg_LL_biophys(Data2014), neg_LL_biophys(DataNEM) + neg_LL_biophys(DataSWM)))[order(-neg_LL)]






time_frame,neg_LL
avg_monsoonal,-18818.94
annual,-24381.73
interannual,-28545.95
