In [1]:
Packages <- c("dplyr",  "nleqslv", "broom","cubature", "geosphere", "data.table",  "ggplot2", "bbmle", "stringr",  "lubridate", "RColorBrewer")

invisible(suppressPackageStartupMessages(lapply(Packages, library, character.only = TRUE)))

setwd('/local/home/katrinac/oceanography')
"%!in%" <- function(x,table) match(x,table, nomatch = 0) == 0
source("~/parentage/kernel_fitting/1340_loci/functions/ll_kt_both_bbmle.R")
#source("~/oceanography/scripts/PredictedProportions.R")

#read in the kernel fitting summary
kernels <- fread(file="~/parentage/kernel_fitting/1340_loci/final_results/tables/kernel_fitting_summary.csv")
kernel2012_14 <- fread(file="~/oceanography/empirical_data/genetics/GenKernelsForROMSComp2012-14.csv")

#read in the centroids adjusted for the simulation, so the Magbangons combined 
#centroids <- fread(file="~/oceanography/script_output/SurveyData/SimulationCentroids.csv")
Centroids <- fread(file="~/oceanography/empirical_data/site_centroids_SimTest.csv")
setorder(Centroids, site)
#read in the table with number of recruits sampled at each site for each year
AnnualRecsSamp <- fread(file="~/oceanography/script_output/SurveyData/AnnualRecruitsSampled.csv")
#read in the table of the proportion of anemones sampled at each site for each year
PropSamp <- fread(file="~/oceanography/script_output/SurveyData/ProportionHabitatSampled.csv")
setnames(PropSamp, c("PropAnemSamp", "TotalAnems"), c("prop_anem_samp", "total_anems"))
#read in the ROMS simulation connectivity table with metadata, not yet subsetted (*but check this)
SimConn <- fread(file="~/oceanography/script_output/ROMSDataTables/SimConnectivityTableWithMetaLongForm.csv")

#add in the numbers of particles seeded at each site
SeededParticles <- fread("~/oceanography/ROMS/data/Particles_Per_Release_Site_Renamed.csv")
setnames(SeededParticles,c("source", "daily_particles_released")) 
#DateJoin <- SeededParticles[DateJoin, on="source"][, particles_released_daily := as.numeric(particles_released_daily)] 

#make vectors defining sites we didn't sample, but that are in the model, and the sandflats specifically 
unsampled_sites <- c("SF1", "SF2", "SF3", "SF4", "SF5", "SF6", "Pangasugan", "Other", "CAI") 
sand_flats <- c("SF1", "SF2", "SF3", "SF4", "SF5", "SF6") 
unrealistic_sources <- c("SF1", "SF2", "SF3", "SF4", "SF5", "SF6", "Pangasugan") 
#make the constant inputs for the kernel fitting function
#distance matrix using the centroids with combined Magbangon
### List of source locations
SitesSource <- Centroids

### List of destination locations
SitesDest <- Centroids

DistMatm <- distm(SitesSource[,c('lon','lat')], SitesSource[,c('lon','lat')], fun=distVincentyEllipsoid)
Distances <- DistMatm*10^-3
#read in the reef areas for the kernel fitting
Area <- fread("~/oceanography/empirical_data/site_area_header_nonsurveyed_simulation_kernels_test.csv") %>%
    arrange(site) %>%
    filter(site %!in% c("near_north_full1", "near_north_full2", "near_north_full3", "near_south_full1", "near_south_full2", "near_south_full3")) %>%
    mutate(kmsq=msq*10^-6)# %>%
    #select(kmsq) #need to uncomment for functions to work
setorder(Area, site)
reef_sizes <- as.matrix(Area$kmsq)

#make a site index table, use this for Sampled_reefs input in kernel fitting
SiteIndex <- unique(Centroids, by="site")[, "site"][, index := .I] #add the row number as the unique site index

#make a table with the survey information for each site (how many fish sampled, prop anems sampled, total number of anems at site)
SurveyData <- AnnualRecsSamp[PropSamp, on=.(year=end_year, site)][#join the sampling tables together
    is.na(n_offs_gen), n_offs_gen := 0][#change NA's to 0
    ,-"time_frame"]#drop the time_frame column, we can key with end_year
#setnames(SurveyData, c("PropAnemSamp", "TotalAnems"), c("prop_anem_samp", "total_anems"))
#setkey(SurveyData, site)
#check all sites are represented in centroids and area (and indirectly distances, which comes from centroids)
#Area[site %!in% centroids$site] #should be nothing

#Allison's abundance time series data 
#download.file(url = "https://github.com/pinskylab/Clownfish_persistence/blob/master/Data/Script_outputs/females_df_F.RData?raw=true", destfile = "~/oceanography/empirical_data/genetics/females_df_F.RData")
load("~/oceanography/empirical_data/genetics/females_df_F.RData")
Abundance <- as.data.table(females_df_F)
setnames(Abundance, "nF", "num_females")
#join the survey sampling tables together
SurveyData <- AnnualRecsSamp[PropSamp, on=.(year=end_year, site)][
    is.na(n_offs_gen), n_offs_gen := 0][#change NA's to 0
    ,-"time_frame"]#drop the time_frame

SurveyData <- Abundance[, c("year", "site", "num_females")][SurveyData, on=.(year, site)]#join in Allison's estimate of female abundance. There are NA values, but that's okay we can figure those out when we start thinking about incorporating uncertainty in this


In [2]:
head(SimConn)
head(SurveyData)

date,source,dest,sim_month,sim_day,sim_year,year_sampled,sim_monsoon,daily_particles_rec,daily_particles_released,dist_km,bearing,direction,particle_id
2010-10-01,Other,Palanas,10,10,2010,2011,SWM,9,4497728,,,,P1
2010-10-01,Other,Palanas,10,10,2010,2011,SWM,9,4497728,,,,P2
2010-10-01,Other,Palanas,10,10,2010,2011,SWM,9,4497728,,,,P3
2010-10-01,Other,Palanas,10,10,2010,2011,SWM,9,4497728,,,,P4
2010-10-01,Other,Palanas,10,10,2010,2011,SWM,9,4497728,,,,P5
2010-10-01,Other,Palanas,10,10,2010,2011,SWM,9,4497728,,,,P6


year,site,num_females,n_offs_gen,prop_anem_samp,total_anems
2012,Cabatoan,8.0,4,0.4230769,26
2012,Caridad Cemetery,,0,0.0,4
2012,Caridad Proper,,0,0.0,4
2012,Elementary School,,0,0.0,7
2012,Gabas,,0,0.0,9
2012,Haina,,0,0.0,104


In [14]:
#SourceJoin <- SurveyData[SimConn, on = .(site = source, year=year_sampled)]
setnames(SourceJoin, skip_absent=TRUE, c("site", "n_offs_gen", "prop_anem_samp", "total_anems", "num_females"), c("source", "source_num_rec_sampled_annual",  "source_prop_anem_samp", "source_total_anems", "source_num_females"))


In [15]:
DestJoin <- SurveyData[SourceJoin, on = .(site = dest, year)]
setnames(DestJoin, skip_absent=TRUE, c("site", "n_offs_gen", "prop_anem_samp", "total_anems", "num_females"), c("dest", "dest_num_rec_sampled_annual",  "dest_prop_samp", "dest_total_anems", "dest_num_females"))


In [16]:
str(DestJoin)

Classes ‘data.table’ and 'data.frame':	212791371 obs. of  22 variables:
 $ year                         : num  2011 2011 2011 2011 2011 ...
 $ dest                         : chr  "Palanas" "Palanas" "Palanas" "Palanas" ...
 $ dest_num_females             : num  NA NA NA NA NA NA NA NA NA NA ...
 $ dest_num_rec_sampled_annual  : int  NA NA NA NA NA NA NA NA NA NA ...
 $ dest_prop_samp               : num  NA NA NA NA NA NA NA NA NA NA ...
 $ dest_total_anems             : int  NA NA NA NA NA NA NA NA NA NA ...
 $ source                       : chr  "Other" "Other" "Other" "Other" ...
 $ num_source_females           : num  NA NA NA NA NA NA NA NA NA NA ...
 $ source_num_rec_sampled_annual: int  NA NA NA NA NA NA NA NA NA NA ...
 $ source_prop_anem_samp        : num  NA NA NA NA NA NA NA NA NA NA ...
 $ source_total_anems           : int  NA NA NA NA NA NA NA NA NA NA ...
 $ date                         : chr  "2010-10-01" "2010-10-01" "2010-10-01" "2010-10-01" ...
 $ sim_month           

In [20]:
#make a table of the dates of release for simulations, for calculating the number of particles released in each time frame
season1 <- data.table(date=seq(as.Date("2010-10-01"), as.Date("2011-05-31"), by="days"))

season2 <- data.table(date=seq(as.Date("2011-10-01"), as.Date("2012-05-31"), by="days"))

season3 <- data.table(date=seq(as.Date("2012-10-01"), as.Date("2013-05-31"), by="days"))

season4 <- data.table(date=seq(as.Date("2013-10-01"), as.Date("2014-04-18"), by="days"))

AllDates <- rbind(season1, season2, season3, season4)

#mark the monsoon seasons, based on the same criteria I used for the parentage indirectly through the growth estimates
NEM <- c(11, 12, 1, 2, 3, 4, 5, 6)
SWM <- c(7, 8, 9, 10)

AllDates[,date := ymd(date)][ #format as ymd
            , sim_monsoon := ifelse(month(date) %in% NEM, "NEM", "SWM")][#mark monsoon season based on month
            , sim_year:=year(date)][#add year column
            ,year_sampled:= ifelse(date %in% season1$date, 2011, ifelse(date %in% season2$date, 2012, ifelse(date %in% season3$date, 2013, 2014)))]#and then add a year_sampled for the empircal sampling season of that particle

ReleaseDays <- AllDates[, .(num_release_days_seasonal=.N), by=c("year_sampled", "sim_monsoon")][, num_release_days_annual:= sum(num_release_days_seasonal), by=year_sampled]

total_release_days <- AllDates[year_sampled %in% c(2012, 2013, 2014), .N]#for the all year kernel- how many days of the simulation conincide with our particle sampling?
total_release_days #should be 687


In [21]:
head(DestJoin)
head(ReleaseDays)

year,dest,dest_num_females,dest_num_rec_sampled_annual,dest_prop_samp,dest_total_anems,source,num_source_females,source_num_rec_sampled_annual,source_prop_anem_samp,...,sim_month,sim_day,sim_year,sim_monsoon,daily_particles_rec,daily_particles_released,dist_km,bearing,direction,particle_id
2011,Palanas,,,,,Other,,,,...,10,10,2010,SWM,9,4497728,,,,P1
2011,Palanas,,,,,Other,,,,...,10,10,2010,SWM,9,4497728,,,,P2
2011,Palanas,,,,,Other,,,,...,10,10,2010,SWM,9,4497728,,,,P3
2011,Palanas,,,,,Other,,,,...,10,10,2010,SWM,9,4497728,,,,P4
2011,Palanas,,,,,Other,,,,...,10,10,2010,SWM,9,4497728,,,,P5
2011,Palanas,,,,,Other,,,,...,10,10,2010,SWM,9,4497728,,,,P6


year_sampled,sim_monsoon,num_release_days_seasonal,num_release_days_annual
2011,SWM,31,243
2011,NEM,212,243
2012,SWM,31,244
2012,NEM,213,244
2013,SWM,31,243
2013,NEM,212,243


In [36]:
unrealistic_sources

In [40]:
#prep biophysical connectivity matrix
#outside of the loop, trim this to only be the destinations we sampled

SimConn <- DestJoin[source %!in% unrealistic_sources & dest %!in% unrealistic_sources & year %in% c(2012, 2013, 2014)][#sand flats and Pangasugan are not realistic source or destination sites because there's almost no habitat. Safe to drop, but keep the rest of the possibilities so we can subsample iteratively all possibilities.
    , daily_particles_released := as.numeric(daily_particles_released)] #change from integer to numeric
SimConn <- ReleaseDays[SimConn, on=.(year_sampled=year, sim_monsoon)]#join in the info for number of release days in the time frame
SimConn <- kernels[Year %in% c("2012", "2013", "2014")][, year:=as.integer(Year)][,c("year", "NumParentageMatches")][SimConn, on=.(year=year_sampled)]#add in a column for the observed number of parentage matches
#rename the monsoon column in the full table for consistency
setnames(SimConn, c("sim_monsoon", "NumParentageMatches"), c("monsoon", "num_route_parentage_matches")) #get rid of upper case and inconsistent naming

#each year will require a different set of survey data, so make a list of each and index by site for fast look up
SampledTable <- SurveyData[prop_anem_samp >0, c("year", "site")]#previously named PropSampTable

#make sure all sampled sites are represented when joining the survey data to the sampled simulation- this chunk has the tables to add to a subsampled particle table. no need for the full
SampTable <- rbind(SurveyData[prop_anem_samp >0 & year %in% c(2012, 2013, 2014), c("year", "site")][, .(source=site, dest=site, year=year)][ #will join to the simulated sampling table by source and dest, so make those each a column from site and preserve the year variable as a key
    , c("year", "source", "dest")][, monsoon := "NEM"], SurveyData[prop_anem_samp >0 & year %in% c(2012, 2013, 2014), c("year", "site")][, .(source=site, dest=site, year=year)][ #will join to the simulated sampling table by source and dest, so make those each a column from site and preserve the year variable as a key
    , c("year", "source", "dest")][, monsoon := "SWM"])
UnqSurvey <- unique(SampTable, by=c("source", "dest", "year", "monsoon"))#add in the diff Monsoon seasons so there are complete parentage matrices later
AddDest <- rbind(SurveyData[year %in% c(2012, 2013, 2014) & prop_anem_samp >0][, c("year", "site")][, monsoon := "NEM"], 
                  SurveyData[year %in% c(2012, 2013, 2014) & prop_anem_samp >0][ , c("year", "site")][, monsoon := "SWM"])  #what destinations were sampled, for use with unassigned table
setcolorder(SimConn, c("particle_id", "source", "dest", "year", "monsoon", "date"))

#at this point, we can make the raw number assignment matrix, but we want to make a normalized version that is num assigned from a source to a destination/ num released from that source


In [None]:
#First, figure out how to get normalized conn values for all time frames
#then, test kernel fitting function on full matrix      
#last, start testing likelihood function build

In [33]:
#at this point, we can make the raw number assignment matrix, but we want to make a normalized version that is num assigned from a source to a destination/ num released from that source
AllYearsRec <- SimConn[, .(total_particles_rec = .N), by= c("source", "dest")] #all particles recruiting along each route FILTER HERE FOR TIME PERIOD***

AllYearsRelease <- unique(SimConn, by=c("source", "dest"))[, .(total_particles_released = ), by= c("source", "dest")] 
# sum the total number of particles recruiting for each source/dest, each row is a particle so just count them. if we were looking at smaller time scale that time id would be included as i
                , total_source_normalized_recruitment := total_particles_rec/daily_particles_released*total_release_days, by= c("source", "dest")]
    

                                                                                               #, total_source_normalized_recruitment := sum(total_particles_released)*uniqueN(SimConn, by = c( "date")), by=c("source", "dest")]

In [35]:
tail(test)

particle_id,source,dest,year,monsoon,date,num_route_parentage_matches,num_release_days_seasonal,num_release_days_annual,dest_num_females,...,sim_month,sim_day,sim_year,daily_particles_rec,daily_particles_released,dist_km,bearing,direction,total_particles_rec,total_source_normalized_recruitment
P212791366,Palanas,SF1,2014,NEM,2014-04-18,13,169,200,,...,4,4,2014,42,992,1.318289,157.935,157.935,3250,2250.756
P212791367,Palanas,SF1,2014,NEM,2014-04-18,13,169,200,,...,4,4,2014,42,992,1.318289,157.935,157.935,3250,2250.756
P212791368,Palanas,SF1,2014,NEM,2014-04-18,13,169,200,,...,4,4,2014,42,992,1.318289,157.935,157.935,3250,2250.756
P212791369,Palanas,SF1,2014,NEM,2014-04-18,13,169,200,,...,4,4,2014,42,992,1.318289,157.935,157.935,3250,2250.756
P212791370,Palanas,SF1,2014,NEM,2014-04-18,13,169,200,,...,4,4,2014,42,992,1.318289,157.935,157.935,3250,2250.756
P212791371,Palanas,SF1,2014,NEM,2014-04-18,13,169,200,,...,4,4,2014,42,992,1.318289,157.935,157.935,3250,2250.756


In [25]:
tail(SimConn)

year,num_parentage_matches,source,particles_released_daily,date,surv_weight,dest,dest_n_rec_annual,dest_prop_samp,dest_total_anems,...,sim_month,sim_day,sim_year,monsoon,daily_particles_rec,particle_id,source_size,total_particles_rec,total_particles_released,test
2014,13,Wangag,992,2014-03-31,0.7844995,Caridad Cemetery,0,0.4166667,12,...,3,3,2014,NEM,15,P209407030,0.2637462,2047,280736,283
2014,13,Wangag,992,2014-03-31,0.7844995,Caridad Cemetery,0,0.4166667,12,...,3,3,2014,NEM,15,P209407031,0.2637462,2047,280736,283
2014,13,Wangag,992,2014-03-31,0.7844995,Caridad Cemetery,0,0.4166667,12,...,3,3,2014,NEM,15,P209407032,0.2637462,2047,280736,283
2014,13,Wangag,992,2014-03-31,0.7844995,Caridad Cemetery,0,0.4166667,12,...,3,3,2014,NEM,15,P209407033,0.2637462,2047,280736,283
2014,13,Wangag,992,2014-03-31,0.7844995,Caridad Cemetery,0,0.4166667,12,...,3,3,2014,NEM,15,P209407034,0.2637462,2047,280736,283
2014,13,Wangag,992,2014-03-31,0.7844995,Caridad Proper,0,1.0,4,...,3,3,2014,NEM,1,P209407035,0.2637462,2132,280736,283


In [None]:
sum(unqiue(SimConn[source=="Wangag" & dest== "Caridad Cemetery"])[, daily_particles_rec])

In [21]:
992*728

In [None]:
138880 

In [31]:
unique(SimConn, by = c("source", "dest", "date"))[ #Unique() is important because the table is long form, so repeat rows for source/dest/date combos for each particle!
    , total_particles_rec := sum(daily_particles_rec), by= c("source", "dest")][# sum the total number of particles recruiting for each source/dest 
    , c("source", "dest", "total_particles_rec")]

source,dest,total_particles_rec
Other,Palanas,23438
Other,Wangag,20880
Other,Magbangon,19848
Other,Cabatoan,19795
Other,Caridad Proper,25716
Other,Sitio Tugas,13504
Other,Elementary School,24569
Other,Sitio Lonas,25630
Other,San Agustin,15292
Other,Visca,9529


In [30]:
head(SimConn)


year,num_parentage_matches,source,particles_released_daily,date,surv_weight,dest,dest_n_rec_annual,dest_prop_samp,dest_total_anems,...,source_prop_samp,source_total_anems,sim_month,sim_day,sim_year,monsoon,daily_particles_rec,particle_id,source_size,total_source_normalized_recruitment
2011,,Other,4497728,2010-10-01,0.003292181,Palanas,,,,...,,,10,10,2010,SWM,9,P1,,4.728255e-05
2011,,Other,4497728,2010-10-01,0.003292181,Palanas,,,,...,,,10,10,2010,SWM,9,P2,,4.728255e-05
2011,,Other,4497728,2010-10-01,0.003292181,Palanas,,,,...,,,10,10,2010,SWM,9,P3,,4.728255e-05
2011,,Other,4497728,2010-10-01,0.003292181,Palanas,,,,...,,,10,10,2010,SWM,9,P4,,4.728255e-05
2011,,Other,4497728,2010-10-01,0.003292181,Palanas,,,,...,,,10,10,2010,SWM,9,P5,,4.728255e-05
2011,,Other,4497728,2010-10-01,0.003292181,Palanas,,,,...,,,10,10,2010,SWM,9,P6,,4.728255e-05


In [18]:
head(SeededParticles)

source,particles_released_daily
Palanas,992
Wangag,992
Magbangon,992
Cabatoan,992
Caridad Cemetery,992
Caridad Proper,992


In [20]:
uniqueN(SimConn, by = c( "date"))

In [22]:
str(SimConn)

Classes ‘data.table’ and 'data.frame':	156546356 obs. of  21 variables:
 $ year                               : int  2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
 $ num_parentage_matches              : int  NA NA NA NA NA NA NA NA NA NA ...
 $ source                             : chr  "Other" "Other" "Other" "Other" ...
 $ particles_released_daily           : int  4497728 4497728 4497728 4497728 4497728 4497728 4497728 4497728 4497728 4497728 ...
 $ date                               : chr  "2010-10-01" "2010-10-01" "2010-10-01" "2010-10-01" ...
 $ surv_weight                        : num  0.00329 0.00329 0.00329 0.00329 0.00329 ...
 $ dest                               : chr  "Palanas" "Palanas" "Palanas" "Palanas" ...
 $ dest_n_rec_annual                  : int  NA NA NA NA NA NA NA NA NA NA ...
 $ dest_prop_samp                     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ dest_total_anems                   : int  NA NA NA NA NA NA NA NA NA NA ...
 $ source_n_rec_annual  

In [14]:
#make a parentage matrix for the whole biophysical results
FullBiophysMat <- as.matrix(rbind(dcast(SimConn[source != "Other", .(source, dest)][ #for assigned particles (not from "Other") keep the source/dest columns that will be expanded into wide form to become the connectivity matrix. Filtering for time period etc can be done in i here.
    , parentage :=1][ #mark each row as a parentage match, because at this point I'm using all particles as matches for the simulations
    order(source, dest)] #keep sites in alphabetical order so the matrix is correctly formatted!
        , source ~ dest, value.var="parentage", fun.aggregate = sum)[#use sum to count the matches for each id variable combo, that populated the cells of the matrix
    ,-"source"], #remove the source column after casting
      dcast(SimConn[source == "Other" , .(source, dest)][ #this is to cast the "unassigned row for the model parentage, which is anyting from "Other"
          , parentage :=1][order(source, dest)][, source := "unknown"], source ~ dest, value.var="parentage", fun.aggregate = sum)[,-"source"]))#bind these two cast wide form data tables (assigned and unassigned particles) and then turn into a matrix to be used in the likelihood functions
dim(FullBiophysMat)

In [15]:
FullBiophysMat

CAI,Cabatoan,Caridad Cemetery,Caridad Proper,Elementary School,Gabas,Haina,Hicgop South,Magbangon,Palanas,Poroc Rose,Poroc San Flower,San Agustin,Sitio Baybayon,Sitio Lonas,Sitio Tugas,Tamakin Dacot,Visca,Wangag
127034095,26,0,0,0,0,9,0,43,32,0,0,0,15,0,0,3,0,10
4423,1199,990,1723,219,0,48,922,871,96,69,122,330,1154,286,333,55,1,290
4018,687,2624,2767,129,8,12,273,736,66,48,121,446,862,613,322,1,0,77
3359,525,1028,1634,46,0,8,316,361,4,53,134,540,563,548,279,4,2,48
483,257,824,1808,1827,1,32,4880,239,1,180,280,745,175,712,2258,8,1,8
14,43,119,506,1747,19400,155,955,7,2,776,1181,6813,4,9410,1468,120,10368,0
19,10,22,31,17,3,4158,10,2,0,0,2,63,43,18,16,2323,56,4
2239,681,2285,4250,3638,50,434,5549,471,8,605,1052,1716,1040,1971,2138,245,73,57
2832,960,1826,2589,140,0,46,805,672,278,41,81,499,2244,211,233,23,13,725
3188,675,613,787,4,0,0,104,803,481,10,1,38,3136,1,54,0,0,481


In [4]:
#make a full SOURCE normalized parentage matrix for the whole biophysical results
#make a parentage matrix for the whole biophysical results
FullBiophysMatNorm <- as.matrix(rbind(dcast(SimConn[source != "Other", .(source, dest, total_source_normalized_recruitment)][ #for assigned particles (not from "Other") keep the source/dest columns that will be expanded into wide form to become the connectivity matrix. Filtering for time period etc can be done in i here.
    order(source, dest)] #keep sites in alphabetical order so the matrix is correctly formatted!
        , source ~ dest, value.var="total_source_normalized_recruitment", fun.aggregate = sum)[#use sum to count the matches for each id variable combo, that populated the cells of the matrix
    ,-"source"], #remove the source column after casting
      dcast(SimConn[source == "Other" , .(source, dest, total_source_normalized_recruitment)][ #this is to cast the "unassigned row for the model parentage, which is anyting from "Other"
          order(source, dest)][, source := "unknown"], source ~ dest, value.var="total_source_normalized_recruitment", fun.aggregate = sum)[,-"source"]))#bind these two cast wide form data tables (assigned and unassigned particles) and then turn into a matrix to be used in the likelihood functions
dim(FullBiophysMatNorm )

In [7]:
summary(SimConn$total_source_normalized_recruitment) #nothing should be above one

    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
0.000004 0.688680 0.689049 0.584106 0.689936 0.778888 

In [None]:
#next test likelihood function and kernel fitting with this matrix

In [10]:
FullBiophysMatNorm

CAI,Cabatoan,Caridad Cemetery,Caridad Proper,Elementary School,Gabas,Haina,Hicgop South,Magbangon,Palanas,Poroc Rose,Poroc San Flower,San Agustin,Sitio Baybayon,Sitio Lonas,Sitio Tugas,Tamakin Dacot,Visca,Wangag
90679230.0,0.0005748802,0.0,0.0,0.0,0.0,0.0002631494,0.0,0.00266793,0.002242842,0.0,0.0,0.0,0.0001416958,0.0,0.0,2.024226e-05,0.0,8.096904e-05
1386.648,67.91633,28.4314516,69.0675403,14.39214,0.0,0.4758065,126.3608871,89.65423,0.804435484,0.69455645,3.754032,9.87298387,51.50605,11.07258,10.54737903,0.9425403,0.001008065,6.052419
1562.052,23.2006,273.5504032,143.6905242,0.9183468,0.03830645,0.05846774,2.79737903,33.79637,1.030241935,0.14919355,1.248992,11.31048387,27.33266,31.51512,9.50201613,0.001008065,0.0,0.71875
1009.007,20.19052,81.9979839,68.233871,0.2177419,0.0,0.02217742,2.57056452,7.99496,0.006048387,0.19455645,1.516129,20.80443548,13.13206,23.50806,5.4203629,0.01008065,0.002016129,0.3165323
126.2349,6.668347,50.9455645,67.0745968,91.54536,0.001008065,0.1471774,191.1391129,17.56956,0.001008065,1.55443548,4.65121,13.73689516,3.386089,34.87097,80.77822581,0.02620968,0.001008065,0.01612903
0.1975806,0.7872984,4.1844758,30.5705645,99.42641,3874.252,1.684476,38.17641129,0.02721774,0.004032258,24.76008065,31.12601,456.66633065,0.008064516,1276.996,70.49596774,1.727823,946.8972,0.0
0.07106855,0.03427419,0.2227823,0.3049395,0.06905242,0.002520161,132.629,0.02620968,0.002016129,0.0,0.0,0.002016129,0.60735887,0.9319556,0.06350806,0.03024194,54.42188,0.1360887,0.008064516
339.0489,9.933972,70.1517137,145.3387097,397.5423,0.1381048,7.210685,142.39969758,5.129536,0.01108871,2.98840726,8.670363,15.91129032,41.60383,27.46724,33.17540323,1.151714,0.2444556,0.3714718
835.1956,22.85282,62.6633065,167.8457661,2.372984,0.0,1.21371,40.10383065,13.0746,4.385080645,0.14818548,0.7933468,42.37600806,107.4617,9.93246,4.69052419,0.2268145,0.04737903,25.70867
1311.524,58.96673,20.0675403,50.6804435,0.006048387,0.0,0.0,1.9516129,58.3619,30.226814516,0.03629032,0.001008065,0.5,372.1129,0.001008065,0.76814516,0.0,0.0,17.19456


In [18]:
FullBiophysMat

CAI,Cabatoan,Caridad Cemetery,Caridad Proper,Elementary School,Gabas,Haina,Hicgop South,Magbangon,Palanas,Poroc Rose,Poroc San Flower,San Agustin,Sitio Baybayon,Sitio Lonas,Sitio Tugas,Tamakin Dacot,Visca,Wangag
127034095,26,0,0,0,0,9,0,43,32,0,0,0,15,0,0,3,0,10
4423,1199,990,1723,219,0,48,922,871,96,69,122,330,1154,286,333,55,1,290
4018,687,2624,2767,129,8,12,273,736,66,48,121,446,862,613,322,1,0,77
3359,525,1028,1634,46,0,8,316,361,4,53,134,540,563,548,279,4,2,48
483,257,824,1808,1827,1,32,4880,239,1,180,280,745,175,712,2258,8,1,8
14,43,119,506,1747,19400,155,955,7,2,776,1181,6813,4,9410,1468,120,10368,0
19,10,22,31,17,3,4158,10,2,0,0,2,63,43,18,16,2323,56,4
2239,681,2285,4250,3638,50,434,5549,471,8,605,1052,1716,1040,1971,2138,245,73,57
2832,960,1826,2589,140,0,46,805,672,278,41,81,499,2244,211,233,23,13,725
3188,675,613,787,4,0,0,104,803,481,10,1,38,3136,1,54,0,0,481


In [None]:
Area

In [None]:
x <- list(Distances=Distances, Assignments=FullBiophysMat, Sampled_reefs=t(SiteIndex[site %in% SurveyData[, site], index]),
                  Reef_sizes=reef_sizes, Adult_sample_proportions=matrix(nrow=ncol(FullBiophysMat), ncol=1, 1)) #put inputs into a list because that's the bbmle format
Sim2012_4Fit <- suppressWarnings(mle2(LL_kt_bbmle, start=list(k=-3, theta=1), lower=c(-10, 0.15), upper=c(10, 8), method="L-BFGS-B", data=x, control=list(maxit=500)))


In [None]:
dim(Assignments2012)
dim(Adult_sample_proportions2012)
dim(Sampled_reefs2012)

In [None]:
x

In [None]:
dim(FullBiophysMat)

In [None]:
dcast(DestSampled[source == "Other", .(source, dest)][, parentage :=1][order(source, dest)], source ~ dest, value.var="parentage", fun.aggregate = sum)

In [None]:
#Mike Bode said I should be less aggressively subsampling the biophysical data, but how should I do that? The kernel fitting still requires that I have a prop samp vector... but when I tried putting 1's in that vector and using all of the particles for fitting I got crazy 

In [None]:
#sample the particle data
SimSample <- DestSampled[, .SD[sample(.N, dest_n_rec_annual)], by = c("year", "dest")] #, prob=surv_weight #randomly sample rows (particles) from the table according to the survival weighting, based on the number we sampled at each site in each year of surveys
check1 <- nrow(SimSample)

#assign parentage
SimParentage <- SimSample[source_prop_samp > 0][, .SD[sample(.N, num_parentage_matches)], by = .(year)][#Prob=SurvWeight, #now randomly assign parentage or not parentage, based on how well we sampled the source and the number of parentage matches we had in that year
                , parentage := 1]
#for faster searching, set keys
setkey(SimParentage, particle_id)
setkey(SimSample, particle_id)

l <- list(SimSample[particle_id %!in% SimParentage$particle_id][, parentage := 0], SimParentage)
SimSample <- rbindlist(l, use.names = TRUE, fill=TRUE, idcol = NULL)[, c("year", "source", "dest", "parentage", "monsoon")] #add back in to the unassigned particles, select only the columns necessary

#check results, for testing loop only
#nrow(SimSample)==check1 #should be TRUE
#sum(SimSample$parentage) #should be 37

In [None]:
sum(SimSample$parentage)

In [None]:

#calculate the unassigned row
Unassigned <- unique(SimSample[parentage==0][#not counting parentage!
    , num_sampled := .(.N), by= c("dest", "year", "monsoon")], by=c("dest", "year", "monsoon"))[, -"source"]
#add destinations not sampled in loop iteration to unassigned 
Unassigned <- Unassigned[AddDest, on=.(year=year, dest=site, monsoon)]
Unassigned$num_sampled[is.na(Unassigned$num_sampled)] <- 0
#sum(Unassigned$num_sampled, na.rm=T)==check1-37 #total should be the total sampled particles minus the total assigned
setorder(Unassigned, year, dest)

##adding in the possible sampled routes needs to happen AFTER calculating unassigned because unassigned is calculated from row counts
SimSample <- PropSampTable[SimSample, on=.(year, source, dest, monsoon)]
#check all is well- for testing loop only
#sum(SimSample$parentage) #should be 37

#add in the routes we could have assigned given our sampling so the parentage matrix is complete
UnqSimSample <- unique(SimSample, by=c("source", "dest", "year", "monsoon"))

AddRoutes <- UnqSurvey[!UnqSimSample, on = names(UnqSurvey)][ #what combos are not appearing because we didn't sample particles, but the route is possible based on our survey sampling
    , `:=`(parentage= 0, num_sampled = 0) ] #add the parentage column 

#add back into the sampled simulation data
l <- list(SimSample, AddRoutes[,-"num_sampled"])
SimSample <- rbindlist(l, use.names = TRUE, fill=TRUE, idcol = NULL)
setorder(SimSample, year, source, dest)

#make summary tables for each time frame, to be used for making parentage matrix
SimSampleByYear <- SimSample[,  .(total_parentage =sum(parentage)), by=c("year", "source", "dest")]
#sum(SimSampleByYear$total_parentage)
UnassignedByYear <- Unassigned[, .(total_sampled = sum(num_sampled)), by=c("year", "dest")]


SimSampleInterannual <- SimSample[,  .(total_parentage =sum(parentage)), by=c("source", "dest")]
#sum(SimSampleInterannual$total_parentage)
UnassignedInterannual <- Unassigned[, .(total_sampled = sum(num_sampled)), by=c("dest")]


SimSampleMonsoon <- SimSample[,  .(total_parentage =sum(parentage)), by=c("monsoon", "source", "dest")]
sum(SimSampleMonsoon$total_parentage)
UnassignedMonsoon <- Unassigned[, .(total_sampled = sum(num_sampled)), by=c("monsoon", "dest")]

#make a parentage matrix for each year
mat2012 <- dcast(SimSampleByYear[year==2012], source ~ dest, value.var="total_parentage", fun.aggregate = sum)[source %in% SurveyData[year==2012 & prop_anem_samp >0, site]][, -"source"] 
mat2012 <- as.matrix(rbind(mat2012, t(UnassignedByYear[year==2012][, total_sampled]), use.names=F))

mat2013 <- dcast(SimSampleByYear[year==2013], source ~ dest, value.var="total_parentage", fun.aggregate = sum)[source %in% SurveyData[year==2013 & prop_anem_samp >0, site]][, -"source"] 
mat2013 <- as.matrix(rbind(mat2013, t(UnassignedByYear[year==2013][, total_sampled]), use.names=F))

mat2014 <- dcast(SimSampleByYear[year==2014], source ~ dest, value.var="total_parentage", fun.aggregate = sum)[source %in% SurveyData[year==2014 & prop_anem_samp >0, site]][, -"source"] 
mat2014 <- as.matrix(rbind(mat2014, t(UnassignedByYear[year==2014][, total_sampled]), use.names=F))

mat2012_4 <- dcast(SimSampleInterannual, source ~ dest, value.var="total_parentage", fun.aggregate = sum)[source %in% SurveyData[year %in% c(2012, 2013, 2014) & prop_anem_samp >0, site]][, -"source"] 
mat2012_4 <- as.matrix(rbind(mat2012_4, t(UnassignedInterannual[, total_sampled]), use.names=F))

matNEM <- dcast(SimSampleMonsoon[monsoon=="NEM"], source ~ dest, value.var="total_parentage", fun.aggregate = sum)[source %in% SurveyData[year %in% c(2012, 2013, 2014) & prop_anem_samp >0, site]][, -"source"] 
matNEM <- as.matrix(rbind(matNEM, t(UnassignedMonsoon[monsoon=="NEM"][, total_sampled]), use.names=F))

matSWM <- dcast(SimSampleMonsoon[monsoon=="SWM"], source ~ dest, value.var="total_parentage", fun.aggregate = sum)[source %in% SurveyData[year %in% c(2012, 2013, 2014) & prop_anem_samp >0, site]][, -"source"] 
matSWM <- as.matrix(rbind(matSWM, t(UnassignedMonsoon[monsoon=="SWM"][, total_sampled]), use.names=F))



In [None]:
matSWM

In [None]:
#Biophysical source normalized matrix
#for each source, what is the normalized recruitment at each destination? 
GenSimConn[, annual_source_normalized_recruitment := sum(daily_particles_recruited)/sum(daily_particles_released), by=c("source", "destination","year_sampled")]


__Code function for likelihood of parentage data given a biophysical model, based on Bode et al. 2019 in Plos Bio__

In [None]:
#lay out all the pieces
pop_size_vec <- #vector of pop sizes for all reefs (a). This term is also used in parentage kernel fitting, but reef sizes are substituted as a proxy for pop size. This is should be bootstrapped to account for uncertainty.
BioPhysMat <- #source normalized biophysical connectivity matrix. In Eqn. S3.4, this is m ajt/r a (*should it be r at? As in all particles released in time period t?)
prop_samp_vec <- #vector of proportion of habitat sampled for all reefs in time period t
unassigned_vec#from genetic parentage data- a vector of the number of unassigned recruits at each destination reef in the system- we only have this for all sampled reefs.... what should the dimensions be?*


In [None]:
#format the data

In [None]:
reef

In [None]:
head(AnnualRecsSamp)

In [None]:
#Eqn. S3.4 term 1 for loop

for(j in 1:nrow(centroids)){ #for each destination reef in the whole Camotes system
    
dest_unassigned <- sum(unassigned_vec[t==time_period]) #the sum of all unassigned recruits at time period t
    
        for(i in 1:nrow(centroids)){ #term 1- for each source reef in the whole Camotes system, expected unassigned at each destination reef
            
            term1_num <- ((1-prop_samp_vec[i,t== time_period])^2)*pop_size_vec[i]*BioPhysMat[i,j,t]
            
            for(a in 1:nrow(centroids)){ #for all reefs total in the whole Camotes system, denominator of term 1
                
                value_a <-  pop_size_vec[a]*BioPhysMat[a, j, t]
                term1_denom <- sum(value_a)
            }
        
        term1 = term1_num/term1_denom
        }

    for(k in 1:nrow(centroids)){ #term 2- for each source reef in the whole Camotes system, expected assigned at each destination reef
        
        term2_num <- (1-(1-prop_samp_vec[i,t== time_period])^2)*pop_size_vec[i]*BioPhysMat[i,j,t]
            
            for(a in 1:nrow(centroids)){ #for all reefs total in the whole Camotes system, denominator of term 2
                
                value_a <-  pop_size_vec[a]*BioPhysMat[a, j, t]
                term2_denom <- sum(value_a)
            }
        
        term2 = term2_num/term2_denom
        
        }
        
    

prob_unsampled <- dest_unassigned*log(term1)  
prob_sampled <- dest_assigned*log(term2)
ll = prob_unsampled + prob_sampled
}

In [None]:

for(i in 1:NumSampledReefs){
   This_SS_A = Adult_sample_proportions[i]#same
   for(j in 1:NumSampledReefs){
    SettlersFromAssignedReefs = Settlers[Sampled_reefs[i],Sampled_reefs[j]]#same
    #Not all settlers from assigned reefs will be assigned, because not all adults were sampled
    AssignedSettlers[i,j] = SettlersFromAssignedReefs*(This_SS_A^2 + 2*This_SS_A*(1 - This_SS_A))
    AssignedSettlers[NumSampledReefs+1,j] = AssignedSettlers[NumSampledReefs+1,j] + SettlersFromAssignedReefs*(1-This_SS_A)^2 #The three dots '...' tell matlab that the code on a given line continues on the next line.
   }
}
Unsampled = as.matrix(setdiff(1:NumReefs,Sampled_reefs))


for(j in 1:NumSampledReefs){
   AssignedSettlers[NumSampledReefs+1,j] = AssignedSettlers[NumSampledReefs+1,j] + sum(Settlers[Unsampled,Sampled_reefs[,j]]) 
}
   



__Loop through sampling different proportions of other and CAI source particles and compare the unassigned proportions of total sample particles to the genetic observations from survey data__

In [None]:
PropAssignedTable <- rbind(kernels[Year %in% c("2012", "2013", "2014")][
    , PropAssigned := PercentAssigned/100][ #change to proportion note percent
    , c("Year", "NumParentageMatches", "NumOffsSampled", "PropAssigned")],                      
    unique(kernels[Year %in% c("2012", "2013", "2014")][ #only the years coinciding with the models
    , `:=` (NumParentageMatches=sum(NumParentageMatches), NumOffsSampled=sum(NumOffsSampled), PropAssigned = NumParentageMatches/NumOffsSampled, Year = "2012-4")][ #summarise across the 3 years
    , c("Year","NumParentageMatches", "NumOffsSampled", "PropAssigned")], by="Year"))
    

PropAssignedTable[]

#add in the average sampled proportion of anemones
AvgPropSamp <- SurveyData[PropAnemSamp >0, .(PropAnemSamp = mean(PropAnemSamp)), by="year"][ #average for each site we sampled, how well we sampled
    year %in% c("2012", "2013", "2014")][
    , year :=as.character(year)]

ExpectedPropAssigned <- AvgPropSamp[PropAssignedTable, on=.(year=Year)]
ExpectedPropAssigned$PropAnemSamp[is.na(ExpectedPropAssigned$PropAnemSamp)] <- mean(ExpectedPropAssigned$PropAnemSamp, na.rm = T) #replace the 2012-4 NA with the average from the 3 years

#what's the normalized self recruitment proportion back to the population
ExpectedPropAssigned[, ExpAssigned := NumParentageMatches/(NumOffsSampled*PropAnemSamp)][] #this is the expected assignment for the whole surveyed population if we had sampled all adults (which we kind of do when we use the simulation results)
#at some point... maybe it would be better to compare on a site to site level? idk that's pretty fine scale, I don't know that our ROMS model can be expected to compare so well with that

__For the expected values of recruits from outside of our sampled region, I'm using the intermediate "PredictedProportions" matrix from the Bode kernel fitting script (https://github.com/MikeBode/Parentage_kernel_fitting/blob/master/Kernel_Fitting_Function.m) because it accounts for how well sites were sampled when estimating proportions of recruits from unsampled sites__

In [None]:
#####outside of the loop*****

PropSampTable <- SurveyData[PropAnemSamp >0, c("year", "site")]

#make sure all sampled sites are represented by joining the survey data to the sampled simulation
PropSampTable <- rbind(SurveyData[PropAnemSamp >0 & year %in% c(2012, 2013, 2014), c("year", "site")][, .(Source=site, Dest=site, Year=year)][ #will join to the simulated sampling table by source and dest, so make those each a column from site and preserve the year variable as a key
    , c("Year", "Source", "Dest")][, Monsoon := "NEM"], SurveyData[PropAnemSamp >0 & year %in% c(2012, 2013, 2014), c("year", "site")][, .(Source=site, Dest=site, Year=year)][ #will join to the simulated sampling table by source and dest, so make those each a column from site and preserve the year variable as a key
    , c("Year", "Source", "Dest")][, Monsoon := "SWM"])
#unq_survey <- unique(PropSampTable, by=c("Source", "Dest", "Year", "Monsoon"))#, unique(PropSampTable, by=c("Source", "Dest", "Year"))[, Monsoon := "SWM"]) #add in the diff Monsoon seasons so there are complete parentage matrices later
#add_dest <- rbind(SurveyData[year %in% c(2012, 2013, 2014) & PropAnemSamp >0][, c("year", "site")][, Monsoon := "NEM"], SurveyData[year %in% c(2012, 2013, 2014) & PropAnemSamp >0][, c("year", "site")][, Monsoon := "SWM"])  #what destinations were sampled, for use with unassigned table

###outside of the loop
PropToEval <- seq(0.1, 1, 0.1) #make a vector of proportions to sample iterativaley and compare
#empty table to hold results
PropSampOtherCAI <- data.table(TimeScale=character(), TimeID=character(), PropUnassigned=numeric(), ExpUnassigned=numeric(),  PropSampEval=numeric(), Check1=character(), Check2=character(), NrowSimConn=numeric())



In [None]:
pb <- txtProgressBar(min = 0, max =length(PropToEval), style = 3)

StartTime <- Sys.time()

for(i in 1:length(PropToEval)){

PropSampOtherCAI_int <- data.table(TimeScale=character(), TimeID=character(), PropUnassigned=numeric(), ExpUnassigned=numeric(),  PropSampEval=numeric(), Check1=character(), Check2=character(), NrowSimConn=numeric())[1:4]

dest_sampled <- date_join[DestPropSamp >0]
check1 <- nrow(dest_sampled)
dest_sampled <- dest_sampled[c(dest_sampled[, .I[Source != "Other"]], sample(dest_sampled[, .I[Source == "Other"]], length(dest_sampled[, .I[Source == "Other"]])*PropToEval[i]))]
check2 <- nrow(dest_sampled)
dest_sampled <- dest_sampled[c(dest_sampled[, .I[Source != "CAI"]], sample(dest_sampled[, .I[Source == "CAI"]], length(dest_sampled[, .I[Source == "CAI"]])*PropToEval[i]))]
check3 <- nrow(dest_sampled)

#check that we have less rows, should both be TRUE
test1 <- check1 > check2
test2 <- check2 > check3
#check1 > check2
#check2 > check3


#join in the number of parentage matches observed by year
dest_sampled <- kernels[Year %in% c("2012", "2013", "2014")][, Year:=as.integer(Year)][,c("Year", "NumParentageMatches")][dest_sampled, on=.(Year=YearSampled)]#[

#randomly subsample the sampled particle data
sim_sample <- dest_sampled[, .SD[sample(.N, DestNOffsAnnual, prob=SurvWeight)], by = c("Year", "Dest")] #randomly sample rows (particles) from the table according to the survival weighting, based on the number we sampled at each site in each year of surveys

PropUnassignedByYear <- (sim_sample[Source == "CAI"| Source == "Other", .(.N), by="Year"][, N]/#total particales sampled from other/CAI sources
sim_sample[, .(.N), by="Year"][, N]) #total particles sampled

PropSampOtherCAI_int$TimeScale[1] <- "annual"
PropSampOtherCAI_int$TimeID[1] <- "2012"
PropSampOtherCAI_int$PropUnassigned[1] <- PropUnassignedByYear[1]
PropSampOtherCAI_int$ExpUnassigned[1] <- 1-(ExpectedPropAssigned[1, ExpAssigned])
PropSampOtherCAI_int$PropSampEval[1] <- PropToEval[i]
PropSampOtherCAI_int$Check1[1] <- test1
PropSampOtherCAI_int$Check2[1] <- test2
PropSampOtherCAI_int$NrowSimConn[1] <- nrow(dest_sampled)
    
PropSampOtherCAI_int$TimeScale[2] <- "annual"
PropSampOtherCAI_int$TimeID[2] <- "2013"
PropSampOtherCAI_int$PropUnassigned[2] <- PropUnassignedByYear[2]
PropSampOtherCAI_int$ExpUnassigned[2] <- 1-(ExpectedPropAssigned[2, ExpAssigned])
PropSampOtherCAI_int$PropSampEval[2] <- PropToEval[i]
PropSampOtherCAI_int$Check1[2] <- test1
PropSampOtherCAI_int$Check2[2] <- test2
PropSampOtherCAI_int$NrowSimConn[2] <- nrow(dest_sampled)
    
PropSampOtherCAI_int$TimeScale[3] <- "annual"
PropSampOtherCAI_int$TimeID[3] <- "2014"
PropSampOtherCAI_int$PropUnassigned[3] <- PropUnassignedByYear[3]
PropSampOtherCAI_int$ExpUnassigned[3] <- 1-(ExpectedPropAssigned[3, ExpAssigned])
PropSampOtherCAI_int$PropSampEval[3] <- PropToEval[i]
PropSampOtherCAI_int$Check1[3] <- test1
PropSampOtherCAI_int$Check2[3] <- test2
PropSampOtherCAI_int$NrowSimConn[3] <- nrow(dest_sampled)
    
PropSampOtherCAI_int$TimeScale[4] <- "interannual"
PropSampOtherCAI_int$TimeID[4] <- "2012_4"
PropSampOtherCAI_int$PropUnassigned[4] <- nrow(sim_sample[Source == "CAI"| Source == "Other"])/nrow(sim_sample)
PropSampOtherCAI_int$ExpUnassigned[4] <- 1-(ExpectedPropAssigned[4, ExpAssigned])
PropSampOtherCAI_int$PropSampEval[4] <- PropToEval[i]
PropSampOtherCAI_int$Check1[4] <- test1
PropSampOtherCAI_int$Check2[4] <- test2
PropSampOtherCAI_int$NrowSimConn[4] <- nrow(dest_sampled)

l <- list(PropSampOtherCAI, PropSampOtherCAI_int)
PropSampOtherCAI <- rbindlist(l, use.names = TRUE, fill=TRUE, idcol = FALSE)
setTxtProgressBar(pb, i)
    
}

close(pb)
EndTime <- Sys.time()
EndTime-StartTime

fwrite(PropSampOtherCAI, file="~/oceanography/script_output/SimulationSummaryTables/PropSampOtherCAIEvaluation.csv")


In [None]:
PropSampOtherCAI[order(-PropUnassigned)]

__Seems like good justification to not subsample the ROMS particles from other/Camotes Islands__

In [None]:
nrow(dest_sampled)
nrow(SimConn)

In [None]:
#save inter file
#fwrite(dest_sampled, file="~/oceanography/script_output/LongFormConnWithProbsTest.csv")
#see if I can write as a compressed file so it can be stored on github
#https://stackoverflow.com/questions/42788401/is-possible-to-use-fwrite-from-data-table-with-gzfile