In [1]:
# Packages this notebook attaches, in attach order.
# NOTE(review): "dplyr" appears twice in this vector (harmless). Later cells call
# dplyr::select() with an explicit namespace -- presumably because a package attached
# after dplyr masks select(); confirm before reordering or trimming this list.
Packages <- c("dplyr","lattice", "ggplot2", "bbmle", "ncdf4","dplyr","tidyr", "stringr", "tidyverse", "raster", "lubridate", "RColorBrewer")

# Attach every package by name; startup messages and the lapply() return value are suppressed.
invisible(suppressPackageStartupMessages(lapply(Packages, library, character.only = TRUE)))

# Set the session working directory (machine-specific absolute path).
setwd('/local/home/katrinac/oceanography')
# Infix "not in": TRUE for every element of x that has no match in table.
"%!in%" <- function(x,table) match(x,table, nomatch = 0) == 0

In [2]:
#read in data and add site names
#connectivity matrices, loaded as a raster brick (later cells pull one layer per date)
conn_mat_all <- brick(
    "~/oceanography/ROMS/data/Camotes_Sea_Connectivity_Matrices_08_day_PLD_integer.nc",
    values = TRUE
)

#number of grid cells per site (site ids run North-South, i.e. Palanas is number 1)
sites_n <- read.table(
    "~/oceanography/ROMS/input//camotes_vertices_sites_results_water_only.txt",
    sep = " ",
    header = TRUE,
    stringsAsFactors = FALSE
)

#the order of sites in the connectivity matrix
sites_order <- read.table(
    "~/oceanography/ROMS/input/sites_order_as_in_roms.txt",
    sep = " ",
    header = TRUE,
    stringsAsFactors = FALSE
)

# Site names in site-index order: position 1 is "Palanas", position 28 is "Other".
sites <- c("Palanas", "Wangag", "Magbangon", "Cabatoan", "Caridad Cemetery", "Caridad Proper", "Hicgop", "Hicgop South", "Sitio Tugas", "Elementary School", "Sitio Lonas", "San Agustin", "Poroc San Flower", "Poroc Rose", "Pangasugan", "Visca", "Gabas", "Tamakin Dacot", "Haina", "Sitio Baybayon", "SF1", "SF2", "SF3", "SF4", "SF5", "SF6", "CAI", "Other")

# Numeric index of each site, derived from the name vector so the two can never
# drift apart (previously a hard-coded 1..28 sequence).
site_ind <- as.numeric(seq_along(sites))

# Lookup table: site name <-> numeric site index.
# Built column-wise so site_ind stays numeric; cbind() on a character and a numeric
# vector would coerce both to character and need converting back.
site_id <- data.frame(sites = sites, site_ind = site_ind, stringsAsFactors = FALSE)




In [3]:
#add in dates (seasons here are field seasons, not monsoon seasons)
#Each seasonN is a one-column data frame ("date") with one row per calendar day.
col <- "date"
col2 <- "index"

season1 <- setNames(
    data.frame(seq(as.Date("2010/10/01"), as.Date("2011/05/31"), by = "days")),
    col
)

season2 <- setNames(
    data.frame(seq(as.Date("2011/10/01"), as.Date("2012/05/31"), by = "days")),
    col
)

season3 <- setNames(
    data.frame(seq(as.Date("2012/10/01"), as.Date("2013/05/31"), by = "days")),
    col
)

season4 <- setNames(
    data.frame(seq(as.Date("2013/10/01"), as.Date("2014/04/18"), by = "days")),
    col
)

#stack the four seasons in chronological order
all_dates <- rbind(season1, season2, season3, season4)

#running index 1..n, one per day. Derived from the number of dates instead of a
#hard-coded 930 so it stays correct if any season boundary above is edited.
#Kept as double to match the type of the original seq(1, 930, 1).
time_step <- setNames(
    data.frame(as.numeric(seq_len(nrow(all_dates)))),
    col2
)

#index/date lookup: column "index" first, then "date"
all_dates2 <- cbind(time_step, all_dates)
dim(all_dates2)


In [4]:
#Turn each daily connectivity layer into a long data frame tagged with its date,
#then stack all days into one data frame with columns date, sink, source, number.
#NOTE(review): the three columns returned by as.data.frame(..., xy = TRUE) are labelled
#sink, source, number in that order. Whether the first coordinate really is the sink
#cannot be verified from this notebook -- confirm against the .nc file layout.

cols_conn_mat <- c("date", "sink", "source", "number")
cols_tmp <- c("sink", "source", "number")

#Build one data frame per date and bind once at the end. Growing conn_mat_full with
#rbind() inside a loop re-copies the whole table on every iteration.
conn_mat_list <- lapply(seq_len(nrow(all_dates2)), function(i) {
    #layer i of the brick is paired with row i of all_dates2
    layer_df <- as.data.frame(conn_mat_all[[i]], xy = TRUE)
    names(layer_df) <- cols_tmp

    #the single date is recycled to however many cells the layer has,
    #so no cell count (previously 784) is hard-coded
    data.frame(date = all_dates2$date[i], layer_df)
})

conn_mat_full <- bind_rows(conn_mat_list)



In [5]:
#connect site names
#replace the numeric source index with the matching site name from site_id
conn_mat_full2 <- conn_mat_full %>%
    left_join(site_id, by = c("source" = "site_ind")) %>%
    dplyr::select(-source) %>%
    rename(source = sites)

#same lookup for the sink index; the resulting name column is called "destination"
conn_mat_full3 <- conn_mat_full2 %>%
    left_join(site_id, by = c("sink" = "site_ind")) %>%
    dplyr::select(-sink) %>%
    rename(destination = sites)

    

In [6]:
#make all versions of Hicgop "Hicgop South" to match empirical data
#(applied to both the source and the destination label)
conn_mat_full3 <- conn_mat_full3 %>%
    mutate(
        source = ifelse(source == "Hicgop", "Hicgop South", source),
        destination = ifelse(destination == "Hicgop", "Hicgop South", destination)
    )
    

#trim out sand flats and other sites not represented in our sampling
conn_mat_full4 <- local({
    #labels removed from both ends of every connection
    not_sampled <- c("SF1", "SF2", "SF3", "SF4", "SF5", "SF6",
                     "Pangasugan", "Other", "CAI")

    conn_mat_full3 %>%
        #rows with a missing label are dropped too, exactly as a chain of != tests would
        filter(
            !is.na(source) & !(source %in% not_sampled),
            !is.na(destination) & !(destination %in% not_sampled)
        ) %>%
        mutate(month = month(ymd(date)))
})

#NOTE(review): an earlier note-to-self here says sources/destinations may have been
#mixed up in the loop that builds conn_mat_full, i.e. the column called "source" may
#actually hold destinations. That has not been resolved. The code below groups by the
#column currently named "destination"; verify the orientation before relying on it.

#Total particles arriving at each destination during season 2, over all simulated sites.
#conn_mat_full3 is used rather than conn_mat_full4 because, for the unassigned count,
#the sites that were trimmed out need to be kept.
#summarise() gives one row per destination and an ungrouped result; rows come back
#sorted by destination, which does not matter for the joins on "destination" below.
ConnMatTotalRec <- conn_mat_full3 %>%
    filter(date %in% season2$date) %>%
    group_by(destination) %>%
    summarise(SumParticlesRec = sum(number)) %>%
    ungroup()

nrow(ConnMatTotalRec) #should be 28-1, the total number of sites simulated after accounting for the Hicgop Merge


In [7]:
#put in North-South order for kernel fitting
SitesNS <- read.csv("~/parentage/text_file/sites_NS.txt", stringsAsFactors = FALSE)

#"S. Magbangon" is relabelled "Magbangon", the "N. Magbangon" row is dropped,
#and the index column from the file is discarded
SitesNS <- SitesNS %>%
    mutate(site = ifelse(site == "S. Magbangon", "Magbangon", site)) %>%
    filter(site != "N. Magbangon") %>%
    dplyr::select(-index)

#SitesNS$index <- seq(1,18, 1) #might not need this....
#SitesNS$destination <- SitesNS$source


In [None]:
#positions of SitesNS sites that never appear as a source in SimDisp2012
#(SimDisp2012 is built in a later cell, so that cell has to be run first)
which(!(SitesNS$site %in% SimDisp2012$source))
#inspect row 9 of SitesNS
SitesNS[9, ]

In [8]:
#add together all of the particles for each season. At some point need to consider matching times that we sampled, from the fish meta-data

#season-2 particle totals for every source/destination pair (one row per pair)
SimDisp2012Int <- conn_mat_full4 %>%
    filter(date %in% season2$date) %>%
    group_by(source, destination) %>%
    mutate(SumParticles = sum(number)) %>%
    distinct(source, destination, .keep_all = TRUE) %>%
    dplyr::select(-date, -month, -number)

#joining onto SitesNS twice puts the rows in SitesNS order, first by source ...
SimDisp2012Int2 <- SitesNS %>%
    left_join(SimDisp2012Int, by = c("site" = "source")) %>%
    rename(source = site)
#... and then by destination
SimDisp2012 <- SitesNS %>%
    left_join(SimDisp2012Int2, by = c("site" = "destination")) %>%
    rename(destination = site)



#the total recruits in the simulation for each destination site we empirically sampled
SimDisp2012RecSampInt <- filter(
    ConnMatTotalRec,
    destination %in% SimDisp2012$destination
) #%>%
    #mutate(source="unsampled") #not really, just unsampled empirically. Use this to get the unassigned row?

#per destination: particles assigned to a sampled source, and the remainder
#(total arriving minus assigned); one row per destination is kept
SimDisp2012RecSamp <- SimDisp2012 %>%
    left_join(SimDisp2012RecSampInt, by = "destination") %>%
    group_by(destination) %>%
    mutate(
        TotalParticlesAssigned = sum(SumParticles),
        TotalUnassigned = SumParticlesRec - TotalParticlesAssigned
    ) %>%
    distinct(destination, .keep_all = TRUE) %>%
    dplyr::select(-source, -SumParticles)


In [9]:
#preview the first six rows of each table
head(SimDisp2012, n = 6L)
head(SimDisp2012RecSamp, n = 6L)

destination,source,SumParticles
Palanas,Palanas,42
Palanas,Wangag,221
Palanas,Magbangon,177
Palanas,Cabatoan,66
Palanas,Caridad Cemetery,57
Palanas,Caridad Proper,35


destination,SumParticlesRec,TotalParticlesAssigned,TotalUnassigned
Palanas,9473,1435,8038
Wangag,11311,2534,8777
Magbangon,15881,4284,11597
Cabatoan,17178,4664,12514
Caridad Cemetery,26509,9076,17433
Caridad Proper,33072,13088,19984


In [11]:
#check that everything looks correct

18^2 #this should be the rows
nrow(SimDisp2012)
nrow(SimDisp2012RecSamp) #should be 18 after the Hicgop merge

In [None]:
#sources are rows, columns are destinations
#The order DOES matter: spread() sorts both rows and columns alphabetically, which
#throws away the North-South site order set up for kernel fitting. pivot_wider() keeps
#columns in order of first appearance (the destination order of SimDisp2012), and the
#rows are explicitly put in that same site order.
#NOTE(review): this assumes the distance/area/centroid inputs use the same site order
#as SimDisp2012 -- confirm against those files.
SimDisp2012Mat <- SimDisp2012 %>%
    pivot_wider(id_cols = source, names_from = destination, values_from = SumParticles) %>%
    arrange(match(source, unique(SimDisp2012$destination))) %>%
    as.data.frame() #plain data.frame: row names on a tibble are deprecated

#move the source labels into the row names so only destination columns remain
rownames(SimDisp2012Mat) <- SimDisp2012Mat$source
SimDisp2012Mat$source <- NULL

In [32]:
#turn into parentage matrix

#SimDisp2012Mat <- SimDisp2012 %>%
#    ungroup() %>%
#    spread(source, destination, SumParticles)


#make the rest of the kernel input files
#NOTE(review): the paths below mix the 1340_loci and 894_loci input folders -- confirm that is intended.

#we sampled all sites completely because we are working with simulated data
Adult_sample_proportions <- matrix(1, nrow = 18, ncol = 1)

#sampled all reefs in the distance matrix/area/centroid files (column 18 of the index file is used)
Sampled_reefs <- as.matrix(
    read.csv("~/parentage/kernel_fitting/1340_loci/input/site_index_all.csv", header = FALSE)
)[, 18]

#drop a magbangon row (and the matching column)
Distances <- as.matrix(
    read.csv("~/parentage/kernel_fitting/1340_loci/input/distance_matrix_unsurveyed.csv", header = FALSE)
)[-2, -2]

Reef_sizes <- as.matrix(
    read.csv("~/parentage/kernel_fitting/894_loci/area_unsurveyed.c.csv", header = FALSE)
)

Centroids <- as.matrix(
    read.csv("~/parentage/kernel_fitting/894_loci/centroids_unsurveyed.csv", header = TRUE)
)



In [33]:
#row count of the reef-size input next to the dimensions of the dispersal table
NROW(Reef_sizes)
dim(SimDisp2012Mat)

In [None]:
#monsoon season prep
#month numbers for the two halves of the year (NEM / SWM; presumably the northeast
#and southwest monsoon -- confirm)
NEM_months <- c(11, 12, 1:4)
SWM_months <- as.numeric(5:10)

#NOTE(review): `fraction` is not a column that any visible cell adds to conn_mat_full4
#(which carries date, number, source, destination, month), so these two pipelines may
#fail as written -- confirm where `fraction` is meant to be computed.
#NOTE(review): the field-season dates used above only span October to May, so the
#SWM subset can only contain May and October records.

#largest `fraction` per source/destination pair within the NEM months
NEM <- conn_mat_full4 %>%
    filter(month %in% NEM_months) %>%
    group_by(source, destination) %>%
    summarise(conn = max(fraction))

#largest `fraction` per source/destination pair within the SWM months
SWM <- conn_mat_full4 %>%
    filter(month %in% SWM_months) %>%
    group_by(source, destination) %>%
    summarise(conn = max(fraction))