# Correcting the map counts to account for diving seals and higher on-the-ground counts
This file contains the code that inflates the map counts using detection-rate models fitted to data from Erebus Bay, where ground counts could be conducted.  
  
In a nutshell, we use these models to estimate the detection rate, and then inflate the map counts by dividing them by this rate.


In [58]:
## Start from a clean slate: remove every (non-hidden) object in the current
## workspace, then run the garbage collector; the memory-usage table returned
## by gc() is displayed as the cell output.
rm(list = ls(all.names = FALSE))
gc()

Unnamed: 0,used,(Mb),gc trigger,(Mb).1,max used,(Mb).2
Ncells,680901,36.4,1442291,77.1,1442291,77.1
Vcells,1165538,8.9,10563568,80.6,13204460,100.8


### Loading libraries, functions and data

In [60]:
## Packages used by this notebook. plyr is attached before dplyr so that
## dplyr's functions are not masked by plyr's same-named ones.
libs <- c("ggplot2", "plyr", "dplyr")

## require() merely returns FALSE when a package cannot be loaded, so collect
## the results and stop right away with the names of the missing packages
## instead of failing later with an unrelated "could not find function" error.
libsLoaded <- vapply(libs, require, logical(1), character.only = TRUE)
if (!all(libsLoaded)) {
  stop(
    "Could not load required package(s): ",
    paste(libs[!libsLoaded], collapse = ", "),
    call. = FALSE
  )
}

## Root of the local clone of the project repository; all scripts and data
## files below are addressed relative to it (note the trailing slash).
pathToLocalGit <- "/home/ubuntu/Workspace/ContinentalWESEestimates/"

## Project helper functions.
## NOTE(review): presumably this is where predictDetRates(), called further
## down, is defined - confirm.
source(paste0(pathToLocalGit, "scripts/countSealsFromTags_functions.R"))

## Read the counts produced by the notebook "Count Seals From Tags.ipynb".
## The .Rdata file is expected to restore an object named `corrcounts`,
## whose first rows are shown as a quick visual check.
load(paste0(pathToLocalGit, "data/countsCorrectedByHour.Rdata"))
head(corrcounts, n = 6L)

## Version check of the loaded data: total the lower limit, the point estimate
## (column `predicted`) and the upper limit by region, append a "Total" row
## holding the sum over regions, and print the table for comparison.
estByRegionCorr <- corrcounts %>%
  group_by(region) %>%
  dplyr::summarize(
    corrLclNumSeals = sum(corrLclNumSeals),
    corrNumSeals = sum(predicted),
    corrUclNumSeals = sum(corrUclNumSeals)
  ) %>%
  as.data.frame()
## The first column is `region`; every remaining column is summed for the
## grand-total row.
estByRegionCorr <- rbind(
  estByRegionCorr,
  data.frame(region = "Total", lapply(estByRegionCorr[-1], sum))
)
print(estByRegionCorr)

Unnamed: 0_level_0,regionMapId,lclNumSeals,estNumSeals,uclNumSeals,mapcoords.x1,mapcoords.x2,acquisition_date,region,satId,numViews,⋯,year,totalTags,scaledTotalTags,avgTags,logAvgTags,sinH,predicted,hourCorr,corrLclNumSeals,corrUclNumSeals
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dttm>,<chr>,<chr>,<dbl>,⋯,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,AMU101663,1,1,4,-85.48336,-73.34752,2010-11-20 21:50:18,AMU,WV01,8,⋯,2010,1,-0.33940735,1,0.0,1,0,-1,0,3
2,AMU102072,1,1,4,-85.44474,-73.41747,2010-11-20 21:50:18,AMU,WV01,6,⋯,2010,2,-0.29654759,1,0.0,1,0,-1,0,3
3,AMU102099,1,1,4,-85.79235,-73.17324,2010-11-20 21:50:18,AMU,WV01,6,⋯,2010,2,-0.29654759,2,0.6931472,1,2,1,2,5
4,AMU103615,1,1,4,-85.79235,-73.17696,2010-11-20 21:50:18,AMU,WV01,8,⋯,2010,1,-0.33940735,1,0.0,1,0,-1,0,3
5,AMU103695,1,1,4,-85.79235,-73.16951,2010-11-20 21:50:18,AMU,WV01,9,⋯,2010,1,-0.33940735,1,0.0,1,0,-1,0,3
6,AMU103791,1,6,14,-85.81166,-73.17324,2010-11-20 21:50:18,AMU,WV01,15,⋯,2010,8,-0.03938903,8,2.0794415,1,8,2,3,16


  region corrLclNumSeals corrNumSeals corrUclNumSeals
1    AMU            1289         2465            6499
2    EA1            2720         6312           16471
3    EA2            2864         5753           14624
4    QMA            3682         7991           20186
5    RSS            8207        22210           59559
6    WAP            3552         8554           22077
7  Total           22314        53285          139416


Now we use the helper function `predictDetRates()` to obtain the detection rates under both models (the colony model and the islet/mainland model). Note that we can specify the "weight" of island counts: the proportion of map locations that resemble the islet haul-out locations in Erebus Bay (i.e., Turk's Head-Tryggve and Hutton Cliffs). According to Michelle and David, the vast majority of counts came from such locations, so here we are conservative and assume that 95% of locations are islets (`islandWeight=0.95`).

In [61]:
## Add a scaledNumTags column fixed at 0 for every map location before asking
## for the detection rates.
## NOTE(review): presumably a covariate of the detection models that is held
## at its scaled mean - confirm against countSealsFromTags_functions.R.
corrcounts[["scaledNumTags"]] <- 0

## Request the detection rates for each map location (keyed by regionMapId),
## weighting islet-like locations at 0.95, and preview the result.
adjRates <- predictDetRates(
  pathToGit = pathToLocalGit,
  dat = corrcounts,
  keyFieldName = "regionMapId",
  islandWeight = 0.95
)
head(adjRates, n = 6L)

Unnamed: 0_level_0,regionMapId,wgtPredColRate,wgtPredIslRate,Year
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<chr>
1,AMU101663,0.3267297,0.2617848,2010
2,AMU102072,0.3267297,0.2617848,2010
3,AMU102099,0.3267297,0.2617848,2010
4,AMU103615,0.3267297,0.2617848,2010
5,AMU103695,0.3267297,0.2617848,2010
6,AMU103791,0.3267297,0.2617848,2010


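Finally, we add the detection rates to our data.frame of map counts, inflate, and summarize the results. The detection rates are provided for two reference years (2010 and 2011), so after the merge every map location appears twice, once per reference year.  
  
Since detectionRate = count-in-map/count-on-the-ground, and we want count-on-the-ground, then:  
Count-on-the-ground = count-in-map/detectionRate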

In [62]:
## Attach the detection rates to the map counts, matching on the map-location
## key. The rates come with a `Year` column (reference years 2010 and 2011 in
## the printed output), so each location is expected to appear exactly twice
## in the merged table.
countdf <- merge(corrcounts, adjRates, by = "regionMapId")

## Enforce that expectation instead of merely displaying TRUE/FALSE: a merge
## that drops or duplicates locations would otherwise flow silently into the
## regional totals and into the saved file.
stopifnot(nrow(countdf) == 2 * nrow(corrcounts))

## Inflate the counts: count-on-the-ground = count-in-map / detectionRate.
## NOTE(review): values are rounded per map location, i.e. before the regional
## sums are taken, so the totals differ slightly from sum(count) / rate. Kept
## as is so that the saved and reported numbers do not change.

## Colony model: point estimate, upper and lower limits.
countdf$mdlColEstimate <- round(countdf$predicted / countdf$wgtPredColRate)
countdf$mdlColUpper <- round(countdf$corrUclNumSeals / countdf$wgtPredColRate)
countdf$mdlColLower <- round(countdf$corrLclNumSeals / countdf$wgtPredColRate)

## Islet/mainland model: point estimate, upper and lower limits.
countdf$mdlIslEstimate <- round(countdf$predicted / countdf$wgtPredIslRate)
countdf$mdlIslUpper <- round(countdf$corrUclNumSeals / countdf$wgtPredIslRate)
countdf$mdlIslLower <- round(countdf$corrLclNumSeals / countdf$wgtPredIslRate)
head(countdf)

Unnamed: 0_level_0,regionMapId,lclNumSeals,estNumSeals,uclNumSeals,mapcoords.x1,mapcoords.x2,acquisition_date,region,satId,numViews,⋯,scaledNumTags,wgtPredColRate,wgtPredIslRate,Year,mdlColEstimate,mdlColUpper,mdlColLower,mdlIslEstimate,mdlIslUpper,mdlIslLower
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dttm>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,AMU101663,1,1,4,-85.48336,-73.34752,2010-11-20 21:50:18,AMU,WV01,8,⋯,0,0.3267297,0.2617848,2010,0,9,0,0,11,0
2,AMU101663,1,1,4,-85.48336,-73.34752,2010-11-20 21:50:18,AMU,WV01,8,⋯,0,0.2659764,0.2174677,2011,0,11,0,0,14,0
3,AMU102072,1,1,4,-85.44474,-73.41747,2010-11-20 21:50:18,AMU,WV01,6,⋯,0,0.2659764,0.2174677,2011,0,11,0,0,14,0
4,AMU102072,1,1,4,-85.44474,-73.41747,2010-11-20 21:50:18,AMU,WV01,6,⋯,0,0.3267297,0.2617848,2010,0,9,0,0,11,0
5,AMU102099,1,1,4,-85.79235,-73.17324,2010-11-20 21:50:18,AMU,WV01,6,⋯,0,0.3267297,0.2617848,2010,6,15,6,8,19,8
6,AMU102099,1,1,4,-85.79235,-73.17324,2010-11-20 21:50:18,AMU,WV01,6,⋯,0,0.2659764,0.2174677,2011,8,19,8,9,23,9


In [63]:
## Colony model, reference year 2010: keep the rows carrying the 2010 rates,
## total the inflated counts by region and append a grand-total row.
print("Colony model with reference year 2010")
countdf10 <- countdf[which(countdf$Year == "2010"), ]
estByRegionCol <- countdf10 %>%
  group_by(region) %>%
  dplyr::summarize(
    Lower = round(sum(mdlColLower)),
    Estimate = round(sum(mdlColEstimate)),
    Upper = round(sum(mdlColUpper))
  ) %>%
  as.data.frame()
estByRegionCol <- rbind(
  estByRegionCol,
  with(countdf10, data.frame(
    region = "Total",
    Lower = round(sum(mdlColLower)),
    Estimate = round(sum(mdlColEstimate)),
    Upper = round(sum(mdlColUpper))
  ))
)
print(estByRegionCol)
cat("\n")

## Colony model, reference year 2011: same table built from the 2011 rows.
## estByRegionCol is overwritten, so only the 2011 table stays in the workspace.
print("Colony model with reference year 2011")
countdf11 <- countdf[which(countdf$Year == "2011"), ]
estByRegionCol <- countdf11 %>%
  group_by(region) %>%
  dplyr::summarize(
    Lower = round(sum(mdlColLower)),
    Estimate = round(sum(mdlColEstimate)),
    Upper = round(sum(mdlColUpper))
  ) %>%
  as.data.frame()
estByRegionCol <- rbind(
  estByRegionCol,
  with(countdf11, data.frame(
    region = "Total",
    Lower = round(sum(mdlColLower)),
    Estimate = round(sum(mdlColEstimate)),
    Upper = round(sum(mdlColUpper))
  ))
)
print(estByRegionCol)

[1] "Colony model with reference year 2010"
  region Lower Estimate  Upper
1    AMU  3867     7439  19739
2    EA1  8154    19093  50116
3    EA2  8587    17338  44503
4    QMA 11041    24148  61444
5    RSS 24571    67253 181339
6    WAP 10636    25852  67227
7  Total 66856   161123 424368

[1] "Colony model with reference year 2011"
  region Lower Estimate  Upper
1    AMU  4929     9380  24386
2    EA1 10411    23972  61835
3    EA2 10981    21874  54916
4    QMA 14046    30341  75832
5    RSS 31432    84220 223558
6    WAP 13593    32458  82898
7  Total 85392   202245 523425


In [64]:
## Islet/mainland model, reference year 2010: keep the rows carrying the 2010
## rates, total the inflated counts by region and append a grand-total row.
print("Islet/mainland model with reference year 2010")
countdf10 <- countdf[which(countdf$Year == "2010"), ]
estByRegionIsl <- countdf10 %>%
  group_by(region) %>%
  dplyr::summarize(
    Lower = round(sum(mdlIslLower)),
    Estimate = round(sum(mdlIslEstimate)),
    Upper = round(sum(mdlIslUpper))
  ) %>%
  as.data.frame()
estByRegionIsl <- rbind(
  estByRegionIsl,
  with(countdf10, data.frame(
    region = "Total",
    Lower = round(sum(mdlIslLower)),
    Estimate = round(sum(mdlIslEstimate)),
    Upper = round(sum(mdlIslUpper))
  ))
)
print(estByRegionIsl)
cat("\n")

## Islet/mainland model, reference year 2011: same table built from the 2011
## rows. estByRegionIsl is overwritten, so only the 2011 table stays in the
## workspace.
print("Islet/mainland model with reference year 2011")
countdf11 <- countdf[which(countdf$Year == "2011"), ]
estByRegionIsl <- countdf11 %>%
  group_by(region) %>%
  dplyr::summarize(
    Lower = round(sum(mdlIslLower)),
    Estimate = round(sum(mdlIslEstimate)),
    Upper = round(sum(mdlIslUpper))
  ) %>%
  as.data.frame()
estByRegionIsl <- rbind(
  estByRegionIsl,
  with(countdf11, data.frame(
    region = "Total",
    Lower = round(sum(mdlIslLower)),
    Estimate = round(sum(mdlIslEstimate)),
    Upper = round(sum(mdlIslUpper))
  ))
)
print(estByRegionIsl)

[1] "Islet/mainland model with reference year 2010"
  region Lower Estimate  Upper
1    AMU  4928     9456  24630
2    EA1 10403    24203  62511
3    EA2 10978    22071  55455
4    QMA 14040    30638  76648
5    RSS 31373    85133 226061
6    WAP 13577    32775  83822
7  Total 85299   204276 529127

[1] "Islet/mainland model with reference year 2011"
  region  Lower Estimate  Upper
1    AMU   5927    11290  29978
2    EA1  12507    28921  75909
3    EA2  13164    26378  67373
4    QMA  16948    36641  93004
5    RSS  37756   101856 274408
6    WAP  16346    39247 101692
7  Total 102648   244333 642364


In [67]:
## Last look at the first rows of the final table (both reference years,
## both models) before it is written to disk.
utils::head(countdf, n = 6L)

Unnamed: 0_level_0,regionMapId,lclNumSeals,estNumSeals,uclNumSeals,mapcoords.x1,mapcoords.x2,acquisition_date,region,satId,numViews,⋯,scaledNumTags,wgtPredColRate,wgtPredIslRate,Year,mdlColEstimate,mdlColUpper,mdlColLower,mdlIslEstimate,mdlIslUpper,mdlIslLower
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dttm>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,AMU101663,1,1,4,-85.48336,-73.34752,2010-11-20 21:50:18,AMU,WV01,8,⋯,0,0.3267297,0.2617848,2010,0,9,0,0,11,0
2,AMU101663,1,1,4,-85.48336,-73.34752,2010-11-20 21:50:18,AMU,WV01,8,⋯,0,0.2659764,0.2174677,2011,0,11,0,0,14,0
3,AMU102072,1,1,4,-85.44474,-73.41747,2010-11-20 21:50:18,AMU,WV01,6,⋯,0,0.2659764,0.2174677,2011,0,11,0,0,14,0
4,AMU102072,1,1,4,-85.44474,-73.41747,2010-11-20 21:50:18,AMU,WV01,6,⋯,0,0.3267297,0.2617848,2010,0,9,0,0,11,0
5,AMU102099,1,1,4,-85.79235,-73.17324,2010-11-20 21:50:18,AMU,WV01,6,⋯,0,0.3267297,0.2617848,2010,6,15,6,8,19,8
6,AMU102099,1,1,4,-85.79235,-73.17324,2010-11-20 21:50:18,AMU,WV01,6,⋯,0,0.2659764,0.2174677,2011,8,19,8,9,23,9


In [68]:
## Write the merged table of inflated counts (only the object `countdf`) to
## the repository's data folder.
save(
  list = "countdf",
  file = paste0(pathToLocalGit, "data/FinalWESEcounts.RData")
)