Skip to content

Commit

Permalink
found and fixed major bug in appendSmoothData, and fixed downstream c…
Browse files Browse the repository at this point in the history
…onsequences. appendSmoothData had a variable scope issue, and was pulling "db" from the global environment!
  • Loading branch information
famulare committed Apr 13, 2019
1 parent 129c7a2 commit fdeeb1d
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 21 deletions.
2 changes: 1 addition & 1 deletion incidenceMapR/R/appendCatchmentModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ appendCatchmentModel <- function(db,shp = NULL){
saveModel(catchmentModel)

# append catchment as intercept covariate
db$observedData <- db$observedData %>% right_join(catchmentModel$modeledData %>% select(samplingLocation, geo, fitted.values.0.5quant))
db$observedData <- db$observedData %>% left_join(catchmentModel$modeledData %>% select(samplingLocation, geo, fitted.values.0.5quant))
names(db$observedData)[names(db$observedData) %in% 'fitted.values.0.5quant'] <- 'catchment'
db$observedData$catchment <- log(db$observedData$catchment)
db$observedData$catchment <- (db$observedData$catchment - mean(db$observedData$catchment))/sd(db$observedData$catchment)
Expand Down
29 changes: 18 additions & 11 deletions incidenceMapR/R/latentFieldModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ latentFieldModel <- function(db = dbViewR::selectFromDB(), shp = dbViewR::master
# construct priors
hyper=list()
hyper$global <- list(prec = list( prior = "pc.prec", param = 1/10, alpha = 0.01))
hyper$local <- list(prec = list( prior = "pc.prec", param = 1/100, alpha = 0.01))
hyper$local <- list(prec = list( prior = "pc.prec", param = 1/1000, alpha = 0.01))
hyper$age <- list(prec = list( prior = "pc.prec", param = 1/100, alpha = 0.01))

# unlike smoothing model, we only replicate latent fields across pathogens, but treat all other factors as fixed effects
Expand All @@ -50,6 +50,7 @@ latentFieldModel <- function(db = dbViewR::selectFromDB(), shp = dbViewR::master
levelSet <- levels(as.factor(inputData$pathogen))
numLevels <- length(levelSet)


validLatentFieldColumns <- c('pathogen')

} else {
Expand All @@ -71,14 +72,13 @@ latentFieldModel <- function(db = dbViewR::selectFromDB(), shp = dbViewR::master
}

# initialize formula for each level
if (numLevels>1){
if(numLevels>1){
outcomeStr <- paste('cbind(',paste(paste('outcome',1:numLevels,sep='.'),sep='',collapse=', '),')',sep='',collapse = '')
formula <- as.formula(paste(outcomeStr,'~','pathogen - 1 + catchment',sep=' '))
} else {
formula <- outcome ~ 1 + catchment
} else { # why does R do inconsistent stuff with column names!?!!
formula <- as.formula('outcome ~ 1 + catchment')
}


# factors as fixed effects, assuming no interaction terms
validFactorNames <- c('samplingLocation','fluShot','sex','hasFever','hasCough','hasMyalgia')
factorIdx <- names(db$observedData) %in% validFactorNames
Expand Down Expand Up @@ -214,8 +214,12 @@ latentFieldModel <- function(db = dbViewR::selectFromDB(), shp = dbViewR::master

if(COLUMN %in% c('pathogen') ){

# need to promote pathogen levels to independent columns! https://groups.google.com/forum/#!topic/r-inla-discussion-group/IaTSakB7qy4
pathogenNames <- paste('pathogen',levelSet,sep='')
if(numLevels>1){
# need to promote pathogen levels to independent columns! https://groups.google.com/forum/#!topic/r-inla-discussion-group/IaTSakB7qy4
pathogenNames <- paste('pathogen',levelSet,sep='')
} else { # why does R do inconsistent thing with column names?????!
pathogenNames <- '(Intercept)'
}

} else if (!(COLUMN == 'timeRow_PUMA5CE' )) {
groupIdx<-grepl( paste0('_',gsub('Row','',COLUMN)) ,validLatentFieldColumns) # this nasty thing will get refactored: https://github.com/seattleflu/incidence-mapper/issues/13
Expand Down Expand Up @@ -255,11 +259,14 @@ latentFieldModel <- function(db = dbViewR::selectFromDB(), shp = dbViewR::master
}

df <- data.frame(outcome = outcome, inputData, replicateIdx)
# if(numLevels==1){
# names(df)[names(df)=='outcome'] <- 'outcome.1'
# }

modelDefinition <- list(type='latentField', family = family, formula = formula, lincomb = lc.latentField,
inputData = df, neighborGraph=neighborGraph, hyper=hyper,
latentFieldData = lc.data, # clean up formatting, but this will be useful for exporting latentField csv
queryList = db$queryList)
observedData = db$observedData)

return(modelDefinition)
}
Expand All @@ -273,11 +280,11 @@ latentFieldModel <- function(db = dbViewR::selectFromDB(), shp = dbViewR::master
#'
#' @import lubridate
#'
appendLatentFieldData <- function(model,db, family = 'poisson'){
appendLatentFieldData <- function(model,modelDefinition){

modeledData <- db$observedData
modeledData <- modelDefinition$observedData

if(family[1] == 'binomial'){
if(modelDefinition$family[1] == 'binomial'){
modeledData$fraction <- modeledData$positive/modeledData$n
}

Expand Down
4 changes: 2 additions & 2 deletions incidenceMapR/R/modelTrainR.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ modelTrainR <- function(modelDefinition){

# format output
if(modelDefinition$type =='smooth'){
modeledData <- appendSmoothData(model,db, family = modelDefinition$family)
modeledData <- appendSmoothData(model,modelDefinition)
} else if (modelDefinition$type == 'latentField'){
modeledData <- appendLatentFieldData(model,db, family = modelDefinition$family)
modeledData <- appendLatentFieldData(model,modelDefinition)
} else if (modelDefinition$type == 'effects'){

}
Expand Down
9 changes: 5 additions & 4 deletions incidenceMapR/R/smoothModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,8 @@ smoothModel <- function(db = dbViewR::selectFromDB(), shp = dbViewR::masterSpati

modelDefinition <- list(type='smooth', family = family, formula = formula, lincomb = c(),
inputData = df, neighborGraph=neighborGraph, hyper=hyper,
queryList = db$queryList)
queryList = db$queryList,
observedData = db$observedData)

return(modelDefinition)
}
Expand All @@ -198,11 +199,11 @@ smoothModel <- function(db = dbViewR::selectFromDB(), shp = dbViewR::masterSpati
#'
#' @import lubridate
#'
appendSmoothData <- function(model,db, family = 'poisson'){
appendSmoothData <- function(model,modelDefinition){

modeledData <- db$observedData
modeledData <- modelDefinition$observedData

if(family[1] == 'binomial'){
if(modelDefinition$family[1] == 'binomial'){
modeledData$fraction <- modeledData$positive/modeledData$n
}

Expand Down
2 changes: 1 addition & 1 deletion incidenceMapR/man/appendLatentFieldData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion incidenceMapR/man/appendSmoothData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion modelTestR/testLatentField.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ summary(result <- inla(formula = formula, family = family, data = data,
geoLevels <- c('PUMA5CE','CRA_NAME','NEIGHBORHOOD_DISTRICT_NAME','GEOID')

geoLevels<-c('PUMA5CE')
geoLevels <- c('GEOID')
# geoLevels <- c('GEOID')
k=1
geo=geoLevels[1]

for(geo in geoLevels){

Expand All @@ -128,6 +130,7 @@ for(geo in geoLevels){
# query pathogen and time
queryIn <- list(
SELECT =list(COLUMN=c('num_date','pathogen','samplingLocation',geo)),
WHERE =list(COLUMN=c('pathogen'), IN=pathogens[k]),
MUTATE =list(COLUMN=c('num_date'), AS='timeBin'),
GROUP_BY =list(COLUMN=c('pathogen','timeBin','samplingLocation',geo)),
SUMMARIZE=list(COLUMN='pathogen', IN= pathogens[k])
Expand Down

0 comments on commit fdeeb1d

Please sign in to comment.