Skip to content

Commit

Permalink
switching input hform to hform.g0, modified tests; clean-up of printi…
Browse files Browse the repository at this point in the history
…ng messages (when verbose=FALSE/TRUE);
  • Loading branch information
osofr committed Sep 26, 2015
1 parent 197d5be commit ee96840
Show file tree
Hide file tree
Showing 12 changed files with 311 additions and 394 deletions.
68 changes: 8 additions & 60 deletions R/BinOutModel_class.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ logisfit <- function(datsum_obj) UseMethod("logisfit") # Generic for fitting the

# S3 method for glm binomial family fit, takes BinDat data object:
logisfit.glmS3 <- function(datsum_obj) {
if (gvars$verbose) message("calling glm.fit...")
if (gvars$verbose) print("calling glm.fit...")
Xmat <- datsum_obj$getXmat
Y_vals <- datsum_obj$getY
# Xmat has 0 rows: return NA's and avoid throwing exception:
Expand All @@ -18,30 +18,27 @@ logisfit.glmS3 <- function(datsum_obj) {
# }, GetWarningsToSuppress())
}
fit <- list(coef = m.fit$coef, linkfun = "logit_linkinv", fitfunname = "glm")
# fit <- list(coef = m.fit$coef, linkfun = "logitlinkinv", fitfunname = "glm")
if (gvars$verbose) print(fit$coef)
class(fit) <- c(class(fit), c("glmS3"))
return(fit)
}

# S3 method for speedglm binomial family fit, takes BinDat data object:
logisfit.speedglmS3 <- function(datsum_obj) {
if (gvars$verbose) message("calling speedglm.wfit...")
if (gvars$verbose) print("calling speedglm.wfit...")
Xmat <- datsum_obj$getXmat
Y_vals <- datsum_obj$getY

if (nrow(Xmat) == 0L) { # Xmat has 0 rows: return NA's and avoid throwing exception
m.fit <- list(coef = rep.int(NA_real_, ncol(Xmat)))
} else {
m.fit <- try(speedglm::speedglm.wfit(X = Xmat, y = Y_vals, family = binomial()), silent=TRUE)
# m.fit <- speedglm::speedglm.wfit(X = Xmat, y = Y_vals, family = binomial(), sparse = TRUE)
m.fit <- try(speedglm::speedglm.wfit(X = Xmat, y = Y_vals, family = binomial()), silent = TRUE)
if (inherits(m.fit, "try-error")) { # if failed, fall back on stats::glm
message("speedglm::speedglm.wfit failed, falling back on stats:glm.fit; ", m.fit)
return(logisfit.glmS3(datsum_obj))
}
}
fit <- list(coef = m.fit$coef, linkfun = "logit_linkinv", fitfunname = "speedglm")
# fit <- list(coef = m.fit$coef, linkfun = "logitlinkinv", fitfunname = "speedglm")
if (gvars$verbose) print(fit$coef)
class(fit) <- c(class(fit), c("speedglmS3"))
return(fit)
Expand All @@ -60,11 +57,8 @@ summary.BinOutModel <- function(binoutmodel) {
append(list(reg = binoutmodel$show()), fit)
}

# @importFrom reshape2 melt
# @importFrom data.table `[.data.table`
# @importFrom data.table melt.data.table
#' @import data.table

NULL
# Convert existing Bin matrix (Bin indicators) for continuous self$outvar into long format data.table with 3 columns:
# ID - row number; sVar_allB.j - bin indicators collapsed into one col; bin_ID - bin number identify for prev. columns
# automatically removed all missing (degenerate) bin indicators
Expand All @@ -80,9 +74,7 @@ binirized.to.DTlong = function(BinsDat_wide, binID_seq, ID, bin_names, pooled_bi
variable.name = name.sVar,
variable.factor = FALSE,
na.rm = FALSE)
# print("sVar_melt_DT init: "); print(sVar_melt_DT)
nbin_rep <- rep(binID_seq, each = nrow(BinsDat_wide))
# print("pooled_bin_name: " %+% pooled_bin_name);
# 1) Add bin_ID; 2) remove a column with Bin names; 3) remove all rows with NA value for outcome (degenerate bins)
if (!is.data.table(sVar_melt_DT)) {
class(sVar_melt_DT)
Expand All @@ -100,36 +92,10 @@ join.Xmat = function(X_mat, sVar_melt_DT, ID) {
assert_that(nIDs == nrow(X_mat))
X_mat_DT <- data.table::as.data.table(X_mat)[, c("ID") := ID, with = FALSE]
data.table::setkeyv(X_mat_DT, c("ID")) # sort by ID
# print("X_mat_DT: "); print(X_mat_DT)
sVar_melt_DT <- sVar_melt_DT[X_mat_DT] # Merge long format (self$pooled_bin_name, binIDs) with predictors (sW)
# print("sVar_melt_DT[1:10,]"); print(sVar_melt_DT[1:10,])
return(sVar_melt_DT)
}

## ---------------------------------------------------------------------
# (NOT USED) Abstract summary measure class for P(sA[j]|sW,sA[j])
# @export
Abstract_BinDat <- R6Class(classname = "Abstract_BinDat",
portable = TRUE,
class = TRUE,
public = list(
initialize = function(...) { stop("cannot create abstract data store directly")}
# store = function(..., key = digest(list(...)), overwrite = FALSE,
# envir = parent.frame()) {
# stop("store method not implemented")
# },
# try_load = function(key, envir = parent.frame()) {
# ads_try_load(self, key, envir)
# },
)
)
# ## ---------------------------------------------------------------------
# USAGE:
# #' @keywords internal
# ads_try_load <- function(self, key, envir) {
# try(self$load(key, envir = envir), silent = TRUE)
# }

## ---------------------------------------------------------------------
#' R6 class for storing the design matrix and binary outcome for a single logistic regression
#'
Expand Down Expand Up @@ -242,7 +208,7 @@ BinDat <- R6Class(classname = "BinDat",
}
assert_that(is.logical(subset_idx))
if ((length(subset_idx) < self$n) && (length(subset_idx) > 1L)) {
message("subset_idx has smaller length than self$n; repeating subset_idx p times, for p: " %+% data$p)
if (gvars$verbose) message("subset_idx has smaller length than self$n; repeating subset_idx p times, for p: " %+% data$p)
subset_idx <- rep.int(subset_idx, data$p)
if (length(subset_idx) != self$n) stop("BinDat$define.subset_idx: self$n is not equal to nobs*p!")
}
Expand Down Expand Up @@ -290,26 +256,12 @@ BinDat <- R6Class(classname = "BinDat",
BinsDat_wide <- data$get.dat.sWsA(self$subset_idx, self$outvars_to_pool)
self$ID <- as.integer(1:nrow(BinsDat_wide))

# print("self$pooled_bin_name: " %+% self$pooled_bin_name)
# print("self$bin_names: "); print(self$bin_names)
# print("self$nbins: " %+% self$nbins);
# print("binID_seq: "); print(binID_seq)

# To grab bin Ind mat directly (prob a bit faster): BinsDat_wide <- data$dat.bin.sVar[self$subset_idx, ]
# print("self$subset_idx: "); print(head(self$subset_idx))
# print("class(BinsDat_wide): " %+% class(BinsDat_wide));
# print("head(BinsDat_wide) in BinDat: "); print(head(BinsDat_wide))

BinsDat_long <- binirized.to.DTlong(BinsDat_wide = BinsDat_wide, binID_seq = binID_seq, ID = self$ID,
bin_names = self$bin_names, pooled_bin_name = self$pooled_bin_name,
name.sVar = self$outvar)

# print("BinsDat_long: "); print(BinsDat_long)
# print("class(BinsDat_long): "); print(class(BinsDat_long))

sVar_melt_DT <- join.Xmat(X_mat = data$get.dat.sWsA(self$subset_idx, self$predvars),
sVar_melt_DT = BinsDat_long, ID = self$ID)

# prepare design matrix for modeling w/ glm.fit or speedglm.wfit:
X_mat <- sVar_melt_DT[,c("bin_ID", self$predvars), with=FALSE][, c("Intercept") := 1] # select bin_ID + predictors, add intercept column
setcolorder(X_mat, c("Intercept", "bin_ID", self$predvars)) # re-order columns by reference (no copy)
Expand Down Expand Up @@ -374,7 +326,6 @@ BinDat <- R6Class(classname = "BinDat",
if (sum(self$subset_idx > 0)) {
eta <- private$X_mat[,!is.na(m.fit$coef), drop = FALSE] %*% m.fit$coef[!is.na(m.fit$coef)]
pAout[self$subset_idx] <- match.fun(FUN = m.fit$linkfun)(eta)
# pAout[self$subset_idx] <- m.fit$linkfun(eta)
}
return(pAout)
}
Expand Down Expand Up @@ -453,10 +404,8 @@ BinOutModel <- R6Class(classname = "BinOutModel",
if (!reg$useglm) self$glmfitclass <- "speedglmS3"
self$bindat <- BinDat$new(reg = reg, ...) # postponed adding data in BinDat until self$fit() is called
class(self$bindat) <- c(class(self$bindat), self$glmfitclass)
# can also use: self$bindat <- BinDat$new(glm = self$glmfitclass, ...)
# or: self$bindat <- BinDat$new(self, ...) (passing self might get confusing)
if (gvars$verbose) {
print("Init BinOutModel:"); print(self$show())
print("New BinOutModel instance:"); print(self$show())
}

# Get the bin width (interval length) for the current bin name self$getoutvarnm (for discretized continuous sA only):
Expand All @@ -475,10 +424,10 @@ BinOutModel <- R6Class(classname = "BinOutModel",
fit = function(overwrite = FALSE, data, ...) { # Move overwrite to a field? ... self$overwrite
if (!overwrite) assert_that(!self$is.fitted) # do not allow overwrite of prev. fitted model unless explicitely asked
self$bindat$newdata(newdata = data, ...) # populate bindat with X_mat & Y_vals
# self$bindat$newdata(newdata = data, getoutcome = TRUE, ...) # populate bindat with X_mat & Y_vals
private$m.fit <- logisfit(datsum_obj = self$bindat) # private$m.fit <- data_obj$logisfit or private$m.fit <- data_obj$logisfit()
# alternative 2 is to apply data_obj method / method that fits the model
self$is.fitted <- TRUE

# **********************************************************************
# to save RAM space when doing many stacked regressions no longer predicting in fit:
# **********************************************************************
Expand Down Expand Up @@ -533,8 +482,7 @@ BinOutModel <- R6Class(classname = "BinOutModel",
# Predict the response P(Bin = b|sW = sw), which is returned invisibly;
# Needs to know the values of b to be able to do prediction ($newdata(, getouvar = TRUE))
# WARNING: This method cannot be chained together with methods that follow (s.a, class$predictAeqa()$fun())
# rename to:
# predict.like.P_a = function(newdata)
# rename to: predict.like.P_a = function(newdata)
predictAeqa = function(newdata, bw.j.sA_diff) { # P(A^s[i]=a^s|W^s=w^s) - calculating the likelihood for indA[i] (n vector of a's)
assert_that(self$is.fitted)
assert_that(!missing(newdata))
Expand Down
89 changes: 29 additions & 60 deletions R/Model_h.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,28 +60,20 @@ predict.hbars <- function(newdatnet = NULL, m.h.fit) {
# fit models for m_gAi
#---------------------------------------------------------------------------------
fit.hbars <- function(DatNet.ObsP0, est_params_list) {
if (gvars$verbose) message("... running fit.hbars() ...")
.f.mkstrNet <- function(Net) apply(Net, 1, function(Net_i) paste(Net_i, collapse=" ")) # defining the vector of c^A's that needs evaluation under h(c)
#---------------------------------------------------------------------------------
# PARAMETERS FOR LOGISTIC ESTIMATION OF h
#---------------------------------------------------------------------------------
# n <- nrow(data)
O.datnetW <- DatNet.ObsP0$datnetW
O.datnetA <- DatNet.ObsP0$datnetA

# Kmax <- DatNet.ObsP0$Kmax
# nodes <- DatNet.ObsP0$O.datnetW$nodes
# nFnode <- nodes$nFnode
# Anode <- nodes$Anode
# NetInd_k <- DatNet.ObsP0$O.datnetW$NetInd_k

lbound <- est_params_list$lbound
max_npwt <- est_params_list$max_npwt # NOT IMPLEMENTED
ng.MCsims <- est_params_list$ng.MCsims # replace with p adaptive to k: p <- 100*(2^k)

sW <- est_params_list$sW
sA <- est_params_list$sA
h.sVars <- est_params_list$h.sVars
h.g0.sVars <- est_params_list$h.g0.sVars
h.gstar.sVars <- est_params_list$h.gstar.sVars

f.gstar <- est_params_list$f.gstar
Expand All @@ -100,72 +92,45 @@ fit.hbars <- function(DatNet.ObsP0, est_params_list) {

#---------------------------------------------------------------------------------
# Getting OBSERVED sW
# **** TODO ******
# (1) RETURN A MORE INTERPRETABLE ERROR when check.sW.g0.exist or check.sW.gstar.exist fails (list vars not found in sW_g0, sW_gstar)
# (2) Same for check.sA.exist -> return a more interpretable error
# (3) Add the same check for Q model (if not already added)
# ****************
#---------------------------------------------------------------------------------
# Summary measure names
sW.g0_nms <- h.sVars$predvars
# Summary measure names / expressions:
sW.g0_nms <- h.g0.sVars$predvars
sW.gstar_nms <- h.gstar.sVars$predvars

# *****
# CHECK THAT THESE SUMMARY MEASURES EXIST IN O.datnetW$names.sVar
# Check that these summary measures exist in O.datnetW$names.sVar
check.sW.g0.exist <- unlist(lapply(sW.g0_nms, function(sWname) sWname %in% O.datnetW$names.sVar))
if (!all(check.sW.g0.exist)) stop("the following predictors from hform regression could not be located in sW summary measures: " %+%
if (!all(check.sW.g0.exist)) stop("the following predictors from hform.g0 regression could not be located in sW summary measures: " %+%
paste0(sW.g0_nms[!check.sW.g0.exist], collapse = ","))


check.sW.gstar.exist <- unlist(lapply(sW.gstar_nms, function(sWname) sWname %in% O.datnetW$names.sVar))
if (!all(check.sW.gstar.exist)) stop("the following predictors from hform.gstar regression could not be located in sW summary measures: " %+%
paste0(sW.gstar_nms[!check.sW.gstar.exist], collapse = ","))

#---------------------------------------------------------------------------------
# Getting OBSERVED sA
#---------------------------------------------------------------------------------
# Summary measure names / expression
# sA_nms <- h.sVars$outvars
sA_nms_g0 <- h.sVars$outvars
# Summary measure names / expressions:
sA_nms_g0 <- h.g0.sVars$outvars
sA_nms_gstar <- h.gstar.sVars$outvars

# ***********
# CHECK THAT THE OUTCOME SUMMARY MEASURES DEFINED by h.sVars$outvars and h.gstar.sVars$outvars are equivalent:
# NOTE: MIGHT COMMENT OUT IN THE FUTURE AND ALLOW DIFFERENT SUMMARY MEASURES FOR sA_nms_g0 and sA_nms_gstar.
# Check that the outcome summary measures defined by h.g0.sVars$outvars and h.gstar.sVars$outvars are equivalent:
# NOTE: might comment out in the future and allow different summary measures for sA_nms_g0 and sA_nms_gstar.
# ***********
if (!all(sA_nms_g0 == sA_nms_gstar)) stop("the outcome variable names defined by regressions hform & hform.gstar are not identical;" %+%
if (!all(sA_nms_g0 == sA_nms_gstar)) stop("the outcome variable names defined by regressions hform.g0 & hform.gstar are not identical;" %+%
" current implementation requires these to be the same.")

# ***********
# CHECK THAT THESE SUMMARY MEASURES EXIST IN O.datnetW$names.sVar
# ***********
# Check that these summary measures exist in O.datnetW$names.sVar
check.sAg0.exist <- unlist(lapply(sA_nms_g0, function(sAname) sAname %in% O.datnetA$names.sVar))
if (!all(check.sAg0.exist)) stop("the following outcomes from hform regression could not be located in sA summary measures: " %+%
if (!all(check.sAg0.exist)) stop("the following outcomes from hform.g0 regression could not be located in sA summary measures: " %+%
paste0(sA_nms_g0[!check.sAg0.exist], collapse = ","))

check.sAgstar.exist <- unlist(lapply(sA_nms_gstar, function(sAname) sAname %in% O.datnetA$names.sVar))
if (!all(check.sAgstar.exist)) stop("the following outcomes from hform.gstar regression could not be located in sA summary measures: " %+%
paste0(sA_nms_gstar[!check.sAgstar.exist], collapse = ","))
# if (gvars$verbose) {
# print("check.sA.exist"); print(check.sA.exist)
# }
# assert_that(all(check.sA.exist))

#-----------------------------------------------------------
# BELOW IS A BUG, all A are assigned to the same bin when trying automatic $detect.sVar.intrvls:
#-----------------------------------------------------------
# intvrls <- DatNet.sWsA.g0$detect.sVar.intrvls("A")
# DatNet.sWsA.g0$set.sVar.intrvls(name.sVar = "A", new.intrvls = intvrls)
# print("defined intvrls for A: "); print(intvrls)
# print("TABLE FOR A as an ordinal (all A vals got assigned to cat 2): "); print(table(DatNet.sWsA.g0$discretize.sVar("A")))
# print("Bins for A: "); print(head(DatNet.sWsA.g0$binirize.sVar("A")))
# Trying manual intervals for A:
# DatNet.sWsA.g0$set.sVar.intrvls(name.sVar = "A", new.intrvls = seq(0,1,by=0.1))
# print(DatNet.sWsA.g0$get.sVar.intrvls("A"))
# print("Bins for A with manual 10 continuous intervals:")
# print(table(DatNet.sWsA.g0$discretize.sVar("A")))
# print(head(DatNet.sWsA.g0$binirize.sVar("A")))
# print("Stats for sum_1mAW2_nets: ")
#-----------------------------------------------------------
# DEFINING SUBSETING EXPRESSIONS (FOR DETERMINISTIC / DEGENERATE sA)
#-----------------------------------------------------------
Expand All @@ -180,15 +145,12 @@ fit.hbars <- function(DatNet.ObsP0, est_params_list) {
# Summary class params:
##########################################
sA_class <- O.datnetA$type.sVar[sA_nms_g0]
# sVartypes <- gvars$sVartypes # <- list(bin = "binary", cat = "categor", cont = "contin")
# sA_class <- as.list(c(sVartypes$cont, sVartypes$bin, rep_len(sVartypes$bin, length(sA_nms_g0)-2)))
# sA_class.alt <- lapply(sA_nms_g0, O.datnetA$get.sVar.type)
# names(sA_class.alt) <- sA_nms_g0
# print("detected sA_class for O.datnetA$sVar: "); print(sA_class)

message("================================================================")
message("fitting h_g0 with summary measures: ", "(" %+% paste(sA_nms_g0, collapse = ",") %+% " | " %+% paste(sW.g0_nms, collapse = ",") %+% ")")
message("================================================================")
if (gvars$verbose) {
message("================================================================")
message("fitting h_g0 with summary measures: ", "P(" %+% paste(sA_nms_g0, collapse = ",") %+% " | " %+% paste(sW.g0_nms, collapse = ",") %+% ")")
message("================================================================")
}

p_h0 <- ifelse(is.null(f.g0), 1, ng.MCsims)
if (!is.null(f.g0)) {
Expand All @@ -199,7 +161,11 @@ fit.hbars <- function(DatNet.ObsP0, est_params_list) {
DatNet.g0 <- DatNet.ObsP0
}

regclass.g0 <- RegressionClass$new(outvar.class = sA_class, outvar = sA_nms_g0, predvars = sW.g0_nms, subset = subsets_expr)
regclass.g0 <- RegressionClass$new(outvar.class = sA_class,
outvar = sA_nms_g0,
predvars = sW.g0_nms,
subset = subsets_expr)

summeas.g0 <- SummariesModel$new(reg = regclass.g0, DatNet.sWsA.g0 = DatNet.g0)
if (!is.null(h_g0_SummariesModel)) {
message("user supplied model fit for h_g0 is not implemented yet")
Expand All @@ -221,14 +187,17 @@ fit.hbars <- function(DatNet.ObsP0, est_params_list) {
h_gN <- summeas.g0$predictAeqa(newdata = DatNet.ObsP0)
# *********

message("================================================================")
message("fitting h_gstar for summary measures: ", "(" %+% paste(sA_nms_gstar, collapse = ",") %+% " | " %+% paste(sW.gstar_nms, collapse = ",") %+% ")")
message("================================================================")
if (gvars$verbose) {
message("================================================================")
message("fitting h_gstar based on summary measures: ", "P(" %+% paste(sA_nms_gstar, collapse = ",") %+% " | " %+% paste(sW.gstar_nms, collapse = ",") %+% ")")
message("================================================================")
}

DatNet.gstar <- DatNet.sWsA$new(datnetW = O.datnetW, datnetA = O.datnetA)
DatNet.gstar$make.dat.sWsA(p = ng.MCsims, f.g_name = f.gstar, f.g_args = f.g_args, sA.object = sA)

if (gvars$verbose) {
print("DatNet.gstar stored sWsA df: "); print(class(DatNet.gstar$dat.sWsA))
print("Generated new summary measures by sampling A from f_gstar (DatNet.gstar): "); print(class(DatNet.gstar$dat.sWsA))
print(dim(DatNet.gstar$dat.sWsA)); print(head(DatNet.gstar$dat.sWsA));
}

Expand Down
Loading

0 comments on commit ee96840

Please sign in to comment.