From cd7d095b8c2a7e36cf4c6cf83cb82bdbf7f994fd Mon Sep 17 00:00:00 2001
From: topepo
Date: Thu, 2 Apr 2015 12:56:23 -0400
Subject: [PATCH] added functionality for distribution changes

See https://github.com/topepo/caret/issues/128
---
 RegressionTests/Code/gam.R       | 21 ++++++++++++++++++---
 RegressionTests/Code/gamSpline.R | 15 +++++++++++++++
 RegressionTests/Code/gbm.R       | 20 ++++++++++++++++++++
 models/files/gam.R               | 24 ++++++++++--------------
 models/files/gamLoess.R          | 23 ++++++++++++++---------
 models/files/gamSpline.R         |  7 +++++--
 models/files/gbm.R               |  4 ++--
 pkg/caret/DESCRIPTION            |  4 ++--
 pkg/caret/inst/NEWS.Rd           | 10 ++++++++--
 9 files changed, 94 insertions(+), 34 deletions(-)

diff --git a/RegressionTests/Code/gam.R b/RegressionTests/Code/gam.R
index edc4a7a4d..9df549755 100644
--- a/RegressionTests/Code/gam.R
+++ b/RegressionTests/Code/gam.R
@@ -26,6 +26,14 @@ test_class_cv_model <- train(trainX, trainY,
                              metric = "ROC",
                              preProc = c("center", "scale"))
 
+set.seed(849)
+test_class_cv_dist <- train(trainX, trainY,
+                            method = "gam",
+                            trControl = cctrl1,
+                            metric = "ROC",
+                            preProc = c("center", "scale"),
+                            family = negbin(theta = 1))
+
 set.seed(849)
 test_class_cv_form <- train(Class ~ ., data = training,
                             method = "gam",
@@ -94,11 +102,18 @@ test_reg_cv_model <- train(trainX, trainY,
                            preProc = c("center", "scale"))
 test_reg_pred <- predict(test_reg_cv_model, testX)
 
+set.seed(849)
+test_reg_cv_dist <- train(trainX, trainY,
+                          method = "gam",
+                          trControl = rctrl1,
+                          preProc = c("center", "scale"),
+                          family = scat())
+
 set.seed(849)
 test_reg_cv_form <- train(y ~ ., data = training,
-                           method = "gam",
-                           trControl = rctrl1,
-                           preProc = c("center", "scale"))
+                          method = "gam",
+                          trControl = rctrl1,
+                          preProc = c("center", "scale"))
 test_reg_pred_form <- predict(test_reg_cv_form, testX)
 
 set.seed(849)
diff --git a/RegressionTests/Code/gamSpline.R b/RegressionTests/Code/gamSpline.R
index 83269a1bb..610181052 100644
--- a/RegressionTests/Code/gamSpline.R
+++ b/RegressionTests/Code/gamSpline.R
@@ -26,6 +26,14 @@ test_class_cv_model <- train(trainX, trainY,
                              metric = "ROC",
                              preProc = c("center", "scale"))
 
+set.seed(849)
+test_class_cv_dist <- train(trainX, trainY,
+                            method = "gamSpline",
+                            trControl = cctrl1,
+                            metric = "ROC",
+                            preProc = c("center", "scale"),
+                            family = binomial(link = "cloglog"))
+
 set.seed(849)
 test_class_cv_form <- train(Class ~ ., data = training,
                             method = "gamSpline",
@@ -95,6 +103,13 @@ test_reg_cv_model <- train(trainX, trainY,
                            preProc = c("center", "scale"))
 test_reg_pred <- predict(test_reg_cv_model, testX)
 
+set.seed(849)
+test_reg_cv_dist <- train(trainX, abs(trainY),
+                          method = "gamSpline",
+                          trControl = rctrl1,
+                          preProc = c("center", "scale"),
+                          family = Gamma)
+
 set.seed(849)
 test_reg_cv_form <- train(y ~ ., data = training,
                           method = "gamSpline",
diff --git a/RegressionTests/Code/gbm.R b/RegressionTests/Code/gbm.R
index a9314d8cb..d75439059 100644
--- a/RegressionTests/Code/gbm.R
+++ b/RegressionTests/Code/gbm.R
@@ -32,6 +32,17 @@ test_class_cv_model <- train(trainX, trainY,
                              tuneGrid = gbmGrid,
                              verbose = FALSE)
 
+set.seed(849)
+test_class_cv_dist <- train(trainX, trainY,
+                            method = "gbm",
+                            trControl = cctrl1,
+                            metric = "ROC",
+                            preProc = c("center", "scale"),
+                            tuneGrid = gbmGrid,
+                            verbose = FALSE,
+                            distribution = "adaboost")
+
+
 set.seed(849)
 test_class_cv_form <- train(Class ~ ., data = training,
                             method = "gbm",
@@ -108,6 +119,15 @@ test_reg_cv_model <- train(trainX, trainY,
                            verbose = FALSE)
 test_reg_pred <- predict(test_reg_cv_model, testX)
 
+set.seed(849)
+test_reg_cv_dist <- train(trainX, trainY,
+                          method = "gbm",
+                          trControl = rctrl1,
+                          preProc = c("center", "scale"),
+                          tuneGrid = gbmGrid,
+                          verbose = FALSE,
+                          distribution = "laplace")
+
 set.seed(849)
 test_reg_cv_form <- train(y ~ ., data = training,
                           method = "gbm",
diff --git a/models/files/gam.R b/models/files/gam.R
index 454cbc605..6c23e42db 100644
--- a/models/files/gam.R
+++ b/models/files/gam.R
@@ -17,26 +17,22 @@ modelInfo <- list(label = "Generalized Additive Model using Splines",
                       dat$.outcome <- y
                       dist <- gaussian()
                     }
-                    out <- mgcv:::gam(modForm, data = dat, family = dist,
+                    modelArgs <- list(formula = modForm,
+                                      data = dat,
                                       select = param$select,
-                                      method = as.character(param$method),
-                                      ...)
-#                     if(is.null(wts)) {
-# 
-#                     } else {
-#                       out <- mgcv:::gam(modForm, data = dat, family = dist,
-#                                         select = param$select,
-#                                         method = as.character(param$method),
-#                                         weights = wts,
-#                                         ...)
-#                     }
+                                      method = as.character(param$method))
+                    ## Intercept family if passed in
+                    theDots <- list(...)
+                    if(!any(names(theDots) == "family")) modelArgs$family <- dist
+                    modelArgs <- c(modelArgs, theDots)
+
+                    out <- do.call(getFromNamespace("gam", "mgcv"), modelArgs)
                     out
                   },
                   predict = function(modelFit, newdata, submodels = NULL) {
                     if(!is.data.frame(newdata)) newdata <- as.data.frame(newdata)
-                    if(modelFit$problemType == "Classification")
-                    {
+                    if(modelFit$problemType == "Classification") {
                       probs <- predict(modelFit, newdata, type = "response")
                       out <- ifelse(probs < .5,
                                     modelFit$obsLevel[1],
diff --git a/models/files/gamLoess.R b/models/files/gamLoess.R
index 5ecc9da5a..08d74e1d4 100644
--- a/models/files/gamLoess.R
+++ b/models/files/gamLoess.R
@@ -8,16 +8,21 @@ modelInfo <- list(label = "Generalized Additive Model using LOESS",
                   grid = function(x, y, len = NULL) expand.grid(span = .5, degree = 1),
                   fit = function(x, y, wts, param, lev, last, classProbs, ...) {
-                    dat <- if(is.data.frame(x)) x else as.data.frame(x)
-                    dat$.outcome <- y
+                    args <- list(data = if(is.data.frame(x)) x else as.data.frame(x))
+                    args$data$.outcome <- y
+                    args$formula <- caret:::smootherFormula(x,
+                                                            smoother = "lo",
+                                                            span = param$span,
+                                                            degree = param$degree)
+                    theDots <- list(...)
 
-                    gam:::gam(caret:::smootherFormula(x,
-                                                      smoother = "lo",
-                                                      span = param$span,
-                                                      degree = param$degree),
-                              data = dat,
-                              family = if(is.factor(y)) binomial() else gaussian(),
-                              ...)
+                    if(!any(names(theDots) == "family"))
+                      args$family <- if(is.factor(y)) binomial else gaussian
+
+                    if(length(theDots) > 0) args <- c(args, theDots)
+
+                    do.call(getFromNamespace("gam", "gam"), args)
                   },
                   predict = function(modelFit, newdata, submodels = NULL) {
                     if(!is.data.frame(newdata)) newdata <- as.data.frame(newdata)
diff --git a/models/files/gamSpline.R b/models/files/gamSpline.R
index 4356ea134..4845f4c50 100644
--- a/models/files/gamSpline.R
+++ b/models/files/gamSpline.R
@@ -14,9 +14,12 @@ modelInfo <- list(label = "Generalized Additive Model using Splines",
                     args$formula <- caret:::smootherFormula(x,
                                                             smoother = "s",
                                                             df = param$df)
-                    args$family <- if(is.factor(y)) binomial else gaussian
-
                     theDots <- list(...)
+
+
+                    if(!any(names(theDots) == "family"))
+                      args$family <- if(is.factor(y)) binomial else gaussian
+
                     if(length(theDots) > 0) args <- c(args, theDots)
 
                     do.call(getFromNamespace("gam", "gam"), args)
diff --git a/models/files/gbm.R b/models/files/gbm.R
index 4076aaa49..fe6be26f3 100644
--- a/models/files/gbm.R
+++ b/models/files/gbm.R
@@ -29,8 +29,7 @@ modelInfo <- list(label = "Stochastic Gradient Boosting",
                       modDist <- theDots$distribution
                       theDots$distribution <- NULL
                     } else {
-                      if(is.numeric(y))
-                      {
+                      if(is.numeric(y)) {
                         modDist <- "gaussian"
                       } else modDist <- if(length(lev) == 2) "bernoulli" else "multinomial"
                     }
@@ -45,6 +44,7 @@ modelInfo <- list(label = "Stochastic Gradient Boosting",
                                     n.trees = param$n.trees,
                                     shrinkage = param$shrinkage,
                                     distribution = modDist)
+                    if(any(names(theDots) == "family")) modArgs$distribution <- NULL
 
                     if(length(theDots) > 0) modArgs <- c(modArgs, theDots)
diff --git a/pkg/caret/DESCRIPTION b/pkg/caret/DESCRIPTION
index 40b8f3d6c..6beeb28e4 100644
--- a/pkg/caret/DESCRIPTION
+++ b/pkg/caret/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: caret
-Version: 6.0-43
-Date: 2015-01-27
+Version: 6.0-44
+Date: 2015-04-01
 Title: Classification and Regression Training
 Author: Max Kuhn. Contributions from Jed Wing, Steve Weston, Andre
     Williams, Chris Keefer, Allan Engelhardt, Tony Cooper, Zachary Mayer,
diff --git a/pkg/caret/inst/NEWS.Rd b/pkg/caret/inst/NEWS.Rd
index 7556dd48f..71228d3a5 100644
--- a/pkg/caret/inst/NEWS.Rd
+++ b/pkg/caret/inst/NEWS.Rd
@@ -10,13 +10,19 @@
   \item A new option to \code{trainControl} called \code{trim} was added where, if implemented, will reduce the model's footprint. However, features beyond simple prediction may not work.
   \item A rarely occurring bug in \code{gbm} model code was fixed (thanks to Wade Cooper)
   \item \code{splom.resamples} now respects the \code{models} argument
-  \item A new argument to \code{lift} called \code{cuts} was added to allow more control over what thresholds are used to calculat the curve.
+  \item A new argument to \code{lift} called \code{cuts} was added to allow more control over what thresholds are used to calculate the curve.
   \item The \code{cuts} argument of \code{calibration} now accepts a vector of cut points.
   \item Jason Schadewald noticed and fixed a bug in the man page for \code{dummyVars}
-  \item Call objects were remoed from the following models: \code{avNNet}, \code{bagFDA}, \code{icr}, \code{knn3}, \code{knnreg}, \code{pcaNNet}, and \code{plsda}.
+  \item Call objects were removed from the following models: \code{avNNet}, \code{bagFDA}, \code{icr}, \code{knn3}, \code{knnreg}, \code{pcaNNet}, and \code{plsda}.
   \item An argument was added to \code{createTimeSlices} to thin the number of resamples
   \item The RFE-related functions \code{lrFuncs}, \code{lmFuncs}, and \code{gamFuncs} were updated so that \code{rfe} accepts a matrix \code{x} argument.
   \item Using the default grid generation with \code{train} and \code{glmnet}, an initial \code{glmnet} fit is created with \code{alpha = 0.50} to define the \code{lambda} values.
+  \item \code{train} models for \code{"gbm"}, \code{"gam"}, \code{"gamSpline"}, and \code{"gamLoess"} now allow their respective arguments for the outcome probability distribution to be passed to the underlying function.
+  \item A bug in \code{print.varImp.train} was fixed.
+  \item \code{train} now returns an additional column called \code{rowIndex} that is exposed when calling the summary function during resampling.
+  \item The ability to compute class probabilities was removed from the \code{rpartCost} model since they are unlikely to agree with the class predictions.
+  \item \code{extractProb} no longer redundantly calls \code{extractPrediction} to generate the class predictions.
+
   }
 }
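
Usage note (editor's sketch, not part of the patch): with these changes, a
non-default outcome distribution can be supplied directly to train() and is
forwarded to the underlying fitting function. The sketch below mirrors the
regression tests above and assumes the same objects (trainX, trainY, rctrl1,
gbmGrid) already exist; the result names are illustrative only.

library(caret)
library(mgcv)   # provides scat(), the scaled-t family used in the gam test

## For method = "gam", a `family` object passed to train() is now used in
## place of the default gaussian()/binomial() choice.
set.seed(849)
gam_scat <- train(trainX, trainY,
                  method = "gam",
                  trControl = rctrl1,
                  preProc = c("center", "scale"),
                  family = scat())

## For method = "gbm", the analogous pass-through argument is `distribution`.
set.seed(849)
gbm_laplace <- train(trainX, trainY,
                     method = "gbm",
                     trControl = rctrl1,
                     preProc = c("center", "scale"),
                     tuneGrid = gbmGrid,
                     verbose = FALSE,
                     distribution = "laplace")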