Merge branch 'release/v0.0.1'
svs14 committed Jul 21, 2014
2 parents 9e778e7 + 4ab5146 commit 1e04ff5
Showing 14 changed files with 166 additions and 136 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -1 +1,4 @@
*.cov
Gemfile
Gemfile.lock
Guardfile
7 changes: 7 additions & 0 deletions CHANGELOG.yml
@@ -1,6 +1,13 @@
%YAML 1.2
---
changes:
v0.0.1:
- Patch example code in README.
- Rename GradientBoost to GBAlgorithm.
- All algorithm constructors adjusted for keyword arguments.
- Rename GBProblem to GBLearner.
- Rename GBLearner to GBBaseLearner.
- Rename GBL to GBBL.
v0.0.0:
- First release.
- Architecture implemented.
81 changes: 51 additions & 30 deletions README.md
@@ -58,19 +58,19 @@ train_ind, test_ind = GradientBoost.Util.holdout(num_instances, 0.2)

### Build Learner

The gradient boosting (GB) learning problem comprises of a GB algorithm
The gradient boosting (GB) learner comprises a GB algorithm
and what output it must produce.
In this case, we shall assign a gradient boosted decision tree to output classes.
```julia
# Build GBProblem
gbdt = GBDT(
BinomialDeviance(), # Loss function
0.6, # Sampling rate
0.1, # Learning rate
100, # Number of iterations
# Build GBLearner
gbdt = GBDT(;
loss_function = BinomialDeviance(),
sampling_rate = 0.6,
learning_rate = 0.1,
num_iterations = 100
)
gbp = GBProblem(
gbdt, # Gradient boosting algorithm
gbl = GBLearner(
gbdt, # Gradient boosting algorithm
:class # Output (:class, :class_prob, :regression)
)
```
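
For regression on a numeric target, the same pattern applies with a different loss function and output symbol. This is only a sketch reusing the names above; the `_reg` names are illustrative and not from the package:

```julia
# Hypothetical regression setup mirroring the classification example above.
gbdt_reg = GBDT(;
  loss_function = LeastSquares(),
  sampling_rate = 0.6,
  learning_rate = 0.1,
  num_iterations = 100
)
gbl_reg = GBLearner(gbdt_reg, :regression)
```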
@@ -83,10 +83,10 @@ In this case, it is not an issue.

```julia
# Train
fit!(gbp, instances[train_ind, :], labels[train_ind])
ML.fit!(gbl, instances[train_ind, :], labels[train_ind])

# Predict
predictions = predict!(gbp, instances[test_ind, :])
predictions = ML.predict!(gbl, instances[test_ind, :])
```
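
As a quick sanity check (not part of the package API; the Evaluate section below may use its own error measure), the class predictions can be compared against the held-out labels directly:

```julia
# Rough accuracy estimate; assumes `predictions` and `labels[test_ind]` hold class labels.
accuracy = mean(predictions .== labels[test_ind])
println("Accuracy: $accuracy")
```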

### Evaluate
@@ -113,12 +113,12 @@ Current loss functions covered are:
`LeastSquares`, `LeastAbsoluteDeviation` and `BinomialDeviance`.

```julia
gbdt = GBDT(
BinomialDeviance(), # Loss function
0.6, # Sampling rate
0.1, # Learning rate
100, # Number of iterations
{ # Tree options (DecisionTree.jl regressor)
gbdt = GBDT(;
loss_function = BinomialDeviance(), # Loss function
sampling_rate = 0.6, # Sampling rate
learning_rate = 0.1, # Learning rate
num_iterations = 100, # Number of iterations
tree_options = { # Tree options (DecisionTree.jl regressor)
:maxlabels => 5,
:nsubfeatures => 0
}
@@ -152,32 +152,53 @@ end
Once this is done,
the algorithm can be instantiated with the respective base learner.
```julia
gbl = GBL(
LinearModel, # Base Learner
LeastSquares(), # Loss functoin
0.8, # Sampling rate
0.1, # Learning rate
100 # Number of iterations
gbl = GBBL(
LinearModel; # Base Learner
loss_function = LeastSquares(), # Loss function
sampling_rate = 0.8, # Sampling rate
learning_rate = 0.1, # Learning rate
num_iterations = 100 # Number of iterations
)
gbp = GBProblem(gbl, :regression)
gbl = GBLearner(gbl, :regression)
```
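
The collapsed portion of this README defines `LinearModel` and extends `learner_fit` and `learner_predict` for it. As a rough illustration of that extension point (the signatures below are assumed, so defer to the package's own example), a minimal least-squares base learner might look like this:

```julia
# Illustrative sketch only; the exact required signatures live in the
# package's own LinearModel example, which is collapsed above.
import GradientBoost.GBBaseLearner: learner_fit, learner_predict
importall GradientBoost.LossFunctions

type ExampleLinearModel end

# Fit: the "model" is just a least-squares coefficient vector
# over an intercept-augmented instance matrix.
function learner_fit(lf::LossFunction, learner::Type{ExampleLinearModel},
  instances, labels)
  [ones(size(instances, 1)) instances] \ labels
end

# Predict: apply the stored coefficients to new instances.
function learner_predict(lf::LossFunction, learner::Type{ExampleLinearModel},
  coefs, instances)
  [ones(size(instances, 1)) instances] * coefs
end
```

With those two methods in place, the new type could be passed to `GBBL` in place of `LinearModel` above.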

## Gradient Boosting Framework

All previously developed algorithms follow the framework
provided by `GradientBoost.GB`.

As this package is in its preliminary stage,
major changes may occur in the near future and as such
we provide minimal README documentation.

The algorithm must be of type `GBAlgorithm`, with the fields
`loss_function`, `learning_rate`, `sampling_rate` and `num_iterations` accessible.
The bare minimum an algorithm must implement is
`build_base_func`. Optionally, `create_sample_indices` can be extended.
Loss functions can be found in `GradientBoost.LossFunctions`.
Everything that must be implemented is shown in the example below:
```julia
import GradientBoost.GB
import GradientBoost.LossFunctions: LossFunction

# Must subtype from GBAlgorithm defined in GB module.
type ExampleGB <: GB.GBAlgorithm
loss_function::LossFunction
sampling_rate::FloatingPoint
learning_rate::FloatingPoint
num_iterations::Int
end

# Model training and coefficient optimization should be done here.
function GB.build_base_func(
gb::ExampleGB, instances, labels, prev_func_pred, psuedo)

model_const = 0.5
model_pred = (instances) -> Float64[
sum(instances[i,:]) for i = 1:size(instances, 1)
]

return (instances) -> model_const .* model_pred(instances)
end
```
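
Once such a subtype exists, it plugs into the same ML-level API used earlier in this README. The snippet below is only a sketch: it assumes `instances`, `labels` and the train/test indices from the earlier example, and relies on `ExampleGB`'s default field-order constructor.

```julia
import GradientBoost.ML

# ExampleGB's default constructor takes its fields in declaration order:
# loss function, sampling rate, learning rate, number of iterations.
egb = ExampleGB(ML.LeastSquares(), 0.8, 0.1, 10)
egb_learner = ML.GBLearner(egb, :regression)
ML.fit!(egb_learner, instances[train_ind, :], labels[train_ind])
predictions = ML.predict!(egb_learner, instances[test_ind, :])
```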

A relatively light algorithm
that implements this is `GBLearner`, found in `src/gb_learner.jl`.
that implements `GBAlgorithm` is `GBBL`, found in `src/gb_bl.jl`.

## Misc

2 changes: 1 addition & 1 deletion src/GradientBoost.jl
@@ -4,7 +4,7 @@ module GradientBoost
include("util.jl")
include("loss.jl")
include("gb.jl")
include("gb_learner.jl")
include("gb_bl.jl")
include("gb_dt.jl")
include("ml.jl")

12 changes: 6 additions & 6 deletions src/gb.jl
@@ -4,7 +4,7 @@ module GB
importall GradientBoost.Util
importall GradientBoost.LossFunctions

export GradientBoost,
export GBAlgorithm,
GBModel,
stochastic_gradient_boost,
fit,
@@ -14,7 +14,7 @@ export GradientBoost,


# Gradient boost algorithm.
abstract GradientBoost
abstract GBAlgorithm

# Gradient boost model.
type GBModel
@@ -28,7 +28,7 @@ end
# @param instances Instances.
# @param labels Labels.
# @return Gradient boost model.
function stochastic_gradient_boost(gb::GradientBoost, instances, labels)
function stochastic_gradient_boost(gb::GBAlgorithm, instances, labels)
# Initialize base functions collection
num_iterations = gb.num_iterations
base_funcs = Array(Function, num_iterations+1)
@@ -73,7 +73,7 @@ function stochastic_gradient_boost(gb::GradientBoost, instances, labels)
return GBModel(gb.learning_rate, base_funcs)
end

function fit(gb::GradientBoost, instances, labels)
function fit(gb::GBAlgorithm, instances, labels)
stochastic_gradient_boost(gb, instances, labels)
end
function predict(gb_model::GBModel, instances)
@@ -93,7 +93,7 @@ end
# @param psuedo Psuedo-labels (psuedo-response).
# @return Function of form (instances) -> predictions.
function build_base_func(
gb::GradientBoost,
gb::GBAlgorithm,
instances,
labels,
prev_func_pred,
@@ -109,7 +109,7 @@ end
# @param instances Instances.
# @param labels Labels.
# @return Sample indices.
function create_sample_indices(gb::GradientBoost, instances, labels)
function create_sample_indices(gb::GBAlgorithm, instances, labels)
n = size(instances, 1)
prop = gb.sampling_rate

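
The README notes that `create_sample_indices` is an optional extension point; the visible part of its default draws on `gb.sampling_rate` to pick instance indices. A hedged sketch of overriding it for a custom algorithm such as the README's `ExampleGB`, switching to sampling with replacement (purely illustrative, not the package's default behaviour):

```julia
import GradientBoost.GB

# Illustrative override: subsample instance indices with replacement.
function GB.create_sample_indices(gb::ExampleGB, instances, labels)
  n = size(instances, 1)
  num_samples = int(n * gb.sampling_rate)
  rand(1:n, num_samples)
end
```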
12 changes: 6 additions & 6 deletions src/gb_learner.jl → src/gb_bl.jl
@@ -1,7 +1,7 @@
# Gradient Boosted Learner
module GBLearner
module GBBaseLearner

export GBL,
export GBBL,
build_base_func,
learner_fit,
learner_predict
@@ -11,14 +11,14 @@ importall GradientBoost.LossFunctions
importall GradientBoost.Util

# Gradient boosted base learner algorithm.
type GBL <: GradientBoost
type GBBL <: GBAlgorithm
loss_function::LossFunction
sampling_rate::FloatingPoint
learning_rate::FloatingPoint
num_iterations::Int
learner

function GBL(learner, loss_function=LeastSquares(),
function GBBL(learner; loss_function=LeastSquares(),
sampling_rate=0.8, learning_rate=0.1,
num_iterations=100)

@@ -27,7 +27,7 @@ type GBL <: GradientBoost
end

function GB.build_base_func(
gb::GBL,
gb::GBBL,
instances,
labels,
prev_func_pred,
@@ -95,7 +95,7 @@ function fit_best_constant(lf::BinomialDeviance,
labels, psuedo, psuedo_pred, prev_func_pred)

# TODO(svs14): Add fit_best_constant (BinomialDeviance) for base learner.
error("$(typeof(lf)) is not implemented for GBLearner.")
error("$(typeof(lf)) is not implemented for GBBaseLearner.")
end

end # module
4 changes: 2 additions & 2 deletions src/gb_dt.jl
@@ -11,14 +11,14 @@ export GBDT,
build_base_func

# Gradient boosted decision tree algorithm.
type GBDT <: GradientBoost
type GBDT <: GBAlgorithm
loss_function::LossFunction
sampling_rate::FloatingPoint
learning_rate::FloatingPoint
num_iterations::Int
tree_options::Dict

function GBDT(loss_function=LeastSquares(),
function GBDT(;loss_function=LeastSquares(),
sampling_rate=0.6, learning_rate=0.1,
num_iterations=100, tree_options=Dict())

29 changes: 14 additions & 15 deletions src/ml.jl
@@ -4,57 +4,56 @@ module ML
importall GradientBoost.LossFunctions
importall GradientBoost.GB
importall GradientBoost.GBDecisionTree
importall GradientBoost.GBLearner
importall GradientBoost.GBBaseLearner

export GBProblem,
export GBLearner,
fit!,
predict!,
LossFunction,
LeastSquares,
LeastAbsoluteDeviation,
BinomialDeviance,
GBDT,
GBL,
GBBL,
learner_fit,
learner_predict


# Gradient boosting problem.
# NOTE(svs14): Might want to find a better name for this.
type GBProblem
algorithm::GradientBoost
# Gradient boosting learner as defined by ML API.
type GBLearner
algorithm::GBAlgorithm
output::Symbol
model

function GBProblem(algorithm, output=:regression)
function GBLearner(algorithm, output=:regression)
new(algorithm, output, nothing)
end
end

function fit!(gbp::GBProblem, instances, labels)
function fit!(gbl::GBLearner, instances, labels)
error("Instance type: $(typeof(instances))
and label type: $(typeof(labels)) together is currently not supported.")
end
function predict!(gbp::GBProblem, instances)
function predict!(gbl::GBLearner, instances)
error("Instance type: $(typeof(instances)) is currently not supported.")
end

function fit!(gbp::GBProblem,
function fit!(gbl::GBLearner,
instances::Matrix{Float64}, labels::Vector{Float64})

# No special processing required.
gbp.model = fit(gbp.algorithm, instances, labels)
gbl.model = fit(gbl.algorithm, instances, labels)
end

function predict!(gbp::GBProblem,
function predict!(gbl::GBLearner,
instances::Matrix{Float64})

# Predict with GB algorithm
predictions = predict(gbp.model, instances)
predictions = predict(gbl.model, instances)

# Postprocess according to output and loss function
predictions = postprocess_pred(
gbp.output, gbp.algorithm.loss_function, predictions
gbl.output, gbl.algorithm.loss_function, predictions
)

predictions
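
The concrete `fit!` and `predict!` methods visible in this hunk are defined for `Matrix{Float64}` instances and `Vector{Float64}` labels; other types fall through to the error methods above. Callers holding data in other element types would therefore convert first, as in this sketch (`gbl` is a `GBLearner` as in the README; the `raw_*` names are hypothetical):

```julia
# Coerce data into the element types accepted by the ML-level methods.
instances_f64 = convert(Matrix{Float64}, raw_instances)
labels_f64 = convert(Vector{Float64}, raw_labels)
ML.fit!(gbl, instances_f64, labels_f64)
```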
2 changes: 1 addition & 1 deletion test/runtests.jl
@@ -7,7 +7,7 @@ include("test_util.jl")
include("test_loss.jl")
include("test_gb.jl")
include("test_gb_dt.jl")
include("test_gb_learner.jl")
include("test_gb_bl.jl")
include("test_ml.jl")
include("test_system.jl")

4 changes: 2 additions & 2 deletions test/test_gb.jl
@@ -4,9 +4,9 @@ using FactCheck
importall GradientBoost.GB
importall GradientBoost.LossFunctions

type DummyGradientBoost <: GradientBoost; end
type DummyGradientBoost <: GBAlgorithm; end

type StubGradientBoost <: GradientBoost
type StubGradientBoost <: GBAlgorithm
loss_function::LossFunction
sampling_rate::FloatingPoint
learning_rate::FloatingPoint
