Merge branch 'release/v0.0.1'
svs14 committed Jul 21, 2014
2 parents 9e778e7 + 4ab5146 commit 1e04ff5
Showing 14 changed files with 166 additions and 136 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -1 +1,4 @@
*.cov
Gemfile
Gemfile.lock
Guardfile
7 changes: 7 additions & 0 deletions CHANGELOG.yml
@@ -1,6 +1,13 @@
%YAML 1.2
---
changes:
v0.0.1:
- Patch example code in README.
- Rename GradientBoost to GBAlgorithm.
- All algorithm constructors adjusted for keyword arguments.
- Rename GBProblem to GBLearner.
- Rename GBLearner to GBBaseLearner.
- Rename GBL to GBBL.
v0.0.0:
- First release.
- Architecture implemented.
81 changes: 51 additions & 30 deletions README.md
@@ -58,19 +58,19 @@ train_ind, test_ind = GradientBoost.Util.holdout(num_instances, 0.2)

### Build Learner

The gradient boosting (GB) learning problem comprises of a GB algorithm
The gradient boosting (GB) learner comprises a GB algorithm
and what output it must produce.
In this case, we shall assign a gradient boosted decision tree to output classes.
```julia
# Build GBProblem
gbdt = GBDT(
BinomialDeviance(), # Loss function
0.6, # Sampling rate
0.1, # Learning rate
100, # Number of iterations
# Build GBLearner
gbdt = GBDT(;
loss_function = BinomialDeviance(),
sampling_rate = 0.6,
learning_rate = 0.1,
num_iterations = 100
)
gbp = GBProblem(
gbdt, # Gradient boosting algorithm
gbl = GBLearner(
gbdt, # Gradient boosting algorithm
:class # Output (:class, :class_prob, :regression)
)
```
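
For regression on a numeric target, the same pattern applies with a different loss function and output symbol. This is only a sketch reusing the names above; the `_reg` names are illustrative and not from the package:

```julia
# Hypothetical regression setup mirroring the classification example above.
gbdt_reg = GBDT(;
  loss_function = LeastSquares(),
  sampling_rate = 0.6,
  learning_rate = 0.1,
  num_iterations = 100
)
gbl_reg = GBLearner(gbdt_reg, :regression)
```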
@@ -83,10 +83,10 @@ In this case, it is not an issue.

```julia
# Train
fit!(gbp, instances[train_ind, :], labels[train_ind])
ML.fit!(gbl, instances[train_ind, :], labels[train_ind])

# Predict
predictions = predict!(gbp, instances[test_ind, :])
predictions = ML.predict!(gbl, instances[test_ind, :])
```
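
As a quick sanity check (not part of the package API; the Evaluate section below may use its own error measure), the class predictions can be compared against the held-out labels directly:

```julia
# Rough accuracy estimate; assumes `predictions` and `labels[test_ind]` hold class labels.
accuracy = mean(predictions .== labels[test_ind])
println("Accuracy: $accuracy")
```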

### Evaluate
@@ -113,12 +113,12 @@ Current loss functions covered are:
`LeastSquares`, `LeastAbsoluteDeviation` and `BinomialDeviance`.

```julia
gbdt = GBDT(
BinomialDeviance(), # Loss function
0.6, # Sampling rate
0.1, # Learning rate
100, # Number of iterations
{ # Tree options (DecisionTree.jl regressor)
gbdt = GBDT(;
loss_function = BinomialDeviance(), # Loss function
sampling_rate = 0.6, # Sampling rate
learning_rate = 0.1, # Learning rate
num_iterations = 100, # Number of iterations
tree_options = { # Tree options (DecisionTree.jl regressor)
:maxlabels => 5,
:nsubfeatures => 0
}
@@ -152,32 +152,53 @@ end
Once this is done,
the algorithm can be instantiated with the respective base learner.
```julia
gbl = GBL(
LinearModel, # Base Learner
LeastSquares(), # Loss functoin
0.8, # Sampling rate
0.1, # Learning rate
100 # Number of iterations
gbl = GBBL(
LinearModel; # Base Learner
loss_function = LeastSquares(), # Loss function
sampling_rate = 0.8, # Sampling rate
learning_rate = 0.1, # Learning rate
num_iterations = 100 # Number of iterations
)
gbp = GBProblem(gbl, :regression)
gbl = GBLearner(gbl, :regression)
```
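
The collapsed portion of this README defines `LinearModel` and extends `learner_fit` and `learner_predict` for it. As a rough illustration of that extension point (the signatures below are assumed, so defer to the package's own example), a minimal least-squares base learner might look like this:

```julia
# Illustrative sketch only; the exact required signatures live in the
# package's own LinearModel example, which is collapsed above.
import GradientBoost.GBBaseLearner: learner_fit, learner_predict
importall GradientBoost.LossFunctions

type ExampleLinearModel end

# Fit: the "model" is just a least-squares coefficient vector
# over an intercept-augmented instance matrix.
function learner_fit(lf::LossFunction, learner::Type{ExampleLinearModel},
  instances, labels)
  [ones(size(instances, 1)) instances] \ labels
end

# Predict: apply the stored coefficients to new instances.
function learner_predict(lf::LossFunction, learner::Type{ExampleLinearModel},
  coefs, instances)
  [ones(size(instances, 1)) instances] * coefs
end
```

With those two methods in place, the new type could be passed to `GBBL` in place of `LinearModel` above.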

## Gradient Boosting Framework

All previously developed algorithms follow the framework
provided by `GradientBoost.GB`.

As this package is in its preliminary stage,
major changes may occur in the near future and as such
we provide minimal README documentation.

The algorithm must be of type `GBAlgorithm`, with the fields
`loss_function`, `learning_rate`, `sampling_rate` and `num_iterations` accessible.
The bare minimum an algorithm must implement is
`build_base_func`. Optionally, `create_sample_indices` can be extended.
Loss functions can be found in `GradientBoost.LossFunctions`.
Everything that must be implemented is shown in the example below:
```julia
import GradientBoost.GB
import GradientBoost.LossFunctions: LossFunction

# Must subtype from GBAlgorithm defined in GB module.
type ExampleGB <: GB.GBAlgorithm
loss_function::LossFunction
sampling_rate::FloatingPoint
learning_rate::FloatingPoint
num_iterations::Int
end

# Model training and coefficient optimization should be done here.
function GB.build_base_func(
gb::ExampleGB, instances, labels, prev_func_pred, psuedo)

model_const = 0.5
model_pred = (instances) -> Float64[
sum(instances[i,:]) for i = 1:size(instances, 1)
]

return (instances) -> model_const .* model_pred(instances)
end
```
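
Once such a subtype exists, it plugs into the same ML-level API used earlier in this README. The snippet below is only a sketch: it assumes `instances`, `labels` and the train/test indices from the earlier example, and relies on `ExampleGB`'s default field-order constructor.

```julia
import GradientBoost.ML

# ExampleGB's default constructor takes its fields in declaration order:
# loss function, sampling rate, learning rate, number of iterations.
egb = ExampleGB(ML.LeastSquares(), 0.8, 0.1, 10)
egb_learner = ML.GBLearner(egb, :regression)
ML.fit!(egb_learner, instances[train_ind, :], labels[train_ind])
predictions = ML.predict!(egb_learner, instances[test_ind, :])
```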

A relatively light algorithm
that implements this is `GBLearner`, found in `src/gb_learner.jl`.
that implements `GBAlgorithm` is `GBBL`, found in `src/gb_bl.jl`.

## Misc

2 changes: 1 addition & 1 deletion src/GradientBoost.jl
@@ -4,7 +4,7 @@ module GradientBoost
include("util.jl")
include("loss.jl")
include("gb.jl")
include("gb_learner.jl")
include("gb_bl.jl")
include("gb_dt.jl")
include("ml.jl")

12 changes: 6 additions & 6 deletions src/gb.jl
@@ -4,7 +4,7 @@ module GB
importall GradientBoost.Util
importall GradientBoost.LossFunctions

export GradientBoost,
export GBAlgorithm,
GBModel,
stochastic_gradient_boost,
fit,
@@ -14,7 +14,7 @@ export GradientBoost,


# Gradient boost algorithm.
abstract GradientBoost
abstract GBAlgorithm

# Gradient boost model.
type GBModel
@@ -28,7 +28,7 @@ end
# @param instances Instances.
# @param labels Labels.
# @return Gradient boost model.
function stochastic_gradient_boost(gb::GradientBoost, instances, labels)
function stochastic_gradient_boost(gb::GBAlgorithm, instances, labels)
# Initialize base functions collection
num_iterations = gb.num_iterations
base_funcs = Array(Function, num_iterations+1)
@@ -73,7 +73,7 @@ function stochastic_gradient_boost(gb::GradientBoost, instances, labels)
return GBModel(gb.learning_rate, base_funcs)
end

function fit(gb::GradientBoost, instances, labels)
function fit(gb::GBAlgorithm, instances, labels)
stochastic_gradient_boost(gb, instances, labels)
end
function predict(gb_model::GBModel, instances)
@@ -93,7 +93,7 @@ end
# @param psuedo Psuedo-labels (psuedo-response).
# @return Function of form (instances) -> predictions.
function build_base_func(
gb::GradientBoost,
gb::GBAlgorithm,
instances,
labels,
prev_func_pred,
@@ -109,7 +109,7 @@ end
# @param instances Instances.
# @param labels Labels.
# @return Sample indices.
function create_sample_indices(gb::GradientBoost, instances, labels)
function create_sample_indices(gb::GBAlgorithm, instances, labels)
n = size(instances, 1)
prop = gb.sampling_rate

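
The README notes that `create_sample_indices` is an optional extension point; the visible part of its default draws on `gb.sampling_rate` to pick instance indices. A hedged sketch of overriding it for a custom algorithm such as the README's `ExampleGB`, switching to sampling with replacement (purely illustrative, not the package's default behaviour):

```julia
import GradientBoost.GB

# Illustrative override: subsample instance indices with replacement.
function GB.create_sample_indices(gb::ExampleGB, instances, labels)
  n = size(instances, 1)
  num_samples = int(n * gb.sampling_rate)
  rand(1:n, num_samples)
end
```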
12 changes: 6 additions & 6 deletions src/gb_learner.jl → src/gb_bl.jl
@@ -1,7 +1,7 @@
# Gradient Boosted Learner
module GBLearner
module GBBaseLearner

export GBL,
export GBBL,
build_base_func,
learner_fit,
learner_predict
@@ -11,14 +11,14 @@ importall GradientBoost.LossFunctions
importall GradientBoost.Util

# Gradient boosted base learner algorithm.
type GBL <: GradientBoost
type GBBL <: GBAlgorithm
loss_function::LossFunction
sampling_rate::FloatingPoint
learning_rate::FloatingPoint
num_iterations::Int
learner

function GBL(learner, loss_function=LeastSquares(),
function GBBL(learner; loss_function=LeastSquares(),
sampling_rate=0.8, learning_rate=0.1,
num_iterations=100)

@@ -27,7 +27,7 @@ type GBL <: GradientBoost
end

function GB.build_base_func(
gb::GBL,
gb::GBBL,
instances,
labels,
prev_func_pred,
@@ -95,7 +95,7 @@ function fit_best_constant(lf::BinomialDeviance,
labels, psuedo, psuedo_pred, prev_func_pred)

# TODO(svs14): Add fit_best_constant (BinomialDeviance) for base learner.
error("$(typeof(lf)) is not implemented for GBLearner.")
error("$(typeof(lf)) is not implemented for GBBaseLearner.")
end

end # module
4 changes: 2 additions & 2 deletions src/gb_dt.jl
@@ -11,14 +11,14 @@ export GBDT,
build_base_func

# Gradient boosted decision tree algorithm.
type GBDT <: GradientBoost
type GBDT <: GBAlgorithm
loss_function::LossFunction
sampling_rate::FloatingPoint
learning_rate::FloatingPoint
num_iterations::Int
tree_options::Dict

function GBDT(loss_function=LeastSquares(),
function GBDT(;loss_function=LeastSquares(),
sampling_rate=0.6, learning_rate=0.1,
num_iterations=100, tree_options=Dict())

29 changes: 14 additions & 15 deletions src/ml.jl
@@ -4,57 +4,56 @@ module ML
importall GradientBoost.LossFunctions
importall GradientBoost.GB
importall GradientBoost.GBDecisionTree
importall GradientBoost.GBLearner
importall GradientBoost.GBBaseLearner

export GBProblem,
export GBLearner,
fit!,
predict!,
LossFunction,
LeastSquares,
LeastAbsoluteDeviation,
BinomialDeviance,
GBDT,
GBL,
GBBL,
learner_fit,
learner_predict


# Gradient boosting problem.
# NOTE(svs14): Might want to find a better name for this.
type GBProblem
algorithm::GradientBoost
# Gradient boosting learner as defined by ML API.
type GBLearner
algorithm::GBAlgorithm
output::Symbol
model

function GBProblem(algorithm, output=:regression)
function GBLearner(algorithm, output=:regression)
new(algorithm, output, nothing)
end
end

function fit!(gbp::GBProblem, instances, labels)
function fit!(gbl::GBLearner, instances, labels)
error("Instance type: $(typeof(instances))
and label type: $(typeof(labels)) together is currently not supported.")
end
function predict!(gbp::GBProblem, instances)
function predict!(gbl::GBLearner, instances)
error("Instance type: $(typeof(instances)) is currently not supported.")
end

function fit!(gbp::GBProblem,
function fit!(gbl::GBLearner,
instances::Matrix{Float64}, labels::Vector{Float64})

# No special processing required.
gbp.model = fit(gbp.algorithm, instances, labels)
gbl.model = fit(gbl.algorithm, instances, labels)
end

function predict!(gbp::GBProblem,
function predict!(gbl::GBLearner,
instances::Matrix{Float64})

# Predict with GB algorithm
predictions = predict(gbp.model, instances)
predictions = predict(gbl.model, instances)

# Postprocess according to output and loss function
predictions = postprocess_pred(
gbp.output, gbp.algorithm.loss_function, predictions
gbl.output, gbl.algorithm.loss_function, predictions
)

predictions
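
The concrete `fit!` and `predict!` methods visible in this hunk are defined for `Matrix{Float64}` instances and `Vector{Float64}` labels; other types fall through to the error methods above. Callers holding data in other element types would therefore convert first, as in this sketch (`gbl` is a `GBLearner` as in the README; the `raw_*` names are hypothetical):

```julia
# Coerce data into the element types accepted by the ML-level methods.
instances_f64 = convert(Matrix{Float64}, raw_instances)
labels_f64 = convert(Vector{Float64}, raw_labels)
ML.fit!(gbl, instances_f64, labels_f64)
```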
2 changes: 1 addition & 1 deletion test/runtests.jl
@@ -7,7 +7,7 @@ include("test_util.jl")
include("test_loss.jl")
include("test_gb.jl")
include("test_gb_dt.jl")
include("test_gb_learner.jl")
include("test_gb_bl.jl")
include("test_ml.jl")
include("test_system.jl")

4 changes: 2 additions & 2 deletions test/test_gb.jl
@@ -4,9 +4,9 @@ using FactCheck
importall GradientBoost.GB
importall GradientBoost.LossFunctions

type DummyGradientBoost <: GradientBoost; end
type DummyGradientBoost <: GBAlgorithm; end

type StubGradientBoost <: GradientBoost
type StubGradientBoost <: GBAlgorithm
loss_function::LossFunction
sampling_rate::FloatingPoint
learning_rate::FloatingPoint
