Commit
playerkk committed Dec 4, 2014
1 parent fd84e5c · commit 4173a17
Showing 294 changed files with 32,055 additions and 0 deletions.
test_boosted_dt_mc.m
@@ -0,0 +1,42 @@
function confidences = test_boosted_dt_mc(classifier, features)
% confidences = test_boosted_dt_mc(classifier, features)
%
% Returns a log likelihood ratio for each class in the classifier
%
% Input:
%   classifier: boosted decision tree classifier
%   features:   classifier features (ndata, nvariables)
% Output:
%   confidences(ndata, nclasses):
%     P(class=k|features) \propto 1./(1+exp(-confidences(k)))

npred = classifier.wcs(1).dt.npred;
if size(features, 2)~=npred
    error('Incorrect number of attributes')
end

wcs = classifier.wcs;
nclasses = size(wcs, 2);

ntrees = size(wcs, 1);

confidences = zeros(size(features, 1), nclasses);
for c = 1:nclasses
    for t = 1:ntrees
        if ~isempty(wcs(t,c).dt)
            if 0  % disabled: alternative leaf lookup via compiled treevalc
                dt = wcs(t,c).dt;
                [var, cut, children, catsplit] = tree_getParameters(dt);
                nodes = treevalc(int32(var), cut, int32(children(:, 1)), ...
                    int32(children(:, 2)), catsplit(:, 1), features');
                %disp(num2str(nodes));
            else
                [class_indices, nodes, classes] = treeval(wcs(t, c).dt, features);
            end
            % accumulate this tree's per-leaf confidences for class c
            confidences(:, c) = confidences(:, c) + wcs(t, c).confidences(nodes);
        end
    end
    confidences(:, c) = confidences(:, c) + classifier.h0(c);
end
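For orientation, a minimal usage sketch (hypothetical names; classifier would come from train_boosted_dt_mc, included later in this diff) showing how the returned log-likelihood ratios map to logistic confidences and a predicted label, per the docstring:

% Hypothetical usage sketch; 'classifier' and 'test_features' are illustrative.
conf = test_boosted_dt_mc(classifier, test_features);  % (ndata, nclasses)
prob = 1 ./ (1 + exp(-conf));      % logistic confidence per class
[tmp, idx] = max(conf, [], 2);     % most confident class per example
predicted = classifier.names(idx); % class indices back to name strings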
test_boosted_kde_2c.m
@@ -0,0 +1,12 @@
function p = test_boosted_kde_2c(density, x, data)
% Used to evaluate the likelihood of a point from a kernel density estimate.
% density is the density evaluated at each point of x
% x are the points at which the density is defined (assumed equally spaced)
% data are the data points to be evaluated
% p(i) is the density value at the grid point in x closest to data(i)

n = length(x);
wx = x(2)-x(1);                         % grid spacing
indices = round((data - x(1))/wx) + 1;  % nearest grid index for each datum
indices = min(max(indices, 1), n);      % clamp indices to the grid range
p = density(indices);
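A short, self-contained sketch of the nearest-grid-point lookup this function performs (all names and values below are illustrative, not part of the commit):

% Hypothetical usage: a density tabulated on an equally spaced grid.
x = linspace(-3, 3, 101);                  % grid points
density = exp(-x.^2/2) / sqrt(2*pi);       % standard normal density on the grid
data = [-2.7; 0.1; 5.0];                   % query points; 5.0 clamps to x(end)
p = test_boosted_kde_2c(density, x, data); % density at the nearest grid point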
train_boosted_dt_2c.m
@@ -0,0 +1,91 @@
function classifier = train_boosted_dt_2c(features, cat_features, ...
    labels, num_iterations, nodespertree, stopval, w)
% classifier = train_boosted_dt_2c(features, cat_features, ...
%     labels, num_iterations, nodespertree, stopval, w)
%
% Trains a two-class classifier based on boosted decision trees.  Boosting
% is done by the logistic regression version of Adaboost (Adaboost.L -
% Collins, Schapire, Singer 2002).  At each iteration, a decision tree is
% created, with confidences equal to 1/2*ln(P+/P-), according to the
% weighted distribution.  Weights are assigned as
%   w(i) = 1 / (1 + exp(y_i * sum_{t in iterations} h_t(x_i)))
% features(ndata, nfeatures)
% cat_features - indices of discrete-valued features (may be [])
% labels - {-1, 1}
% num_iterations - the number of trees to create
% nodespertree - target number of terminal nodes per tree
% stopval - stop early if the mean confidence improves by less than this
%           over the last 10 trees (optional)
% w - initial example weights (optional)

num_data = length(labels);

cl = [-1 1];
y = labels;

if ~exist('stopval', 'var') || isempty(stopval)
    stopval = 0;
end

if ~exist('w', 'var') || isempty(w)
    w = ones(num_data, 1);
end
w = w/sum(w);

classifier.h0 = 0;

% for i = 1:2
%     indices = find(y==cl(i));
%     count = numel(indices);
%     if cl(i)==1
%         classifier.h0 = log(count / (num_data-count));
%     end
%     w(indices) = 1 / count/2;
% end

data_confidences = zeros(num_data, 1);
aveconf = [];

for t = 1:num_iterations
    % learn decision tree based on weighted distribution
    dt = treefitw(features, y, w, 1/num_data/2, 'catidx', cat_features, ...
        'method', 'classification', 'maxnodes', nodespertree*4);
    [tmp, level] = min(abs(dt.ntermnodes-nodespertree));
    dt = treeprune(dt, 'level', level-1);

    % assign partition confidences: half log-odds at each leaf
    temp_c = dt.classname;
    pi = (strcmp(temp_c{1},'1')) + (2*strcmp(temp_c{2},'1'));
    ni = (strcmp(temp_c{1},'-1')) + (2*strcmp(temp_c{2},'-1'));
    classprob = dt.classprob;
    confidences = 1/2*(log(classprob(:, pi)) - log(classprob(:, ni)));

    % assign weights
    [class_indices, nodes, classes] = treeval(dt, features);
    data_confidences = data_confidences + confidences(nodes);
    w = 1 ./ (1+exp(y.*data_confidences));
    w = w / sum(w);

    pconf = mean(1./(1+exp(-data_confidences(y==1))));
    nconf = mean(1./(1+exp(-data_confidences(y==-1))));

    disp(['t: ', num2str(t), ' c: ' num2str(mean(1 ./ (1+exp(-y.*data_confidences)))) ' e: ' ...
        num2str(mean(y.*data_confidences < 0)) ' c_p: ' num2str(pconf) ' c_n: ' num2str(nconf)]);

    classifier.wcs(t,1).dt = dt;
    classifier.wcs(t,1).confidences = confidences;
    %pause(0.1);

    aveconf(t) = mean(1 ./ (1+exp(-y.*data_confidences)));
    if t>10 && (aveconf(t)-aveconf(t-10) < stopval)
        disp(num2str(aveconf))
        disp(['Stopping after ' num2str(t) ' trees'])
        break;
    end

end

disp(['mean conf = ' num2str(mean(1 ./ (1+exp(-y.*data_confidences))))]);
disp(['training error: ' num2str(mean(y.*data_confidences < 0))]);
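A hedged usage sketch for the two-class trainer (illustrative data; assumes the custom helpers this commit ships elsewhere, such as treefitw, are on the path). Labels must be in {-1, 1} and cat_features may be empty:

% Hypothetical usage: 200 examples, 5 continuous features, no categorical ones.
features = randn(200, 5);
labels = 2*(features(:, 1) + 0.5*randn(200, 1) > 0) - 1;  % labels in {-1, 1}
classifier = train_boosted_dt_2c(features, [], labels, 50, 8, 0, []);
% classifier.wcs(t).dt and classifier.wcs(t).confidences hold each tree and
% its per-leaf half log-odds; summing over trees scores new examples.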
train_boosted_dt_mc.m
@@ -0,0 +1,155 @@
function classifier = train_boosted_dt_mc(features, cat_features, labels, ...
    num_iterations, num_nodes, stopval, init_weights, varargin)
%
% classifier = train_boosted_dt_mc(features, cat_features, labels, ...
%     num_iterations, num_nodes, stopval, init_weights, varargin)
%
% Trains a classifier based on boosted decision trees.  Boosting is done by
% the logistic regression version of Adaboost (Adaboost.L - Collins,
% Schapire, Singer 2002).  At each iteration, a decision tree is created
% for each class, with confidences equal to 1/2*ln(P+/P-) for that class,
% according to the weighted distribution.  Final classification is based on
% the largest confidence label (possibly incorporating a prior as
% h0(c) = 1/2*ln(Pc/(1-Pc))).  Weights are assigned as
%   w(i,j) = 1 / (1 + exp(y_ij * sum_{t in iterations} h_t(x_i, j)))

if length(varargin) == 1 % class names supplied
    gn = varargin{1};
    gid = zeros(size(labels));
    for c = 1:length(gn)
        ind = find(strcmp(labels, gn{c}));
        gid(ind) = c;
        if ~isempty(init_weights)
            disp([gn{c} ': ' num2str(sum(init_weights(ind)))]);
        else
            disp([gn{c} ': ' num2str(length(ind))]);
        end
    end
    % drop examples whose label matches none of the supplied class names
    ind = find(gid==0);
    gid(ind) = [];
    labels(ind) = [];
    features(ind, :) = [];
else
    [gid, gn] = grp2idx(labels);
end

if ~exist('stopval', 'var') || isempty(stopval)
    stopval = 0;
end
if ~exist('init_weights', 'var')
    init_weights = [];
end

classifier.names = gn;

num_classes = length(gn);
num_data = length(gid);

if isempty(init_weights)
    init_weights = ones(num_data, 1)/num_data;
else
    init_weights = init_weights / sum(init_weights);
end

% if no examples from a class are present, create one dummy example for
% that class with very small weight
for c = 1:numel(gn)
    if ~any(gid==c)
        disp(['warning: no examples from class ' gn{c}])
        gid(end+1) = c;
        features(end+1, :) = zeros(1, size(features, 2));
        num_data = num_data + 1;
        init_weights(end+1) = min(init_weights)/2;
    end
end

all_conf = zeros(num_data, num_classes);
for c = 1:num_classes

    disp(['class: ' num2str(gn{c})]);
    % one-vs-all: y is 1 for class c, -1 otherwise
    y = (gid == c)*2-1;
    cl = [-1 1];
    nc = 2;
    w = zeros(num_data, 1);
    cw = zeros(num_classes, 1);
    for i = 1:2
        indices = find(y==cl(i));
        %count = sum(init_weights(indices));
        %w(indices) = init_weights(indices) / count / 2;
        w(indices) = init_weights(indices);

        if cl(i)==1
            %classifier.h0(c) = log(count / (1-count));
            classifier.h0(c) = 0;
        end

    end

    data_confidences = zeros(num_data, 1);
    aveconf = [];

    for t = 1:num_iterations
        % learn decision tree based on weighted distribution
        dt = treefitw(features, y, w, 1/num_data/2, 'catidx', cat_features, ...
            'method', 'classification', 'maxnodes', num_nodes*4);
        [tmp, level] = min(abs(dt.ntermnodes-num_nodes));
        dt = treeprune(dt, 'level', level-1);

        % assign partition confidences: half log-odds at each leaf
        temp_c = dt.classname;
        pi = (strcmp(temp_c{1},'1')) + (2*strcmp(temp_c{2},'1'));
        ni = (strcmp(temp_c{1},'-1')) + (2*strcmp(temp_c{2},'-1'));

        classprob = dt.classprob;
        confidences = 1/2*(log(classprob(:, pi)) - log(classprob(:, ni)));

        % assign weights
        [class_indices, nodes, classes] = treeval(dt, features);
        data_confidences = data_confidences + confidences(nodes);

        w = 1 ./ (1+exp(y.*data_confidences));
        w = w / sum(w);

        %disp(['c: ' num2str(mean(1 ./ (1+exp(-y.*data_confidences)))) ' e: ' num2str(mean(y.*data_confidences < 0)) ' w: ' num2str(max(w))]);

        classifier.wcs(t, c).dt = dt;
        classifier.wcs(t, c).confidences = confidences;

        aveconf(t) = mean(1 ./ (1+exp(-y.*data_confidences)));
        if t>10 && (aveconf(t)-aveconf(t-10) < stopval)
            disp(num2str(aveconf))
            disp(['Stopping after ' num2str(t) ' trees'])
            break;
        end

    end

    finalconf = 1 ./ (1+exp(-y.*data_confidences));
    finalerr = (y.*data_confidences < 0);
    disp(['confidence:: mean: ' num2str(mean(finalconf)) ...
        ' pos: ' num2str(mean(finalconf(y==1))) ...
        ' neg: ' num2str(mean(finalconf(y~=1)))]);
    disp(['training error:: mean: ' num2str(mean(finalerr)) ...
        ' pos: ' num2str(mean(finalerr(y==1))) ...
        ' neg: ' num2str(mean(finalerr(y~=1)))]);
    all_conf(:, c) = data_confidences+classifier.h0(c);

end

% compute and display training error
[tmp, assigned_label] = max(all_conf, [], 2);
conf_matrix = zeros(num_classes, num_classes);
for c = 1:num_classes
    indices = find(gid==c);
    for c2 = 1:num_classes
        conf_matrix(c, c2) = mean(assigned_label(indices)==c2);
    end
    disp([gn{c} ' error: ' num2str(mean(assigned_label(indices)~=c))]);
end
disp('Confusion Matrix: ');
disp(num2str(conf_matrix));
disp(['total error: ' num2str(mean(assigned_label~=gid))]);
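Finally, a hedged end-to-end sketch pairing this trainer with test_boosted_dt_mc from the top of the diff (illustrative names and random data; again assumes the commit's helpers such as treefitw are on the path):

% Hypothetical usage: three named classes passed via varargin.
names = {'ground', 'vertical', 'sky'};       % illustrative class names
labels = names(randi(3, 500, 1))';           % random cell-array labels (column)
features = randn(500, 10);
classifier = train_boosted_dt_mc(features, [], labels, 20, 8, 0, [], names);
conf = test_boosted_dt_mc(classifier, features);  % per-class log-ratios
[tmp, idx] = max(conf, [], 2);                    % highest-confidence class
predicted = classifier.names(idx);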