diff --git a/boosting/test_boosted_dt_mc.m b/boosting/test_boosted_dt_mc.m new file mode 100644 index 0000000..988133f --- /dev/null +++ b/boosting/test_boosted_dt_mc.m @@ -0,0 +1,42 @@ +function confidences = test_boosted_dt_mc(classifier, features) + % confidences = test_boosted_dt_mc(classifier, features) + % + % Returns a log likelihod ratio for each class in the classifier + % + % Input: + % classifier: boosted decision tree classifier + % features: classifier features (ndata, nvariables) + % Output: + % confidences(ndata, nclasses): + % P(class=k|features) \propto 1./(1+exp(-confidences(k))) + + npred = classifier.wcs(1).dt.npred; + if size(features, 2)~=npred + error('Incorrect number of attributes') + end + + wcs = classifier.wcs; + nclasses = size(wcs, 2); + + ntrees = size(wcs, 1); + + confidences = zeros(size(features, 1), nclasses); + for c = 1:nclasses + for t = 1:ntrees + if ~isempty(wcs(t,c).dt) + if 0 + dt = wcs(t,c).dt; + [var, cut, children, catsplit] = tree_getParameters(dt); + nodes = treevalc(int32(var), cut, int32(children(:, 1)), ... + int32(children(:, 2)), catsplit(:, 1), features'); + %disp(num2str(nodes)); + else + [class_indices, nodes, classes] = treeval(wcs(t, c).dt, features); + end + confidences(:, c) = confidences(:, c) + wcs(t, c).confidences(nodes); + end + end + confidences(:, c) = confidences(:, c) + classifier.h0(c); + end + + diff --git a/boosting/test_boosted_kde_2c.m b/boosting/test_boosted_kde_2c.m new file mode 100644 index 0000000..0e670d9 --- /dev/null +++ b/boosting/test_boosted_kde_2c.m @@ -0,0 +1,12 @@ +function p = test_boosted_kde_2c(density, x, data) +% used to evaluate the likelihood of a point from a kernel density estimate +% density is the density function +% x are the points at which f is defined (assumed to be equally spaced) +% data are the data points to be evaluated +% p is the value of f(xi) where x(xi) is the closest value in x to y + +n = length(x); +wx = x(2)-x(1); +indices = round((data- x(1))/wx)+1; +indices = min(max(indices, 1), n); +p = density(indices); \ No newline at end of file diff --git a/boosting/train_boosted_dt_2c.m b/boosting/train_boosted_dt_2c.m new file mode 100644 index 0000000..cedc92e --- /dev/null +++ b/boosting/train_boosted_dt_2c.m @@ -0,0 +1,91 @@ +function classifier = train_boosted_dt_2c(features, cat_features, ... + labels, num_iterations, nodespertree, stopval, w) +% classifier = train_boosted_dt_2c(features, cat_features, ... +% labels, num_iterations, nodespertree, stopval, w) +% +% Trains a two-class classifier based on boosted decision trees. Boosting done by the +% logistic regression version of Adaboost (Adaboost.L - Collins, Schapire, +% Singer 2002). At each iteration, a set of decision trees is created, with +% confidences equal to 1/2*ln(P+/P-), according to the +% weighted distribution. Weights are assigned as +% w(i,j) = 1 / (1+exp(sum{t in iterations}[yij*ht(xi, j)])). 
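+% For intuition (an illustrative note, not used by the code below): writing the
+% margin as m = y*F(x), the weight is 1/(1+exp(m)); e.g.
+%   m = [-2 0 2];  w_m = 1./(1+exp(m));   % approx [0.88 0.50 0.12]
+% so confidently misclassified examples dominate the next weighted tree fit.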
+% features(ndata, nfeatures) +% cat_features - discrete-valued output feature indices (could be []) +% labels - {-1, 1} +% num_iterations - the number of trees to create + + +num_data = length(labels); + +cl = [-1 1]; +y = labels; + +if ~exist('stopval', 'var') || isempty(stopval) + stopval = 0; +end + +if ~exist('w', 'var') || isempty(w) + w = ones(num_data, 1); +end +w = w/sum(w); + +classifier.h0 = 0; + +% for i = 1:2 +% indices = find(y==cl(i)); +% count = numel(indices); +% if cl(i)==1 +% classifier.h0 = log(count / (num_data-count)); +% end +% w(indices) = 1 / count/2; +% end + +data_confidences = zeros(num_data, 1); +aveconf = []; + +for t = 1:num_iterations + % learn decision tree based on weighted distribution + dt = treefitw(features, y, w, 1/num_data/2, 'catidx', cat_features, 'method', 'classification', 'maxnodes', nodespertree*4); + [tmp, level] = min(abs(dt.ntermnodes-nodespertree)); + dt = treeprune(dt, 'level', level-1); + % assign partition confidences + temp_c = dt.classname; + pi = (strcmp(temp_c{1},'1')) + (2*strcmp(temp_c{2},'1')); + ni = (strcmp(temp_c{1},'-1')) + (2*strcmp(temp_c{2},'-1')); + classprob = dt.classprob; + confidences = 1/2*(log(classprob(:, pi)) - log(classprob(:, ni))); + + % assign weights + [class_indices, nodes, classes] = treeval(dt, features); + data_confidences = data_confidences + confidences(nodes); + w = 1 ./ (1+exp(y.*data_confidences)); + w = w / sum(w); + + pconf = mean(1./(1+exp(-data_confidences(y==1)))); + nconf = mean(1./(1+exp(-data_confidences(y==-1)))); + + disp(['t: ', num2str(t), ' c: ' num2str(mean(1 ./ (1+exp(-y.*data_confidences)))) ' e: ' ... + num2str(mean(y.*data_confidences < 0)) ' c_p: ' num2str(pconf) ' c_n: ' num2str(nconf)]); + + classifier.wcs(t,1).dt = dt; + classifier.wcs(t,1).confidences = confidences; + %pause(0.1); + + aveconf(t) = mean(1 ./ (1+exp(-y.*data_confidences))); + if t>10 && (aveconf(t)-aveconf(t-10) < stopval) + disp(num2str(aveconf)) + disp(['Stopping after ' num2str(t) ' trees']) + break; + end + +end + +disp(['mean conf = ' num2str(mean(1 ./ (1+exp(-y.*data_confidences))))]); +disp(['training error: ' num2str(mean(y.*data_confidences < 0))]); + + + + + + + \ No newline at end of file diff --git a/boosting/train_boosted_dt_mc.m b/boosting/train_boosted_dt_mc.m new file mode 100644 index 0000000..2005752 --- /dev/null +++ b/boosting/train_boosted_dt_mc.m @@ -0,0 +1,155 @@ +function classifier = train_boosted_dt_mc(features, cat_features, labels, ... + num_iterations, num_nodes, stopval, init_weights, varargin) +% +%classifier = train_boosted_dt_mc(features, cat_features, labels, ... +% num_iterations, num_nodes, stopval, init_weights, varargin) +% +% Train a classifier based on boosted decision trees. Boosting done by the +% logistic regression version of Adaboost (Adaboost.L - Collins, Schapire, +% Singer 2002). At each +% iteration, a set of decision trees is created for each class, with +% confidences equal to 1/2*ln(P+/P-) for that class, according to the +% weighted distribution. Final classification is based on the largest +% confidence label (possibly incorporating a prior as h0(c) = +% 1/2*ln(Pc/(1-Pc)). Weights are assigned as +% w(i,j) = 1 / (1+exp(sum{t in iterations}[yij*ht(xi, j)])). 
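+%
+% Example (a sketch with hypothetical data, not taken from this repository):
+%   features = rand(200, 5);                            % (ndata, nfeatures)
+%   labels = cellstr(num2str(ceil(3*rand(200, 1))));    % class names as strings
+%   names = {'1', '2', '3'};
+%   classifier = train_boosted_dt_mc(features, [], labels, 20, 8, 0, [], names);
+%   conf = test_boosted_dt_mc(classifier, features);    % (ndata, nclasses)
+%   [tmp, predicted] = max(conf, [], 2);                % most confident class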
+ +if length(varargin) == 1 % class names supplied + gn = varargin{1}; + gid = zeros(size(labels)); + for c = 1:length(gn) + ind = find(strcmp(labels, gn{c})); + gid(ind) = c; + if ~isempty(init_weights) + disp([gn{c} ': ' num2str(sum(init_weights(ind)))]); + else + disp([gn{c} ': ' num2str(length(ind))]); + end + end + ind = find(gid==0); + gid(ind) = []; + labels(ind) = []; + features(ind, :) = []; +else + [gid, gn] = grp2idx(labels); +end + +if ~exist('stopval', 'var') || isempty(stopval) + stopval = 0; +end +if ~exist('init_weights', 'var') + init_weights = []; +end + +classifier.names = gn; + +num_classes = length(gn); +num_data = length(gid); + +if isempty(init_weights) + init_weights = ones(num_data, 1)/num_data; +else + init_weights = init_weights / sum(init_weights); +end + +% if no examples from a class are present, create one dummy example for +% that class with very small weight +for c = 1:numel(gn) + if ~any(gid==c) + disp(['warning: no examples from class ' gn(c)]) + gid(end+1) = c; + features(end+1, :) = zeros(size(features(end, 1))); + num_data = num_data + 1; + init_weights(end+1) = min(init_weights)/2; + end +end + +all_conf = zeros(num_data, num_classes); +for c = 1:num_classes + + disp(['class: ' num2str(gn{c})]); + y = (gid == c)*2-1; + cl = [-1 1]; + nc = 2; + w = zeros(num_data, 1); + cw = zeros(num_classes, 1); + for i = 1:2 + indices = find(y==cl(i)); + %count = sum(init_weights(indices)); + %w(indices) = init_weights(indices) / count / 2; + w(indices) = init_weights(indices); + + if cl(i)==1 + %classifier.h0(c) = log(count / (1-count)); + classifier.h0(c) = 0; + end + + end + + data_confidences = zeros(num_data, 1); + aveconf = []; + + for t = 1:num_iterations + % learn decision tree based on weighted distribution + dt = treefitw(features, y, w, 1/num_data/2, 'catidx', cat_features, 'method', 'classification', 'maxnodes', num_nodes*4); + [tmp, level] = min(abs(dt.ntermnodes-num_nodes)); + dt = treeprune(dt, 'level', level-1); + + % assign partition confidences + temp_c = dt.classname; + pi = (strcmp(temp_c{1},'1')) + (2*strcmp(temp_c{2},'1')); + ni = (strcmp(temp_c{1},'-1')) + (2*strcmp(temp_c{2},'-1')); + + classprob = dt.classprob; + confidences = 1/2*(log(classprob(:, pi)) - log(classprob(:, ni))); + + % assign weights + [class_indices, nodes, classes] = treeval(dt, features); + data_confidences = data_confidences + confidences(nodes); + + w = 1 ./ (1+exp(y.*data_confidences)); + w = w / sum(w); + + %disp(['c: ' num2str(mean(1 ./ (1+exp(-y.*data_confidences)))) ' e: ' num2str(mean(y.*data_confidences < 0)) ' w: ' num2str(max(w))]); + + classifier.wcs(t, c).dt = dt; + classifier.wcs(t, c).confidences = confidences; + + + aveconf(t) = mean(1 ./ (1+exp(-y.*data_confidences))); + if t>10 && (aveconf(t)-aveconf(t-10) < stopval) + disp(num2str(aveconf)) + disp(['Stopping after ' num2str(t) ' trees']) + break; + end + + end + + finalconf = 1 ./ (1+exp(-y.*data_confidences)); + finalerr = (y.*data_confidences < 0); + disp(['confidence:: mean: ' num2str(mean(finalconf)) ... + ' pos: ' num2str(mean(finalconf(y==1))) ... + ' neg: ' num2str(mean(finalconf(y~=1)))]); + disp(['training error:: mean: ' num2str(mean(finalerr)) ... + ' pos: ' num2str(mean(finalerr(y==1))) ... 
+ ' neg: ' num2str(mean(finalerr(y~=1)))]); + all_conf(:, c) = data_confidences+classifier.h0(c); + +end + +% compute and display training error +[tmp, assigned_label] = max(all_conf, [], 2); +conf_matrix = zeros(num_classes, num_classes); +for c = 1:num_classes + indices = find(gid==c); + for c2 = 1:num_classes + conf_matrix(c, c2) = mean(assigned_label(indices)==c2); + end + disp([gn{c} ' error: ' num2str(mean(assigned_label(indices)~=c))]); +end +disp('Confusion Matrix: '); +disp(num2str(conf_matrix)); +disp(['total error: ' num2str(mean(assigned_label~=gid))]); + + + \ No newline at end of file diff --git a/boosting/train_boosted_kde_2c.m b/boosting/train_boosted_kde_2c.m new file mode 100644 index 0000000..951f87d --- /dev/null +++ b/boosting/train_boosted_kde_2c.m @@ -0,0 +1,112 @@ +function density = train_boosted_kde_2c(data, labels, ranges, num_iter) +% Try to learn ln(P(x1, x2 | +)/P(x1, x2 | -), where + indicates that a pair of points, +% x, are in the same cluster and - indicates that the pair are in different +% clusters. Use boosting to estimate the parameters of the density in a +% naive structure. +% density(num_features).{x, log_ratio} + + +ndata = numel(labels); +nfeatures = size(data, 2); + +if ~exist('ranges') || isempty(ranges) + ranges = cell(nfeatures, 1); +end + +for f = 1:numel(ranges) + if isempty(ranges{f}) + ranges{f} = 'unbounded'; + end +end + +pos_indices = find(labels==1); +neg_indices = find(labels==-1); +npos = length(pos_indices); +nneg = length(neg_indices); +%weights = 1/num_data*ones(num_data, 1); +% weights = zeros(ndata, 1); +% weights(pos_indices) = 1/2/npos; +% weights(neg_indices) = 1/2/nneg; +weights = repmat(1/ndata, [ndata 1]); +pos_data = data(pos_indices, :); +neg_data = data(neg_indices, :); + +kernel_width = zeros(nfeatures, 1); + +% get optimal kernel width and xrange +% this window parameter is recommended by Silverman(1986) for non-normal plots +for f = 1:nfeatures + + y = data(:, f); + s = std(y); + sorty = sort(y); + n = length(y); + iq = sorty(round(3/4*n))-sorty(round(n/4)); + density(f).kernelwidth = 0.9*min(s, iq/1.34)*(1/n)^(1/5); + %disp(num2str(density(f).kernelwidth)) + [tmp, density(f).x] = ksdensity(y, 'weights', weights, ... + 'width', density(f).kernelwidth, 'support', ranges{f}); + density(f).log_ratio = zeros(size(density(f).x)); +end + +total_confidences = zeros(ndata, 1); +for m = 1:num_iter + pos_weights = weights(pos_indices); + neg_weights = weights(neg_indices); + tmp_confidences = zeros(ndata, 1); + % update densities + for f = 1:nfeatures + pos_f = ksdensity(pos_data(:, f), density(f).x, 'weights', pos_weights, ... + 'width', density(f).kernelwidth, 'support', ranges{f}); + pos_f = pos_f + 1/(ndata/2); + pos_f = pos_f / sum(pos_f) * sum(pos_weights); + neg_f = ksdensity(neg_data(:, f), density(f).x, 'weights', neg_weights, ... + 'width', density(f).kernelwidth, 'support', ranges{f}); + neg_f = neg_f + 1/(ndata/2); + neg_f = neg_f / sum(neg_f) * sum(neg_weights); + + tmp_ratio = (log(pos_f)-log(neg_f)); + %density(f).log_ratio = density(f).log_ratio + tmp_ratio; + curr_ratio(f).log_ratio = tmp_ratio; + tmp_confidences = tmp_confidences + ... 
+ test_boosted_kde_2c(tmp_ratio, density(f).x, data(:, f))'; + end + + % get alpha parameter for confidence + alpha = fminbnd(@compute_expected_confidence, 0.001, 2.0, [], labels, tmp_confidences, weights); + %disp(['alpha = ' num2str(alpha)]); + for f = 1:nfeatures + density(f).log_ratio = density(f).log_ratio + alpha*curr_ratio(f).log_ratio; + end + + weights = weights .* exp(-alpha*tmp_confidences.*labels); + sumw = sum(weights); + %disp(['sum w = ' num2str(sumw)]); + weights = weights / sumw; + + total_confidences = total_confidences + alpha*tmp_confidences; + + disp(['training error: n_err = ' num2str(mean(total_confidences(neg_indices)>=0)) ' p_err = ' ... + num2str(mean(total_confidences(pos_indices)<0))]); + if 0 + [f1, x] = ksdensity(total_confidences(pos_indices)); + f2 = ksdensity(total_confidences(neg_indices), x); + figure(1), plot(x, f1, 'b', x, f2, 'r'); + pause(0.5); + end + +end + + + + +function expected_confidence = compute_expected_confidence(alpha, labels, confidences, weights) +% used to choose alpha +% alpha is chosen so that the expected confidence: +% E(weight*exp(-alpha*label*confidence)*label*confidence) = 0 +% Note: Absolute value used so that minimization results in best confidence +% confidence. Normalization is unnecesary for finding alpha, though. +new_weights = exp(-alpha*labels.*confidences).*weights; +expected_confidence = sum(new_weights); +%expected_confidence = abs(sum(new_weights.*labels.*confidences)/sum(new_weights)); + \ No newline at end of file diff --git a/boosting/train_boosted_stubs_mc.m b/boosting/train_boosted_stubs_mc.m new file mode 100644 index 0000000..9dfba97 --- /dev/null +++ b/boosting/train_boosted_stubs_mc.m @@ -0,0 +1,154 @@ +function classifier = train_boosted_dt_mc(features, cat_features, labels, ... + num_iterations, num_nodes, stopval, init_weights, varargin) +% +%classifier = train_boosted_dt_mc(features, cat_features, labels, ... +% num_iterations, num_nodes, stopval, init_weights, varargin) +% +% Train a classifier based on boosted decision trees. Boosting done by the +% logistic regression version of Adaboost (Adaboost.L - Collins, Schapire, +% Singer 2002). At each +% iteration, a set of decision trees is created for each class, with +% confidences equal to 1/2*ln(P+/P-) for that class, according to the +% weighted distribution. Final classification is based on the largest +% confidence label (possibly incorporating a prior as h0(c) = +% 1/2*ln(Pc/(1-Pc)). Weights are assigned as +% w(i,j) = 1 / (1+exp(sum{t in iterations}[yij*ht(xi, j)])). 
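+%
+% Reading the output as a probability (a sketch consistent with the comment in
+% test_boosted_dt_mc): with F_c(x) the summed per-tree confidences for class c,
+%   P(class = c | x) is proportional to 1 ./ (1 + exp(-(F_c(x) + h0(c))))
+% and the predicted label is the class with the largest confidence.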
+ +if length(varargin) == 1 % class names supplied + gn = varargin{1}; + gid = zeros(size(labels)); + for c = 1:length(gn) + ind = find(strcmp(labels, gn{c})); + gid(ind) = c; + if ~isempty(init_weights) + disp([gn{c} ': ' num2str(sum(init_weights(ind)))]); + else + disp([gn{c} ': ' num2str(length(ind))]); + end + end + ind = find(gid==0); + gid(ind) = []; + labels(ind) = []; + features(ind, :) = []; +else + [gid, gn] = grp2idx(labels); +end + +if ~exist('stopval', 'var') || isempty(stopval) + stopval = 0; +end +if ~exist('init_weights', 'var') + init_weights = []; +end + +classifier.names = gn; + +num_classes = length(gn); +num_data = length(gid); + +if isempty(init_weights) + init_weights = ones(num_data, 1)/num_data; +else + init_weights = init_weights / sum(init_weights); +end + +% if no examples from a class are present, create one dummy example for +% that class with very small weight +for c = 1:numel(gn) + if ~any(gid==c) + disp(['warning: no examples from class ' gn(c)]) + gid(end+1) = c; + features(end+1, :) = zeros(size(features(end, 1))); + num_data = num_data + 1; + init_weights(end+1) = min(init_weights)/2; + end +end + +all_conf = zeros(num_data, num_classes); +for c = 1:num_classes + + disp(['class: ' num2str(gn{c})]); + y = (gid == c)*2-1; + cl = [-1 1]; + nc = 2; + w = zeros(num_data, 1); + cw = zeros(num_classes, 1); + for i = 1:2 + indices = find(y==cl(i)); + %count = sum(init_weights(indices)); + %w(indices) = init_weights(indices) / count / 2; + w(indices) = init_weights(indices); + + if cl(i)==1 + %classifier.h0(c) = log(count / (1-count)); + classifier.h0(c) = 0; + end + + end + + data_confidences = zeros(num_data, 1); + aveconf = []; + + for t = 1:num_iterations + % learn decision tree based on weighted distribution + dt = treefitw(features, y, w, 1/num_data/2, 'catidx', cat_features, 'method', 'classification', 'maxnodes', num_nodes*4); + [tmp, level] = min(abs(dt.ntermnodes-num_nodes)); + dt = treeprune(dt, 'level', level-1); + + % assign partition confidences + pi = (strcmp(dt.classname{1},'1')) + (2*strcmp(dt.classname{2},'1')); + ni = (strcmp(dt.classname{1},'-1')) + (2*strcmp(dt.classname{2},'-1')); + + classprob = dt.classprob; + confidences = 1/2*(log(classprob(:, pi)) - log(classprob(:, ni))); + + % assign weights + [class_indices, nodes, classes] = treeval(dt, features); + data_confidences = data_confidences + confidences(nodes); + + w = 1 ./ (1+exp(y.*data_confidences)); + w = w / sum(w); + + disp(['c: ' num2str(mean(1 ./ (1+exp(-y.*data_confidences)))) ' e: ' num2str(mean(y.*data_confidences < 0)) ' w: ' num2str(max(w))]); + + classifier.wcs(t, c).dt = dt; + classifier.wcs(t, c).confidences = confidences; + + + aveconf(t) = mean(1 ./ (1+exp(-y.*data_confidences))); + if t>10 && (aveconf(t)-aveconf(t-10) < stopval) + disp(num2str(aveconf)) + disp(['Stopping after ' num2str(t) ' trees']) + break; + end + + end + + finalconf = 1 ./ (1+exp(-y.*data_confidences)); + finalerr = (y.*data_confidences < 0); + disp(['confidence:: mean: ' num2str(mean(finalconf)) ... + ' pos: ' num2str(mean(finalconf(y==1))) ... + ' neg: ' num2str(mean(finalconf(y~=1)))]); + disp(['training error:: mean: ' num2str(mean(finalerr)) ... + ' pos: ' num2str(mean(finalerr(y==1))) ... 
+ ' neg: ' num2str(mean(finalerr(y~=1)))]); + all_conf(:, c) = data_confidences+classifier.h0(c); + +end + +% compute and display training error +[tmp, assigned_label] = max(all_conf, [], 2); +conf_matrix = zeros(num_classes, num_classes); +for c = 1:num_classes + indices = find(gid==c); + for c2 = 1:num_classes + conf_matrix(c, c2) = mean(assigned_label(indices)==c2); + end + disp([gn{c} ' error: ' num2str(mean(assigned_label(indices)~=c))]); +end +disp('Confusion Matrix: '); +disp(num2str(conf_matrix)); +disp(['total error: ' num2str(mean(assigned_label~=gid))]); + + + \ No newline at end of file diff --git a/boosting/tree_getNewVersion.m b/boosting/tree_getNewVersion.m new file mode 100644 index 0000000..4b776b7 --- /dev/null +++ b/boosting/tree_getNewVersion.m @@ -0,0 +1,8 @@ +function dt = tree_getNewVersion(dt) + +cut_old = dt.cut; +dt.cut = num2cell(dt.cut); +ind = find(dt.var<0); +for k = 1:numel(ind) + dt.cut{ind(k)} = dt.catsplit(cut_old(ind(k)), :); +end diff --git a/boosting/tree_getParameters.m b/boosting/tree_getParameters.m new file mode 100644 index 0000000..b9b4ed2 --- /dev/null +++ b/boosting/tree_getParameters.m @@ -0,0 +1,26 @@ +function [var, cut, children, catsplit] = tree_getParameters(dt) + +if iscell(dt.cut) + var = dt.var; + children = dt.children; + + cut = zeros(size(dt.cut)); + catind = var<0; + ncat = sum(catind); + catsplit = cell(ncat, 2); + ncat = 0; + for k = 1:numel(cut) + if var(k)>=0 + cut(k) = dt.cut{k}; + else + ncat = ncat+1; + cut(k) = ncat; + catsplit(ncat, :) = dt.cut{k}; + end + end +else + var = dt.var; + cut = dt.cut; + children = dt.children; + catsplit = dt.catsplit; +end diff --git a/boosting/treevalc.cpp b/boosting/treevalc.cpp new file mode 100644 index 0000000..2c458d4 --- /dev/null +++ b/boosting/treevalc.cpp @@ -0,0 +1,133 @@ +#include "mex.h" +#include +#include +/** + * Return the decision tree node corresponding to the given value set + * + * var[n]: the attribute ids for node n + * cut[n]: the threshold value for node n + * left_child[n]: the node id of the left child of node n, 0 if node n is terminal + * right_child[n]: the node id of the right child of node n, 0 if node n is terminal + * ncatsplit[c]: the number of values resulting in a left branch + * catsplit[c]: the values that would result in a left branch + * attributes: the attribute (variable) values for each feature + **/ +void +treevalc(int* var, double* cut, int* left_child, int* right_child, + int* ncatsplit, double** catsplit, + double* attributes, + int* node_id) { + + int currnode = 0; + + int nextnode; + int currvar; + double currval; + int cid, v; + int numvals; + double* vals; + + /* printf("init nodes: %d %d \n", left_child[currnode], right_child[currnode]); */ + + /* until reached terminal node */ + while ((left_child[currnode] != 0) && (right_child[currnode] != 0)) { + + /*printf("currnode: %d\n", currnode);*/ + + nextnode = -1; + + currvar = abs(var[currnode])-1; + currval = attributes[currvar]; + + /* decision based on thresholded float value */ + if (var[currnode] > 0) { + + /*printf("currvar: %d\n", currvar);*/ + + /* branch left */ + if (currval < cut[currnode]) { + nextnode = left_child[currnode]; + } + /* branch right */ + else { + nextnode = right_child[currnode]; + } + } + /* decision based on discrete value */ + else { + numvals = ncatsplit[(int)cut[currnode]-1]; + vals = catsplit[(int)cut[currnode]-1]; + for (v = 0; v < numvals; v++) { + if (currval == vals[v]) { + nextnode = left_child[currnode]; + break; + } + } + if (nextnode == -1) { + nextnode 
= right_child[currnode]; + } + } + + currnode = nextnode-1; + /* printf("curr node: %d \n", currnode);*/ + } + + *node_id = currnode+1; + +} + +/** + * plhs = {var, cut, left_child, right_child, catsplit(cell array), attributes(numatt, numdata)} + * + */ +void mexFunction(int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[]) +{ + + if (nrhs != 6) { + printf("Error: wrong number of input arguments: %d.\n", nlhs); + printf("Syntax: node_ids = treevalc(var, cut, left_child, right_child, catsplit, attributes)\n"); + } + + int* var = (int*)mxGetPr(prhs[0]); + double* cut = mxGetPr(prhs[1]); + int* left_child = (int*)mxGetPr(prhs[2]); + int* right_child = (int*)mxGetPr(prhs[3]); + /* get catsplit variables */ + int nsplits = mxGetNumberOfElements(prhs[4]); + int* ncatsplit = (int*)malloc(sizeof(int) * nsplits); + double** catsplit = (double**)malloc(sizeof(double*) * nsplits); + + int n = 0; + for (n = 0; n < nsplits; n++) { + mxArray* catsplit_cell_mx = mxGetCell(prhs[4], n); + if (catsplit_cell_mx == 0) { + printf("null cell"); + } + ncatsplit[n] = mxGetNumberOfElements(catsplit_cell_mx); + catsplit[n] = (double*)mxGetPr(catsplit_cell_mx); + } + + int numatt = mxGetM(prhs[5]); + int numdata = mxGetN(prhs[5]); + + /* printf("num data = %d num att = %d\n", numdata, numatt);*/ + + double* all_attributes = mxGetPr(prhs[5]); + + plhs[0] = mxCreateDoubleMatrix(numdata, 1, mxREAL); + double* node_ids = mxGetPr(plhs[0]); + + + int tmp_id; + for (n = 0; n < numdata; n++) { + treevalc(var, cut, left_child, right_child, ncatsplit, catsplit, + &all_attributes[numatt*n], &tmp_id); + node_ids[n] = (double)(tmp_id); + /* printf("final node id: %d\n", tmp_id); */ + } + + free(catsplit); + free(ncatsplit); + +} diff --git a/boosting/treevalc.mexa64 b/boosting/treevalc.mexa64 new file mode 100644 index 0000000..8e5dd28 Binary files /dev/null and b/boosting/treevalc.mexa64 differ diff --git a/boosting/treevalc.mexw64 b/boosting/treevalc.mexw64 new file mode 100644 index 0000000..281d80f Binary files /dev/null and b/boosting/treevalc.mexw64 differ diff --git a/boosting/weightedstats/ksdensityw.asv b/boosting/weightedstats/ksdensityw.asv new file mode 100644 index 0000000..7a2918e --- /dev/null +++ b/boosting/weightedstats/ksdensityw.asv @@ -0,0 +1,173 @@ +function [f,x,u]=ksdensityw(y,w,varargin) +%KSDENSITY Compute density estimate +% [F,XI]=KSDENSITY(X) computes a probability density estimate of the sample +% in the vector X. F is the vector of density values evaluated at the +% points in XI. The estimate is based on a normal kernel function, using a +% window parameter (bandwidth) that is a function of the number of points +% in X. The density is evaluated at 100 equally-spaced points covering +% the range of the data in X. +% +% F=KSDENSITY(X,XI) specifies the vector XI of values where the density +% estimate is to be evaluated. +% +% [F,XI,U]=KSDENSITY(...) also returns the bandwidth of the kernel smoothing +% window. +% +% DWH: w are the weights for y (w = 1/n for ksdenstiy) +% +% [...]=KSDENSITY(...,'PARAM1',val1,'PARAM2',val2,...) specifies parameter +% name/value pairs to control the density estimation. Valid parameters +% are the following: +% +% Parameter Value +% 'kernel' The type of kernel smoother to use, chosen from among +% 'normal' (default), 'box', 'triangle', and +% 'epanechinikov'. +% 'npoints' The number of equally-spaced points in XI. +% 'width' The bandwidth of the kernel smoothing window. 
The default +% is optimal for estimating normal densities, but you +% may want to choose a smaller value to reveal features +% such as multiple modes. +% +% In place of the kernel functions listed above, you can specify another +% function by using @ (such as @normpdf) or quotes (such as 'normpdf'). +% The function must take a single argument that is an array of distances +% between data values and places where the density is evaluated, and +% return an array of the same size containing corresponding values of +% the kernel function. +% +% Example: +% x = [randn(30,1); 5+randn(30,1)]; +% [f,xi] = ksdensity(x); +% plot(xi,f); +% This example generates a mixture of two normal distributions, and +% plots the estimated density. +% +% See also HIST, @. + +% Reference: +% A.W. Bowman and A. Azzalini (1997), "Applied Smoothing +% Techniques for Data Analysis," Oxford University Press. + +% Copyright 1993-2002 The MathWorks, Inc. +% $Revision: 1.7 $ $Date: 2002/03/21 20:36:29 $ + +% Get y vector and its dimensions +if (prod(size(y)) > length(y)), error('X must be a vector'); end +y = y(:); +y(isnan(y)) = []; +n = length(y); +ymin = min(y); +ymax = max(y); + +% Maybe x was specified, or maybe not +if ~isempty(varargin) + if ~ischar(varargin{1}) + x = varargin{1}; + varargin(1) = []; + end +end + +% Process additional name/value pair arguments +okargs = {'width' 'npoints' 'kernel'}; +defaults = {[] [] 'normal'}; +[emsg,u,m,kernel] = statgetargs(okargs, defaults, varargin{:}); +error(emsg); + +% Default window parameter is optimal for normal distribution +if (isempty(u)), + med = median(y); + sig = median(abs(y-med)) / 0.6745; + if sig<=0, sig = ymax-ymin; end + if sig>0 + u = sig * (4/(3*n))^(1/5); + else + u = 1; + end +end + +% Check other arguments or get defaults. +if ~exist('x','var') + if isempty(m), m=100; end + x = linspace(ymin-2*u, ymax+2*u, m); +elseif (prod(size(x)) > length(x)) + error('XI must be a vector'); +end +xsize = size(x); +x = x(:); +m = length(x); + +okkernels = {'normal' 'epanechinikov' 'box' 'triangle'}; +if isempty(kernel) + kernel = okkernels{1}; +elseif ~(isa(kernel,'function_handle') | isa(kernel,'inline')) + if ~ischar(kernel) + error('Smoothing kernel must be a function.'); + end + knum = strmatch(lower(kernel), okkernels); + if (length(knum) == 1) + kernel = okkernels{knum}; + end +end + +blocksize = 1e6; +if n*m<=blocksize + % Compute kernel density estimate in one operation + z = (repmat(x',n,1)-repmat(y,1,m))/u; + w2 = repmat(w, 1, m); + f = sum(feval(kernel, z).*w2,1); +else + % For large vectors, process blocks of elements as a group + M = max(1,ceil(blocksize/n)); + mrem = rem(m,M); + if mrem==0, mrem = min(M,m); end + x = x'; + + f = zeros(1,m); + ii = 1:mrem; + z = (repmat(x(ii),n,1)-repmat(y,1,mrem))/u; + w2 = repmat(w, 1, mrem); + f(ii) = sum(feval(kernel, z).*w2,1); + z = zeros(n,M); + for j=mrem+1:M:m + ii = j:j+M-1; + z(:) = (repmat(x(ii),n,1)-repmat(y,1,M))/u; + w2(:) = repmat(w, 1, M); + f(ii) = sum(feval(kernel, z).*w2,1); + end +end + +f = reshape(f./(n*u), xsize)*length(w); + +% ----------------------------- +% The following are functions that define smoothing kernels k(z). +% Each function takes a single input Z and returns the value of +% the smoothing kernel. These sample kernels are designed to +% produce outputs that are somewhat comparable (differences due +% to shape rather than scale), so they are all probability +% density functions with unit variance. 
+% +% The density estimate has the form +% f(x;k,h) = mean over i=1:n of k((x-y(i))/h) / h + +function f = normal(z) +%NORMAL Normal density kernel. +%f = normpdf(z); +f = exp(-0.5 * z .^2) ./ sqrt(2*pi); + +function f = epanechinikov(z) +%EPANECHINIKOV Epanechinikov's asymptotically optimal kernel. +a = sqrt(5); +z = max(-a, min(z,a)); +f = .75 * (1 - .2*z.^2) / a; + +function f = box(z) +%BOX Box-shaped kernel +a = sqrt(3); +f = (abs(z)<=a) ./ (2 * a); + +function f = triangle(z) +%TRIANGLE Triangular kernel. +a = sqrt(6); +z = abs(z); +f = (z<=a) .* (1 - z/a) / a; \ No newline at end of file diff --git a/boosting/weightedstats/ksdensityw.m b/boosting/weightedstats/ksdensityw.m new file mode 100644 index 0000000..1914e9d --- /dev/null +++ b/boosting/weightedstats/ksdensityw.m @@ -0,0 +1,174 @@ +function [f,x,u]=ksdensityw(y,w,varargin) +%KSDENSITY Compute density estimate +% [F,XI]=KSDENSITY(X) computes a probability density estimate of the sample +% in the vector X. F is the vector of density values evaluated at the +% points in XI. The estimate is based on a normal kernel function, using a +% window parameter (bandwidth) that is a function of the number of points +% in X. The density is evaluated at 100 equally-spaced points covering +% the range of the data in X. +% +% F=KSDENSITY(X,XI) specifies the vector XI of values where the density +% estimate is to be evaluated. +% +% [F,XI,U]=KSDENSITY(...) also returns the bandwidth of the kernel smoothing +% window. +% +% DWH: w are the weights for y (w = 1/n for ksdenstiy) +% +% [...]=KSDENSITY(...,'PARAM1',val1,'PARAM2',val2,...) specifies parameter +% name/value pairs to control the density estimation. Valid parameters +% are the following: +% +% Parameter Value +% 'kernel' The type of kernel smoother to use, chosen from among +% 'normal' (default), 'box', 'triangle', and +% 'epanechinikov'. +% 'npoints' The number of equally-spaced points in XI. +% 'width' The bandwidth of the kernel smoothing window. The default +% is optimal for estimating normal densities, but you +% may want to choose a smaller value to reveal features +% such as multiple modes. +% +% In place of the kernel functions listed above, you can specify another +% function by using @ (such as @normpdf) or quotes (such as 'normpdf'). +% The function must take a single argument that is an array of distances +% between data values and places where the density is evaluated, and +% return an array of the same size containing corresponding values of +% the kernel function. +% +% Example: +% x = [randn(30,1); 5+randn(30,1)]; +% [f,xi] = ksdensity(x); +% plot(xi,f); +% This example generates a mixture of two normal distributions, and +% plots the estimated density. +% +% See also HIST, @. + +% Reference: +% A.W. Bowman and A. Azzalini (1997), "Applied Smoothing +% Techniques for Data Analysis," Oxford University Press. + +% Copyright 1993-2002 The MathWorks, Inc. 
+% $Revision: 1.7 $ $Date: 2002/03/21 20:36:29 $ + +% Get y vector and its dimensions +if (prod(size(y)) > length(y)), error('X must be a vector'); end +y = y(:); +y(isnan(y)) = []; +n = length(y); +ymin = min(y); +ymax = max(y); + +% Maybe x was specified, or maybe not +if ~isempty(varargin) + if ~ischar(varargin{1}) + x = varargin{1}; + varargin(1) = []; + end +end + +% Process additional name/value pair arguments +okargs = {'width' 'npoints' 'kernel'}; +defaults = {[] [] 'normal'}; +[emsg,u,m,kernel] = statgetargs(okargs, defaults, varargin{:}); +error(emsg); + +% Default window parameter is optimal for normal distribution +if (isempty(u)), + med = median(y); + sig = median(abs(y-med)) / 0.6745; + if sig<=0, sig = ymax-ymin; end + if sig>0 + u = sig * (4/(3*n))^(1/5); + else + u = 1; + end +end + +% Check other arguments or get defaults. +if ~exist('x','var') + if isempty(m), m=100; end + x = linspace(ymin-2*u, ymax+2*u, m); +elseif (prod(size(x)) > length(x)) + error('XI must be a vector'); +end +xsize = size(x); +x = x(:); +m = length(x); + +okkernels = {'normal' 'epanechinikov' 'box' 'triangle'}; +if isempty(kernel) + kernel = okkernels{1}; +elseif ~(isa(kernel,'function_handle') | isa(kernel,'inline')) + if ~ischar(kernel) + error('Smoothing kernel must be a function.'); + end + knum = strmatch(lower(kernel), okkernels); + if (length(knum) == 1) + kernel = okkernels{knum}; + end +end + +blocksize = 1e6; +if n*m<=blocksize + % Compute kernel density estimate in one operation + z = (repmat(x',n,1)-repmat(y,1,m))/u; + w2 = repmat(w, 1, m); + f = sum(feval(kernel, z).*w2,1); +else + % For large vectors, process blocks of elements as a group + M = max(1,ceil(blocksize/n)); + mrem = rem(m,M); + if mrem==0, mrem = min(M,m); end + x = x'; + + f = zeros(1,m); + ii = 1:mrem; + z = (repmat(x(ii),n,1)-repmat(y,1,mrem))/u; + w2 = repmat(w, 1, mrem); + f(ii) = sum(feval(kernel, z).*w2,1); + z = zeros(n,M); + w2 = zeros(n,M); + for j=mrem+1:M:m + ii = j:j+M-1; + z(:) = (repmat(x(ii),n,1)-repmat(y,1,M))/u; + w2(:) = repmat(w, 1, M); + f(ii) = sum(feval(kernel, z).*w2,1); + end +end + +f = reshape(f./u, xsize); + +% ----------------------------- +% The following are functions that define smoothing kernels k(z). +% Each function takes a single input Z and returns the value of +% the smoothing kernel. These sample kernels are designed to +% produce outputs that are somewhat comparable (differences due +% to shape rather than scale), so they are all probability +% density functions with unit variance. +% +% The density estimate has the form +% f(x;k,h) = mean over i=1:n of k((x-y(i))/h) / h + +function f = normal(z) +%NORMAL Normal density kernel. +%f = normpdf(z); +f = exp(-0.5 * z .^2) ./ sqrt(2*pi); + +function f = epanechinikov(z) +%EPANECHINIKOV Epanechinikov's asymptotically optimal kernel. +a = sqrt(5); +z = max(-a, min(z,a)); +f = .75 * (1 - .2*z.^2) / a; + +function f = box(z) +%BOX Box-shaped kernel +a = sqrt(3); +f = (abs(z)<=a) ./ (2 * a); + +function f = triangle(z) +%TRIANGLE Triangular kernel. +a = sqrt(6); +z = abs(z); +f = (z<=a) .* (1 - z/a) / a; \ No newline at end of file diff --git a/boosting/weightedstats/private/addbisa.m b/boosting/weightedstats/private/addbisa.m new file mode 100644 index 0000000..7be98c4 --- /dev/null +++ b/boosting/weightedstats/private/addbisa.m @@ -0,0 +1,198 @@ +function s = addbisa(s) +%ADDBISA Add the Birnbaum-Saunders distribution. + +% Copyright 1993-2004 The MathWorks, Inc. 
+% $Revision: 1.1.6.8 $ $Date: 2004/01/24 09:35:04 $ + +j = length(s) + 1; +s(j).name = 'Birnbaum-Saunders'; +s(j).code = 'birnbaumsaunders'; +s(j).pnames = {'beta' 'gamma'}; +s(j).pdescription = {'scale' 'shape'}; +s(j).prequired = [false false]; +s(j).fitfunc = @bisafit; +s(j).likefunc = @bisalike; +s(j).cdffunc = @bisacdf; +s(j).pdffunc = @bisapdf; +s(j).invfunc = @bisainv; +s(j).statfunc = @bisastat; +s(j).loginvfunc = []; +s(j).logcdffunc = []; +s(j).hasconfbounds = false; +s(j).censoring = true; +s(j).paramvec = true; +s(j).support = [0 Inf]; +s(j).closedbound = [false false]; +s(j).iscontinuous = true; +s(j).islocscale = false; +s(j).uselogpp = false; + + +% ==== Birnbaum-Saunders distribution functions ==== + +% these distribution functions do not yet handle arrays of parameters + +function y = bisapdf(x, beta, gamma) +%BISAPDF Birnbaum-Saunders probability density function (pdf). +beta(beta <= 0) = NaN; +gamma(gamma <= 0) = NaN; + +nonpos = (x <= 0); +x(nonpos) = realmin; +z = (sqrt(x./beta) - sqrt(beta./x)) ./ gamma; +w = (sqrt(x./beta) + sqrt(beta./x)) ./ gamma; +ynorm = exp(-0.5 .* z.^2) ./ sqrt(2.*pi); +y = ynorm .* w ./ (2.*x); +% this would happen automatically for x==0, but generates DivideByZero warnings +y(nonpos) = 0; + + +function p = bisacdf(x, beta, gamma) +%BISACDF Birnbaum-Saunders cumulative distribution function (cdf). +beta(beta <= 0) = NaN; +gamma(gamma <= 0) = NaN; + +nonpos = (x <= 0); +x(nonpos) = realmin; +z = (sqrt(x./beta) - sqrt(beta./x)) ./ gamma; +p = 0.5 * erfc(-z ./ sqrt(2)); +% this would happen automatically for x==0, but generates DivideByZero warnings +p(nonpos) = 0; + + +function x = bisainv(p, beta, gamma) +%BISAINV Inverse of the Birnbaum-Saunders cumulative distribution function (cdf). +beta(beta <= 0) = NaN; +gamma(gamma <= 0) = NaN; + +p(p < 0 | 1 < p) = NaN; +gamz = -sqrt(2).*erfcinv(2*p) .* gamma; +x = 0.25 .* beta .* (gamz + sqrt(4+gamz.^2)).^2; +x(p == 0 & ~isnan(beta) & ~isnan(gamma)) = 0; +% x(p == 1 & ~isnan(beta) & ~isnan(gamma)) = Inf; % get this automatically + +function r = bisarnd(beta, gamma, varargin) +%BISARND Random arrays from the Birnbaum-Saunders distribution. +beta(beta <= 0) = NaN; +gamma(gamma <= 0) = NaN; + +[err, sizeOut] = statsizechk(2,beta,gamma,varargin{:}); +if err > 0 + error('stats:bisarnd:InconsistentSizes','Size information is inconsistent.'); +end + +plusminus = 2.*(rand(sizeOut)>.5) - 1; % plus or minus one, w.p. 1/2 +gamz = gamma.*randn(sizeOut); +r = 0.5.*beta .* (2 + gamz.^2 + plusminus.*gamz.*sqrt(4 + gamz.^2)); +% gamz = randn(sizeOut) .* gamma; +% r = 0.25 .* beta .* (gamz + sqrt(4+gamz.^2)).^2; + + +function [m,v] = bisastat(beta, gamma) +%BISASTAT Mean and variance for the Birnbaum-Saunders distribution. +beta(beta <= 0) = NaN; +gamma(gamma <= 0) = NaN; + +m = beta .* (0.5 .* gamma.^2 + 1); +v = (beta.*gamma).^2 .* (1.25 .* gamma.^2 + 1); + + +function [nlogL,acov] = bisalike(params,data,cens,freq) +%BISALIKE Negative log-likelihood for the Birnbaum-Saunders distribution. +if nargin < 4 || isempty(freq), freq = ones(size(data)); end +if nargin < 3 || isempty(cens), cens = zeros(size(data)); end + +nlogL = bisa_nloglf(params, data, cens, freq); +if nargout > 1 + acov = mlecov(params, data, 'nloglf',@bisa_nloglf, 'cens',cens, 'freq',freq); +end + + +% ==== Birnbaum-Saunders fitting functions ==== + +function [phat,pci] = bisafit(x,alpha,cens,freq,opts) +%BISAFIT Parameter estimates and confidence intervals for Birnbaum-Saunders data. 
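+% Example (a sketch; these subfunctions are reached through the handles
+% registered above, e.g. s(j).fitfunc):
+%   x = bisarnd(2, 0.5, 1000, 1);        % synthetic data, beta = 2, gamma = 0.5
+%   [phat, pci] = bisafit(x);            % phat should be close to [2 0.5]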
+ +if nargin < 2 || isempty(alpha), alpha = .05; end +if nargin < 3 || isempty(cens), cens = zeros(size(x)); end +if nargin < 4 || isempty(freq), freq = ones(size(x)); end +if nargin < 5, opts = []; end + +if any(x <= 0) + error('stats:bisafit:BadData','The data in X must be positive'); +end + +% Starting points as suggested by Birnbaum and Saunders +xunc = x(cens==0); xbarunc = mean(xunc); xinvbarunc = mean(1./xunc); +start = [sqrt(xbarunc./xinvbarunc) 2.*sqrt(sqrt(xbarunc.*xinvbarunc) - 1)]; + +% The default options include turning statsfminbx's display off. This +% function gives its own warning/error messages, and the caller can turn +% display on to get the text output from statsfminbx if desired. +options = statset(statset('bisafit'), opts); +tolBnd = options.TolBnd; +options = optimset(options); +dfltOptions = struct('DerivativeCheck','off', 'HessMult',[], ... + 'HessPattern',ones(2,2), 'PrecondBandWidth',Inf, ... + 'TypicalX',ones(2,1), 'MaxPCGIter',1, 'TolPCG',0.1); + +% Maximize the log-likelihood with respect to mu and sigma. +funfcn = {'fungrad' 'bisafit' @bisa_nloglf [] []}; +[phat, nll, lagrange, err, output] = ... + statsfminbx(funfcn, start, [tolBnd; tolBnd], [Inf; Inf], ... + options, dfltOptions, 1, x, cens, freq); +if (err == 0) + % statsfminbx may print its own output text; in any case give something + % more statistical here, controllable via warning IDs. + if output.funcCount >= options.MaxFunEvals + wmsg = 'Maximum likelihood estimation did not converge. Function evaluation limit exceeded.'; + else + wmsg = 'Maximum likelihood estimation did not converge. Iteration limit exceeded.'; + end + warning('stats:bisafit:IterOrEvalLimit',wmsg); +elseif (err < 0) + error('stats:bisafit:NoSolution',... + 'Unable to reach a maximum likelihood solution.'); +end + +% Compute CIs using a normal approximation for phat. +if nargout > 1 + acov = mlecov(phat, x, 'nloglf',@bisa_nloglf, 'cens',cens, 'freq',freq); + probs = [alpha/2; 1-alpha/2]; + se = sqrt(diag(acov))'; + pci = norminv([probs probs], [phat; phat], [se; se]); +end + + +function [nll,ngrad] = bisa_nloglf(params, data, cens, freq) +%BISA_NLOGLF Objective function for Birnbaum-Saunders maximum likelihood. + +beta = params(1); +gamma = params(2); +z = (sqrt(data./beta) - sqrt(beta./data)) ./ gamma; +w = (sqrt(data./beta) + sqrt(beta./data)) ./ gamma; + +logphi = -0.5 .* (z.^2 + log(2.*pi)); +L = logphi + log(w) - log(2.*data); +ncen = sum(freq.*cens); +if ncen > 0 + cen = (cens == 1); + zcen = z(cen); + Scen = 0.5 * erfc(zcen ./ sqrt(2)); + L(cen) = log(Scen); +end +nll = -sum(freq .* L); + +if nargout > 1 + dL1 = (w.^2 - 1) .* 0.5.*z./(w.*beta); + dL2 = (z.^2 - 1) ./ gamma; + if ncen > 0 + phicen = exp(logphi(cen)); + wcen = w(cen); + d1Scen = phicen .* 0.5.*wcen./beta; + d2Scen = phicen .* zcen./gamma; + dL1(cen) = d1Scen ./ Scen; + dL2(cen) = d2Scen ./ Scen; + end + ngrad = -[sum(freq .* dL1) sum(freq .* dL2)]; +end diff --git a/boosting/weightedstats/private/addinvg.m b/boosting/weightedstats/private/addinvg.m new file mode 100644 index 0000000..a145c71 --- /dev/null +++ b/boosting/weightedstats/private/addinvg.m @@ -0,0 +1,281 @@ +function s = addinvg(s) +%ADDINVG Add the inverse Gaussian distribution. + +% Copyright 1993-2004 The MathWorks, Inc. 
+% $Revision: 1.1.6.10 $ $Date: 2004/04/15 01:01:53 $ + +j = length(s) + 1; +s(j).name = 'Inverse Gaussian'; +s(j).code = 'inversegaussian'; +s(j).pnames = {'mu' 'lambda'}; +s(j).pdescription = {'scale' 'shape'}; +s(j).prequired = [false false]; +s(j).fitfunc = @invgfit; +s(j).likefunc = @invglike; +s(j).cdffunc = @invgcdf; +s(j).pdffunc = @invgpdf; +s(j).invfunc = @invginv; +s(j).statfunc = @invgstat; +s(j).loginvfunc = []; +s(j).logcdffunc = []; +s(j).hasconfbounds = false; +s(j).censoring = true; +s(j).paramvec = true; +s(j).support = [0 Inf]; +s(j).closedbound = [false false]; +s(j).iscontinuous = true; +s(j).islocscale = false; +s(j).uselogpp = false; + + +% ==== inverse Gaussian distribution functions ==== + +% these distribution functions do not yet handle arrays of parameters + +function y = invgpdf(x, mu, lambda) +%INVGPDF inverse Gaussian probability density function (pdf). +mu(mu <= 0) = NaN; +lambda(lambda <= 0) = NaN; + +nonpos = (x <= 0); +x(nonpos)= realmin; +y = sqrt(lambda./(2.*pi.*x.^3)) .* exp(-0.5.*lambda.*(x./mu - 2 + mu./x)./mu); +% this would happen automatically for x==0, but generates DivideByZero warnings +y(nonpos) = 0; + + +function p = invgcdf(x, mu, lambda) +%INVGCDF inverse Gaussian cumulative distribution function (cdf). +mu(mu <= 0) = NaN; +lambda(lambda <= 0) = NaN; + +nonpos = (x <= 0); +x(nonpos)= realmin; +z1 = (x./mu - 1).*sqrt(lambda./x); +z2 = -(x./mu + 1).*sqrt(lambda./x); +p = 0.5.*erfc(-z1./sqrt(2)) + exp(2.*lambda./mu) .* 0.5.*erfc(-z2./sqrt(2)); +% this would happen automatically for x==0, but generates DivideByZero warnings +p(nonpos) = 0; + + +function x = invginv(p, mu, lambda) +%INVGINV Inverse of the inverse Gaussian cumulative distribution function (cdf). +mu(mu <= 0) = NaN; +lambda(lambda <= 0) = NaN; + +k = (0 < p & p < 1); +allOK = all(k(:)); +if isa(p,'single') + h = ones(size(p),'single'); + reltol = eps('single').^(3/4); + mynan = single(NaN); +else + h = ones(size(p)); + reltol = eps.^(3/4); + mynan = NaN; +end + +% Fill in NaNs for out of range cases, fill in edges cases when P is 0 or 1. +if ~allOK + x = repmat(mynan, size(k)); + x(p == 0 & ~isnan(mu) & ~isnan(lambda)) = 0; + x(p == 1 & ~isnan(mu) & ~isnan(lambda)) = Inf; + + % Remove the bad/edge cases, leaving the easy cases. If there's + % nothing remaining, return. + if any(k(:)) + if numel(p) > 1 + p = p(k); + h = h(k); + end + else + return; + end +end + +% Newton's Method to find a root of invgcdf(x,mu,lambda) = p +% +% Choose a starting guess for q. Use quantiles from a lognormal +% distribution with the same mean (==1) and variance (==lambda0) as +% IG(1,lambda0). +lambda0 = lambda/mu; +sigsqLN = log(1./lambda0 + 1); +muLN = -0.5 .* sigsqLN; +q = exp(muLN - sqrt(2.*sigsqLN).*erfcinv(2*p)); + +% Break out of the iteration loop when the relative size of the last step +% is small for all elements of q. +maxiter = 500; +iter = 0; +while any(abs(h(:)) > reltol*q(:)) + iter = iter + 1; + if iter > maxiter + % Too many iterations. This should not happen. + didnt = find(abs(h) > reltol*q); didnt = didnt(1); + if numel(mu) == 1, mubad = mu; else mubad = mu(didnt); end + if numel(lambda) == 1, lambdabad = b; else lambdabad = lambda(didnt); end + if numel(p) == 1, pbad = p; else pbad = p(didnt); end + warning('stats:addinvg:NoConvergence',... + 'INVGINV did not converge for mu = %g, lambda = %g, p = %g.',... 
+ mubad,lambdabad,pbad); + break + end + + h = (invgcdf(q,1,lambda0) - p) ./ max(invgpdf(q,1,lambda0),realmin); + qnew = q - h; + % Make sure that the current iterates stay positive. When Newton's + % Method suggests steps that lead to negative values, take a step + % 9/10ths of the way to zero instead. + ksmall = find(qnew <= 0); + if ~isempty(ksmall) + qnew(ksmall) = q(ksmall) / 10; + h = q - qnew; + end + q = qnew; +end + +% Add in the scale factor, and broadcast the values to the correct place if +% need be. +if allOK + x = q .* mu; +else + x(k) = q .* mu; +end + + +function r = invgrnd(mu, lambda, varargin) +%INVGRND Random arrays from the inverse Gaussian distribution. +mu(mu <= 0) = NaN; +lambda(lambda <= 0) = NaN; + +[err, sizeOut] = statsizechk(2,mu,lambda,varargin{:}); +if err > 0 + error('stats:invgrnd:InconsistentSizes','Size information is inconsistent.'); +end + +c = mu.*chi2rnd(1,sizeOut); +r = (mu./(2.*lambda)) .* (2.*lambda + c - sqrt(4.*lambda.*c + c.^2)); +invert = (rand(sizeOut).*(mu+r) > mu); +r(invert) = mu.^2 ./ r(invert); + + +function [m,v] = invgstat(mu, lambda) +%INVGSTAT Mean and variance for the inverse Gaussian distribution. +mu(mu <= 0) = NaN; +lambda(lambda <= 0) = NaN; + +m = mu; +v = mu.^3 ./ lambda; + + +function [nlogL,acov] = invglike(params,data,cens,freq) +%INVGLIKE Negative log-likelihood for the inverse Gaussian distribution. +if nargin < 4 || isempty(freq), freq = ones(size(data)); end +if nargin < 3 || isempty(cens), cens = zeros(size(data)); end + +nlogL = invg_nloglf(params, data, cens, freq); +if nargout > 1 + acov = mlecov(params, data, 'nloglf',@invg_nloglf, 'cens',cens, 'freq',freq); +end + + +% ==== inverse Gaussian fitting functions ==== + +function [phat,pci] = invgfit(x,alpha,cens,freq,opts) +%INVGFIT Parameter estimates and confidence intervals for inverse Gaussian data. + +if nargin < 2 || isempty(alpha), alpha = .05; end +if nargin < 3 || isempty(cens), cens = zeros(size(x)); end +if nargin < 4 || isempty(freq), freq = ones(size(x)); end +if nargin < 5, opts = []; end + +if any(x <= 0) + error('stats:invgfit:BadData','The data in X must be positive'); +end + +ncen = sum(freq.*cens); +if ncen == 0 + xbar = mean(x); + phat = [xbar 1./mean(1./x - 1./xbar)]; + +else + % MLEs of the uncensored data as starting point + xunc = x(cens == 0); xbarunc = mean(xunc); + start = [xbarunc 1./mean(1./xunc - 1./xbarunc)]; + + % The default options include turning statsfminbx's display off. This + % function gives its own warning/error messages, and the caller can + % turn display on to get the text output from statsfminbx if desired. + options = statset(statset('invgfit'), opts); + tolBnd = options.TolBnd; + options = optimset(options); + dfltOptions = struct('DerivativeCheck','off', 'HessMult',[], ... + 'HessPattern',ones(2,2), 'PrecondBandWidth',Inf, ... + 'TypicalX',ones(2,1), 'MaxPCGIter',1, 'TolPCG',0.1); + + % Maximize the log-likelihood with respect to mu and lambda. + funfcn = {'fungrad' 'invgfit' @invg_nloglf [] []}; + [phat, nll, lagrange, err, output] = ... + statsfminbx(funfcn, start, [tolBnd; tolBnd], [Inf; Inf], ... + options, dfltOptions, 1, x, cens, freq); + if (err == 0) + % statsfminbx may print its own output text; in any case give something + % more statistical here, controllable via warning IDs. + if output.funcCount >= options.MaxFunEvals + wmsg = 'Maximum likelihood estimation did not converge. Function evaluation limit exceeded.'; + else + wmsg = 'Maximum likelihood estimation did not converge. 
Iteration limit exceeded.'; + end + warning('stats:invgfit:IterOrEvalLimit',wmsg); + elseif (err < 0) + error('stats:invgfit:NoSolution',... + 'Unable to reach a maximum likelihood solution.'); + end +end + +% Compute CIs using a normal approximation for phat. +if nargout > 1 + acov = mlecov(phat, x, 'nloglf',@invg_nloglf, 'cens',cens, 'freq',freq); + probs = [alpha/2; 1-alpha/2]; + se = sqrt(diag(acov))'; + pci = norminv([probs probs], [phat; phat], [se; se]); +end + + +function [nll,ngrad] = invg_nloglf(params, x, cens, freq) +%INVG_NLOGLF Objective function for inverse Gaussian maximum likelihood. + +mu = params(1); +lambda = params(2); + +L = .5.*log(lambda) - 1.5.*log(x) - lambda.*(x./mu-1).^2 ./ (2.*x); +ncen = sum(freq.*cens); +if ncen > 0 + cen = (cens == 1); + xcen = x(cen); + tmpsqrt = sqrt(lambda./xcen); + tmpexp = exp(2.*lambda./mu); + zcen = -(xcen./mu-1) .* tmpsqrt; + wcen = -(xcen./mu+1) .* tmpsqrt; + Phizcen = 0.5.*erfc(-zcen./sqrt(2)); + Phiwcen = 0.5.*erfc(-wcen./sqrt(2)); + Scen = Phizcen - tmpexp .* Phiwcen; + L(cen) = log(Scen); +end +nll = -sum(freq .* L); + +if nargout > 1 + dL1 = lambda.*(x-mu)./mu.^3; + dL2 = 1./(2.*lambda) - (x./mu-1).^2 ./ (2.*x); + if ncen > 0 + phizcen = exp(-0.5.*zcen.^2)./sqrt(2.*pi); + phiwcen = exp(-0.5.*wcen.^2)./sqrt(2.*pi); + dS1cen = (phizcen - tmpexp.*phiwcen).*(xcen./mu.^2).*tmpsqrt ... + + 2.*Phiwcen.*tmpexp.*lambda./mu.^2; + dS2cen = 0.5.*(phizcen.*zcen - tmpexp.*phiwcen.*wcen)./lambda ... + - 2.*Phiwcen.*tmpexp./mu; + dL1(cen) = dS1cen ./ Scen; + dL2(cen) = dS2cen ./ Scen; + end + ngrad = -[sum(freq .* dL1) sum(freq .* dL2)]; +end diff --git a/boosting/weightedstats/private/addlogi.m b/boosting/weightedstats/private/addlogi.m new file mode 100644 index 0000000..a11b769 --- /dev/null +++ b/boosting/weightedstats/private/addlogi.m @@ -0,0 +1,411 @@ +function s = addlogi(s) +%ADDLOGI Add the logistic adistributions. + +% Copyright 1993-2004 The MathWorks, Inc. +% $Revision: 1.1.6.9 $ $Date: 2004/01/24 09:35:06 $ + +j = length(s) + 1; +s(j).name = 'Logistic'; +s(j).code = 'logistic'; +s(j).pnames = {'mu' 'sigma'}; +s(j).pdescription = {'location' 'scale'}; +s(j).prequired = [false false]; +s(j).fitfunc = @logifit; +s(j).likefunc = @logilike; +s(j).cdffunc = @logicdf; +s(j).pdffunc = @logipdf; +s(j).invfunc = @logiinv; +s(j).statfunc = @logistat; +s(j).loginvfunc = []; +s(j).logcdffunc = []; +s(j).hasconfbounds = false; +s(j).censoring = true; +s(j).paramvec = true; +s(j).support = [-Inf Inf]; +s(j).closedbound = [false false]; +s(j).iscontinuous = true; +s(j).islocscale = true; +s(j).uselogpp = false; + +j = j + 1; +s(j).name = 'Log-Logistic'; +s(j).code = 'loglogistic'; +s(j).pnames = {'mu' 'sigma'}; +s(j).pdescription = {'log location' 'log scale'}; +s(j).prequired = [false false]; +s(j).fitfunc = @loglfit; +s(j).likefunc = @logllike; +s(j).cdffunc = @loglcdf; +s(j).pdffunc = @loglpdf; +s(j).invfunc = @loglinv; +s(j).statfunc = @loglstat; +s(j).loginvfunc = @logiinv; +s(j).logcdffunc = @logicdf; +s(j).hasconfbounds = false; +s(j).censoring = true; +s(j).paramvec = true; +s(j).support = [0 Inf]; +s(j).closedbound = [false false]; +s(j).iscontinuous = true; +s(j).islocscale = true; +s(j).uselogpp = true; + + +% ==== Logistic distribution functions ==== + +% these distribution functions do not yet handle arrays of parameters + +function y = logipdf(x, mu, sigma) +%LOGIPDF Logistic probability density function (pdf). 
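+% (Illustrative: the density is exp(z)./((1+exp(z)).^2 .* sigma) with
+% z = (x-mu)./sigma, so logipdf(0, 0, 1) returns the peak value 0.25.)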
+if (nargin<2), mu=0; end +if (nargin<3), sigma=1; end +sigma(sigma <= 0) = NaN; + +z = (x - mu) ./ sigma; +k = (z>350); if any(k), z(k) = -z(k); end % prevent Inf/Inf +y = exp(z) ./ ((1 + exp(z)).^2 .* sigma); + + +function p = logicdf(x, mu, sigma) +%LOGICDF Logistic cumulative distribution function (cdf). +if (nargin<2), mu=0; end +if (nargin<3), sigma=1; end +sigma(sigma <= 0) = NaN; + +p = 1 ./ (1 + exp(-(x - mu) ./ sigma)); + + +function x = logiinv(p, mu, sigma) +%LOGIINV Inverse of the logistic cumulative distribution function (cdf). +if (nargin<2), mu=0; end +if (nargin<3), sigma=1; end +sigma(sigma <= 0) = NaN; + +x = logit(p).*sigma + mu; + + +function r = logirnd(mu, sigma, varargin) +%LOGIRND Random arrays from the logistic distribution. +if (nargin<1), mu=0; end +if (nargin<2), sigma=1; end +sigma(sigma <= 0) = NaN; + +[err, sizeOut] = statsizechk(2,mu,sigma,varargin{:}); +if err > 0 + error('stats:logirnd:InconsistentSizes','Size information is inconsistent.'); +end + +p = rand(sizeOut); +r = log(p./(1-p)).*sigma + mu; + + +function [m,v] = logistat(mu, sigma) +%LOGISTAT Mean and variance for the logistic distribution. +if (nargin<1), mu=0; end +if (nargin<2), sigma=1; end +sigma(sigma <= 0) = NaN; + +m = mu; +v = sigma.^2 .* pi.^2 ./ 3; + + +function [nlogL,acov] = logilike(params,data,cens,freq) +%LOGILIKE Negative log-likelihood for the logistic distribution. +if nargin < 4 || isempty(freq), freq = ones(size(data)); end +if nargin < 3 || isempty(cens), cens = zeros(size(data)); end + +nlogL = logi_nloglf(params, data, cens, freq); +if nargout > 1 + acov = mlecov(params, data, 'nloglf',@logi_nloglf, 'cens',cens, 'freq',freq); +end + + +% ==== Logistic fitting functions ==== + +function [phat,pci] = logifit(x,alpha,cens,freq,opts) +%LOGIFIT Parameter estimates and confidence intervals for logistic data. + +if nargin < 2 || isempty(alpha), alpha = .05; end +if nargin < 3 || isempty(cens), cens = zeros(size(x)); end +if nargin < 4 || isempty(freq), freq = ones(size(x)); end +if nargin < 5, opts = []; end + +% Moment estimators as starting point +xunc = x(cens == 0); +start = [mean(xunc) std(xunc).*sqrt(3)./pi]; + +% The default options include turning statsfminbx's display off. This +% function gives its own warning/error messages, and the caller can turn +% display on to get the text output from statsfminbx if desired. +options = statset(statset('logifit'), opts); +tolBnd = options.TolBnd; +options = optimset(options); +dfltOptions = struct('DerivativeCheck','off', 'HessMult',[], ... + 'HessPattern',ones(2,2), 'PrecondBandWidth',Inf, ... + 'TypicalX',ones(2,1), 'MaxPCGIter',1, 'TolPCG',0.1); + +% Maximize the log-likelihood with respect to mu and sigma. +funfcn = {'fungrad' 'logifit' @logi_nloglf [] []}; +[phat, nll, lagrange, err, output] = ... + statsfminbx(funfcn, start, [-Inf; tolBnd], [Inf; Inf], ... + options, dfltOptions, 1, x, cens, freq); +if (err == 0) + % statsfminbx may print its own output text; in any case give something + % more statistical here, controllable via warning IDs. + if output.funcCount >= options.MaxFunEvals + wmsg = 'Maximum likelihood estimation did not converge. Function evaluation limit exceeded.'; + else + wmsg = 'Maximum likelihood estimation did not converge. Iteration limit exceeded.'; + end + warning('stats:logifit:IterOrEvalLimit',wmsg); +elseif (err < 0) + error('stats:logifit:NoSolution',... 
+ 'Unable to reach a maximum likelihood solution.'); +end + +if nargout > 1 + acov = mlecov(phat, x, 'nloglf',@logi_nloglf, 'cens',cens, 'freq',freq); + probs = [alpha/2; 1-alpha/2]; + se = sqrt(diag(acov))'; + + % Compute the CI for mu using a normal approximation for muhat. + pci(:,1) = norminv(probs, phat(1), se(1)); + + % Compute the CI for sigma using a normal approximation for + % log(sigmahat), and transform back to the original scale. + % se(log(sigmahat)) is se(sigmahat) / sigmahat. + logsigci = norminv(probs, log(phat(2)), se(2)./phat(2)); + pci(:,2) = exp(logsigci); +end + + +function [nll,ngrad] = logi_nloglf(parms, x, cens, freq) +%LOGI_NLOGLF Objective function for logistic maximum likelihood. +mu = parms(1); +sigma = parms(2); +z = (x - mu) ./ sigma; +logitz = 1 ./ (1 + exp(-z)); +clogitz = 1 ./ (1 + exp(z)); +logclogitz = log(clogitz); +k = (z > 700); if any(k), logclogitz(k) = z(k); end % fix intermediate overflow + +L = z + 2.*logclogitz - log(sigma); +ncen = sum(freq.*cens); +if ncen > 0 + cen = (cens == 1); + L(cen) = logclogitz(cen); +end +nll = -sum(freq .* L); + +if nargout > 1 + t = (2.*logitz - 1) ./ sigma; + dL1 = t; + dL2 = z.*t - 1./sigma; + if ncen > 0 + t = logitz(cen) ./ sigma; + dL1(cen) = t; + dL2(cen) = z(cen) .* t; + end + ngrad = -[sum(freq .* dL1) sum(freq .* dL2)]; +end + + + +% ==== Log-Logistic distribution functions ==== + +% these distribution functions do not yet handle arrays of parameters + +function y = loglpdf(x, mu, sigma) +%LOGLPDF Log-logistic probability density function (pdf). +if (nargin<2), mu=0; end +if (nargin<3), sigma=1; end +sigma(sigma <= 0) = NaN; + +nonpos = (x <= 0); +x(nonpos) = realmin; +z = (log(x) - mu) ./ sigma; +c = ones(size(z)); +k = (z>350); % prevent Inf/Inf +if any(k) + z(k) = -z(k); + c(k) = -1; +end +y = exp(z.*(1-c.*sigma) - mu) ./ ((1 + exp(z)).^2 .* sigma); +y(nonpos) = 0; +% the first and third of these would happen automatically for x==0, but +% generate LogOfZero warnings. the second would be NaN. +y(x==0 & sigma<1) = 0; +y(x==0 & sigma==1) = 1; +y(x==0 & sigma>1) = Inf; + + +function p = loglcdf(x, mu, sigma) +%LOGLCDF Log-logistic cumulative distribution function (cdf). +if (nargin<2), mu=0; end +if (nargin<3), sigma=1; end +sigma(sigma <= 0) = NaN; + +nonpos = (x <= 0); +x(nonpos) = realmin; +p = 1 ./ (1 + exp(-(log(x) - mu) ./ sigma)); +% this would happen automatically for x==0, but generates LogOfZero warnings +p(nonpos) = 0; + + +function x = loglinv(p, mu, sigma) +%LOGLINV Inverse of the log-logistic cumulative distribution function (cdf). +if (nargin<2), mu=0; end +if (nargin<3), sigma=1; end +sigma(sigma <= 0) = NaN; + +x = exp(logit(p).*sigma + mu); + + +function r = loglrnd(mu, sigma, varargin) +%LOGLRND Random arrays from the log-logistic distribution. +if (nargin<1), mu=0; end +if (nargin<2), sigma=1; end +sigma(sigma <= 0) = NaN; + +[err, sizeOut] = statsizechk(2,mu,sigma,varargin{:}); +if err > 0 + error('stats:loglrnd:InconsistentSizes','Size information is inconsistent.'); +end + +p = rand(sizeOut); +r = exp(log(p./(1-p)).*sigma + mu); + + +function [m,v] = loglstat(mu, sigma) +%LOGLSTAT Mean and variance for the log-logistic distribution. 
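+% (Illustrative: the mean is finite only for sigma < 1 and the variance only
+% for sigma < 0.5, as in the branches below; e.g. loglstat(0, 0.75) returns a
+% finite mean but v = Inf.)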
+if (nargin<1), mu=0; end +if (nargin<2), sigma=1; end +sigma(sigma <= 0) = NaN; + +if sigma < 1 + m = exp(mu + gammaln(1+sigma) + gammaln(1-sigma)); +else + m = Inf; +end +if sigma < .5 + v = exp(2.*mu + gammaln(1+2.*sigma) + gammaln(1-2.*sigma)) - m.^2; +else + v = Inf; +end + + +function [nlogL,acov] = logllike(params,data,cens,freq) +%LOGLLIKE Negative log-likelihood for the log-logistic distribution. +if nargin < 4 || isempty(freq), freq = ones(size(data)); end +if nargin < 3 || isempty(cens), cens = zeros(size(data)); end + +nlogL = logl_nloglf(params, data, cens, freq); +if nargout > 1 + acov = mlecov(params, data, 'nloglf',@logl_nloglf, 'cens',cens, 'freq',freq); +end + + +% ==== Log-Logistic fitting functions ==== + +function [phat,pci] = loglfit(x,alpha,cens,freq,opts) +%LOGLFIT Parameter estimates and confidence intervals for log-logistic data. + +if nargin < 2 || isempty(alpha), alpha = .05; end +if nargin < 3 || isempty(cens), cens = zeros(size(x)); end +if nargin < 4 || isempty(freq), freq = ones(size(x)); end +if nargin < 5, opts = []; end + +if any(x <= 0) + error('stats:loglfit:BadData','The data in X must be positive'); +end + +% Moment estimators as starting point +logxunc = log(x(cens == 0)); +start = [mean(logxunc) std(logxunc).*sqrt(3)./pi]; + +% The default options include turning statsfminbx's display off. This +% function gives its own warning/error messages, and the caller can turn +% display on to get the text output from statsfminbx if desired. +options = statset(statset('loglfit'), opts); +tolBnd = options.TolBnd; +options = optimset(options); +dfltOptions = struct('DerivativeCheck','off', 'HessMult',[], ... + 'HessPattern',ones(2,2), 'PrecondBandWidth',Inf, ... + 'TypicalX',ones(2,1), 'MaxPCGIter',1, 'TolPCG',0.1); + +% Maximize the log-likelihood with respect to mu and sigma. +funfcn = {'fungrad' 'loglfit' @logl_nloglf [] []}; +[phat, nll, lagrange, err, output] = ... + statsfminbx(funfcn, start, [-Inf; tolBnd], [Inf; Inf], ... + options, dfltOptions, 1, x, cens, freq); +if (err == 0) + % statsfminbx may print its own output text; in any case give something + % more statistical here, controllable via warning IDs. + if output.funcCount >= options.MaxFunEvals + wmsg = 'Maximum likelihood estimation did not converge. Function evaluation limit exceeded.'; + else + wmsg = 'Maximum likelihood estimation did not converge. Iteration limit exceeded.'; + end + warning('stats:loglfit:IterOrEvalLimit',wmsg); +elseif (err < 0) + error('stats:loglfit:NoSolution',... + 'Unable to reach a maximum likelihood solution.'); +end + +if nargout > 1 + acov = mlecov(phat, x, 'nloglf',@logi_nloglf, 'cens',cens, 'freq',freq); + probs = [alpha/2; 1-alpha/2]; + se = sqrt(diag(acov))'; + + % Compute the CI for mu using a normal approximation for muhat. + pci(:,1) = norminv(probs, phat(1), se(1)); + + % Compute the CI for sigma using a normal approximation for + % log(sigmahat), and transform back to the original scale. + % se(log(sigmahat)) is se(sigmahat) / sigmahat. + logsigci = norminv(probs, log(phat(2)), se(2)./phat(2)); + pci(:,2) = exp(logsigci); +end + + +function [nll,ngrad] = logl_nloglf(parms, x, cens, freq) +%LOGL_NLOGLF Objective function for log-logistic maximum likelihood. 
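A small sketch (editor's addition, values made up, runnable with base MATLAB) of why loglfit's starting vector uses mean(log x) and std(log x)*sqrt(3)/pi: logistat above gives the logistic variance as sigma^2*pi^2/3, and the log of a log-logistic variable is logistic, so the moments of log(x) recover mu and sigma directly.

mu = 0.5; sigma = 0.3; n = 1e5;
p = rand(n, 1);
x = exp(log(p./(1-p)).*sigma + mu);                % log-logistic sample, as in loglrnd
start = [mean(log(x))  std(log(x)).*sqrt(3)./pi];  % should land near [mu sigma]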
+mu = parms(1); +sigma = parms(2); +logx = log(x); +z = (logx - mu) ./ sigma; +logitz = 1 ./ (1 + exp(-z)); +clogitz = 1 ./ (1 + exp(z)); +logclogitz = log(clogitz); +k = (z > 700); if any(k), logclogitz(k) = z(k); end % fix intermediate overflow + +L = z + 2.*logclogitz - log(sigma) - logx; +ncen = sum(freq.*cens); +if ncen > 0 + cen = (cens == 1); + L(cen) = logclogitz(cen); +end +nll = -sum(freq .* L); + +if nargout > 1 + t = (2.*logitz - 1) ./ sigma; + dL1 = t; + dL2 = z.*t - 1./sigma; + if ncen > 0 + t = logitz(cen) ./ sigma; + dL1(cen) = t; + dL2(cen) = z(cen) .* t; + end + ngrad = -[sum(freq .* dL1) sum(freq .* dL2)]; +end + + +% ==== utility functions ==== + +function logitp = logit(p) +%LOGIT Logistic transformation, handling edge and out of range. +logitp = repmat(NaN,size(p)); +logitp(p==0) = -Inf; +logitp(p==1) = Inf; +ok = (0

0 + error('stats:nakarnd:InconsistentSizes','Size information is inconsistent.'); +end + +r = sqrt(gamrnd(mu,omega./mu,sizeOut)); + + +function [m,v] = nakastat(mu, omega) +%NAKASTAT Mean and variance for the Nakagami distribution. +mu(mu <= 0) = NaN; +omega(omega <= 0) = NaN; + +gamratio = exp(gammaln(mu+.5) - gammaln(mu)); +m = gamratio .* sqrt(omega./mu); +v = omega .* (1 - gamratio.^2 ./ mu); + + +function [nlogL,acov] = nakalike(params,data,cens,freq) +%NAKALIKE Negative log-likelihood for the Nakagami distribution. +if nargin < 4 || isempty(freq), freq = ones(size(data)); end +if nargin < 3 || isempty(cens), cens = zeros(size(data)); end + +nlogL = naka_nloglf(params, data, cens, freq); +if nargout > 1 + acov = mlecov(params, data, 'nloglf',@naka_nloglf, 'cens',cens, 'freq',freq); +end + + +% ==== Nakagami fitting functions ==== + +function [phat,pci] = nakafit(x,alpha,cens,freq,opts) +%NAKAFIT Parameter estimates and confidence intervals for Nakagami data. + +if nargin < 2 || isempty(alpha), alpha = .05; end +if nargin < 3 || isempty(cens), cens = zeros(size(x)); end +if nargin < 4 || isempty(freq), freq = ones(size(x)); end +if nargin < 5, opts = []; end + +if any(x <= 0) + error('stats:nakafit:BadData','The data in X must be positive'); +end + +phat = gamfit(x.^2,alpha,cens,freq,opts); +phat(2) = phat(1).*phat(2); % (a,b) -> (mu,omega) +if nargout > 1 + acov = mlecov(phat, x, 'nloglf',@naka_nloglf, 'cens',cens, 'freq',freq); + probs = [alpha/2; 1-alpha/2]; + se = sqrt(diag(acov))'; + pci = norminv(repmat(probs,1,numel(phat)), [phat; phat], [se; se]); + % CI on the log scale for omega? +end + + +function [nll,ngrad] = naka_nloglf(parms, x, cens, freq) +%NAKA_NLOGLF Objective function for Nakagami maximum likelihood. + +% do all the calculations in terms of the gamma dist'n +a = parms(1); +b = parms(2)./parms(1); % (mu,omega) -> (a,b) +loggama = gammaln(a); +logb = log(b); + +xsq = x.^2; +z = xsq ./ b; +logz = log(z); +L = (a-1).*logz - z - loggama - logb + log(2.*x); +ncen = sum(freq.*cens); +if ncen > 0 + cen = (cens == 1); + zcen = z(cen); + if nargout == 1 + Scen = gammainc(zcen,a,'upper'); + else + [dScen,Scen] = dgammainc(zcen,a,'upper'); + end + L(cen) = log(Scen); +end +nll = -sum(freq .* L); + +if nargout > 1 + dL1 = logz - psi(a); + dL2 = (z - a)./b; + if ncen > 0 + dL1(cen) = dScen ./ Scen; + dL2(cen) = exp(a.*logz(cen) - logb - zcen - loggama) ./ Scen; + end + ngrad = -[sum(freq .* dL1) sum(freq .* dL2)]; + + % transform back to Nakagami parameters + ngrad = ngrad * [1 0; -b./a 1./a]; % (a,b) -> (mu,omega) +end diff --git a/boosting/weightedstats/private/addrice.m b/boosting/weightedstats/private/addrice.m new file mode 100644 index 0000000..510cfc4 --- /dev/null +++ b/boosting/weightedstats/private/addrice.m @@ -0,0 +1,238 @@ +function s = addrice(s) +%ADDRICE Add the Rician distribution. + +% Copyright 1993-2004 The MathWorks, Inc. 
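Before the Rician code below, a quick illustration (editor's sketch, parameter values made up) of the Nakagami/gamma relationship that nakarnd and nakafit above rely on: if X is Nakagami(mu, omega), then X.^2 is gamma distributed with shape a = mu and scale b = omega/mu, so fitting a gamma to the squared data and mapping (a, b) back to (mu, omega) recovers the Nakagami parameters.

mu = 2; omega = 3; n = 1e5;
x  = sqrt(gamrnd(mu, omega./mu, n, 1));   % Nakagami sample, same recipe as nakarnd
ab = gamfit(x.^2);                        % gamma fit of the squared data
est = [ab(1)  ab(1).*ab(2)]               % (a,b) -> (mu,omega), as in nakafit; close to [2 3]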
+% $Revision: 1.1.6.10 $ $Date: 2004/02/01 22:10:34 $ + +j = length(s) + 1; +s(j).name = 'Rician'; +s(j).code = 'rician'; +s(j).pnames = {'s' 'sigma'}; +s(j).pdescription = {'noncentrality' 'scale'}; +s(j).prequired = [false false]; +s(j).fitfunc = @ricefit; +s(j).likefunc = @ricelike; +s(j).cdffunc = @ricecdf; +s(j).pdffunc = @ricepdf; +s(j).invfunc = @riceinv; +s(j).statfunc = @ricestat; +s(j).loginvfunc = []; +s(j).logcdffunc = []; +s(j).hasconfbounds = false; +s(j).censoring = true; +s(j).paramvec = true; +s(j).support = [0 Inf]; +s(j).closedbound = [false false]; +s(j).iscontinuous = true; +s(j).islocscale = false; +s(j).uselogpp = false; + + +% ==== Rician distribution functions ==== + +% these distribution functions do not yet handle arrays of parameters + +function y = ricepdf(x,s,sigma) +%RICEPDF Rician probability density function (pdf). +s(s < 0) = NaN; +sigma(sigma <= 0) = NaN; + +x(x<0) = 0; +sigsq = sigma.^2; +expon = (x.^2 + s.^2)./(2.*sigsq); +y = (x./sigsq) .* exp(-expon) .* besseli(0, x.*s./sigsq); +y(expon > (log(realmax(class(x)))-1)) = 0; % fix up 0*Inf + + +function p = ricecdf(x,s,sigma) +%RICECDF Rician cumulative distribution function (cdf). +s(s < 0) = NaN; +sigma(sigma <= 0) = NaN; + +x(x<0) = 0; +p = ncx2cdf((x./sigma).^2, 2, (s./sigma).^2); + + +function x = riceinv(p,s,sigma) +%RICEINV Inverse of the Rician cumulative distribution function (cdf). +s(s < 0) = NaN; +sigma(sigma <= 0) = NaN; + +x = sigma .* sqrt(ncx2inv(p, 2, (s./sigma).^2)); + + +function r = ricernd(s,sigma,varargin) +%RICERND Random arrays from the Rician distribution. +s(s < 0) = NaN; +sigma(sigma <= 0) = NaN; + +[err, sizeOut] = statsizechk(2,s,sigma,varargin{:}); +if err > 0 + error('stats:ricernd:InconsistentSizes','Size information is inconsistent.'); +end + +r = sigma .* sqrt(ncx2rnd(2, (s./sigma).^2, sizeOut)); + + +function [m,v] = ricestat(s,sigma) +%RICESTAT Mean and variance for the Rician distribution. +s(s < 0) = NaN; +sigma(sigma <= 0) = NaN; + +t = .5 .* (s./sigma).^2; +m = sigma.*sqrt(.5.*pi).*exp(-.5.*t) .* ((1+t).*besseli(0,.5.*t) + t.*besseli(1,.5.*t)); +v = 2.*sigma.^2 + s.^2 - m.^2; + + +function [nlogL,acov] = ricelike(params,data,cens,freq) +%RICELIKE Negative log-likelihood for the Rician distribution. +if nargin < 4 || isempty(freq), freq = ones(size(data)); end +if nargin < 3 || isempty(cens), cens = zeros(size(data)); end + +nlogL = rice_nloglf(params, data, cens, freq); +if nargout > 1 + acov = mlecov(params, data, 'nloglf',@rice_nloglf, 'cens',cens, 'freq',freq); +end + + +% ==== Rician fitting functions ==== + +function [phat,pci] = ricefit(x,alpha,cens,freq,opts) +%NAKAFIT Parameter estimates and confidence intervals for Rician data. + +if nargin < 2 || isempty(alpha), alpha = .05; end +if nargin < 3 || isempty(cens), cens = zeros(size(x)); end +if nargin < 4 || isempty(freq), freq = ones(size(x)); end +if nargin < 5, opts = []; end + +if any(x <= 0) + error('stats:ricefit:BadData','The data in X must be positive'); +end + +% Moment estimators of the uncensored data as starting point +% E[x.^2] = s.^2 + 2.*sigma.^2 +% E[x.^4] = s.^4 + 8.*s.^2.*sigma.^2 + 8.*sigma.^4 +xsqunc = x(cens == 0).^2; +meanxsq = mean(xsqunc); meanx4th = mean(xsqunc); +if meanxsq.^2 < meanx4th && meanx4th < 2.*meanxsq.^2 + s4th = 2.*meanxsq.^2 - meanx4th; + ssq = sqrt(s4th); + sigsq = .5.*(meanxsq - ssq); + start = [sqrt(ssq) sqrt(sigsq)]; +else + start = cast([1 1],class(x)); +end + +% The default options include turning fminsearch's display off. 
This +% function gives its own warning/error messages, and the caller can turn +% display on to get the text output from fminsearch if desired. +options = statset(statset('ricefit'), opts); +tolBnd = options.TolBnd; +options = optimset(options); + +% Maximize the log-likelihood with respect to s and sigma. +[phat,nll,err,output] = ... + fminsearch(@rice_nloglf, start, options, x, cens, freq, tolBnd); +if (err == 0) + % fminsearch may print its own output text; in any case give something + % more statistical here, controllable via warning IDs. + if output.funcCount >= options.MaxFunEvals + wmsg = 'Maximum likelihood estimation did not converge. Function evaluation limit exceeded.'; + else + wmsg = 'Maximum likelihood estimation did not converge. Iteration limit exceeded.'; + end + warning('stats:ricefit:IterOrEvalLimit',wmsg); +elseif (err < 0) + error('stats:ricefit:NoSolution',... + 'Unable to reach a maximum likelihood solution.'); +end + +% Compute CIs using a normal approximation for phat. +if nargout > 1 + acov = mlecov(phat, x, 'nloglf',@rice_nloglf, 'cens',cens, 'freq',freq); + probs = [alpha/2; 1-alpha/2]; + se = sqrt(diag(acov))'; + pci = norminv([probs probs], [phat; phat], [se; se]); +end + + +function nll = rice_nloglf(parms, x, cens, freq, tolBnd) +%RICE_NLOGLF Objective function for Rician maximum likelihood. +s = parms(1); +sigma = parms(2); +sigsq = sigma.^2; + +% Restrict sigma to the open interval (0, Inf). +if nargin > 4 + if s < tolBnd || sigma < tolBnd + nll = Inf; + return + end +end + +bess0 = besseli(0, x.*s./sigsq); +rsq = (x.^2 + s.^2)./(2.*sigsq); +L = -rsq + log(bess0) + log(x./sigsq); +ncen = sum(freq.*cens); +if ncen > 0 + cen = (cens == 1); + xcen = x(cen); + L(cen) = log(marcumq(s./sigma,xcen./sigma)); +end +nll = -sum(freq .* L); + +% Don't have derivatives of the Marcum's Q, so can't compute an analytic +% gradient with censoring. +% +% if nargout > 1 +% dlogbess0 = besseli(1, x.*s./sigsq) ./ bess0; +% dL1 = (-s + dlogbess0.*x) ./ sigsq; +% dL2 = (rsq - 1 - dlogbess0.*x.*s./sigsq) ./ sigma; +% if ncen > 0 +% % dL1(cen) = ; +% % dL2(cen) = ; +% end +% ngrad = -[sum(freq .* dL1) sum(freq .* dL2)]; +% end + + +function Q = marcumq(a,b) +% Q = MARCUMQ(A,B) returns Marcum's "Q" function. + +if isa(a,'single') || isa(b,'single') + Q = repmat(single(NaN), size(a)); +else + Q = repmat(NaN, size(a)); +end +Q(a~=Inf & b==0) = 1; +Q(a~=Inf & b==Inf) = 0; +Q(a==Inf & b~=Inf) = 1; +z = (isnan(Q) & a==0 & b~=Inf); +if (any(z)) + Q(z) = exp((-b(z).^2)./2); +end + +z = isnan(Q) & ~isnan(a) & ~isnan(b); +if (any(z(:))) +% aa = (a(z).^2)./2; + aa = (a.^2)./2; + bb = (b(z).^2)./2; + + d = exp(-aa); + h = d; + f = bb.*exp(-bb); + k = 1; + delta = f .* h; + sum = delta; + j = (delta > sum.*eps(class(delta))); + while any(j) + d = aa.*d./k; + h = h + d; + f = bb.*f./(k+1); + delta = f .* h; + sum(j) = sum(j) + delta(j); + j = (delta > sum.*eps(class(delta))); + k = k + 1; + end + Q(z) = 1 - sum; +end diff --git a/boosting/weightedstats/private/addtls.m b/boosting/weightedstats/private/addtls.m new file mode 100644 index 0000000..c2d5efb --- /dev/null +++ b/boosting/weightedstats/private/addtls.m @@ -0,0 +1,210 @@ +function s = addtls(s) +%ADDTLS Add the t location-scale distribution. + +% Copyright 1993-2004 The MathWorks, Inc. 
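Another editor's sketch (not part of the patch, values made up) showing the noncentral chi-square identity used by ricecdf and ricernd above: if X is Rician(s, sigma), then (X/sigma).^2 is noncentral chi-square with 2 degrees of freedom and noncentrality (s/sigma).^2.

s = 2; sigma = 0.5; n = 1e5;
x  = sigma .* sqrt(ncx2rnd(2, (s./sigma).^2, n, 1));   % Rician sample, as in ricernd
pe = mean(x <= 2.5);                                   % empirical P(X <= 2.5)
pt = ncx2cdf((2.5./sigma).^2, 2, (s./sigma).^2);       % ricecdf's formula; pe and pt agree closely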
+% $Revision: 1.1.6.7 $ $Date: 2004/01/24 09:35:08 $ + +j = length(s) + 1; +s(j).name = 't location-scale'; +s(j).code = 'tlocationscale'; +s(j).pnames = {'mu' 'sigma' 'nu'}; +s(j).pdescription = {'location' 'scale' 'shape'}; +s(j).prequired = [false false false]; +s(j).fitfunc = @tlsfit; +s(j).likefunc = @tlslike; +s(j).cdffunc = @tlscdf; +s(j).pdffunc = @tlspdf; +s(j).invfunc = @tlsinv; +s(j).statfunc = @tlsstat; +s(j).loginvfunc = []; +s(j).logcdffunc = []; +s(j).hasconfbounds = false; +s(j).censoring = true; +s(j).paramvec = true; +s(j).support = [-Inf Inf]; +s(j).closedbound = [false false]; +s(j).iscontinuous = true; +s(j).islocscale = false; +s(j).uselogpp = false; + + +% ==== t Location-Scale distribution functions ==== + +% these distribution functions do not yet handle arrays of parameters + +function y = tlspdf(x, mu, sigma, nu) +%TLSPDF T location-scale probability density function (pdf). +sigma(sigma <= 0) = NaN; +nu(nu <= 0) = NaN; + +y = tpdf((x - mu)./sigma,nu)./sigma; + + +function p = tlscdf(x ,mu, sigma, nu) +%TLSCDF T location-scale cumulative distribution function (cdf). +sigma(sigma <= 0) = NaN; +nu(nu <= 0) = NaN; + +p = tcdf((x - mu)./sigma,nu); + + +function x = tlsinv(p, mu, sigma, nu) +%TLSINV Inverse of the t location-scale cumulative distribution function (cdf). +sigma(sigma <= 0) = NaN; +nu(nu <= 0) = NaN; + +x = tinv(p,nu).*sigma + mu; + + +function r = tlsrnd(mu, sigma, nu, varargin) +%TLSRND Random arrays from the t location-scale distribution. +sigma(sigma <= 0) = NaN; +nu(nu <= 0) = NaN; + +[err, sizeOut] = statsizechk(3,mu,sigma,nu,varargin{:}); +if err > 0 + error('stats:tlsrnd:InconsistentSizes','Size information is inconsistent.'); +end + +r = mu + sigma.*trnd(nu,sizeOut); + + +function [m,v] = tlsstat(mu, sigma, nu) +%TLSSTAT Mean and variance for the t location-scale distribution. +sigma(sigma <= 0) = NaN; +nu(nu <= 0) = NaN; + +if nu <= 1 + m = NaN; +else + m = mu; +end +if nu <= 2 + v = Inf; +else + v = sigma.^2 .* nu ./ (nu - 2); +end + + +function [nlogL,acov] = tlslike(params,data,cens,freq) +%TLSLIKE Negative log-likelihood for the t location-scale distribution. +if nargin < 4 || isempty(freq), freq = ones(size(data)); end +if nargin < 3 || isempty(cens), cens = zeros(size(data)); end + +nlogL = tls_nloglf(params, data, cens, freq); +if nargout > 1 + acov = mlecov(params, data, 'nloglf',@tls_nloglf, 'cens',cens, 'freq',freq); +end + + +% ==== t location-scale fitting functions ==== + +function [phat,pci] = tlsfit(x,alpha,cens,freq,opts) + +if nargin < 2 || isempty(alpha), alpha = .05; end +if nargin < 3 || isempty(cens), cens = zeros(size(x)); end +if nargin < 4 || isempty(freq), freq = ones(size(x)); end +if nargin < 5, opts = []; end + +% Robust estimators for the mean and std dev of a normal, and method +% of moments on t-kurtosis for nu +xunc = x(cens == 0); +k = max(kurtosis(xunc), 4); +start = [median(xunc), 1.253.*mad(xunc), 2.*(2.*k-3)./(k-3)]; + +% The default options include turning fminsearch's display off. This +% function gives its own warning/error messages, and the caller can turn +% display on to get the text output from fminsearch if desired. +options = statset(statset('tlsfit'), opts); +tolBnd = options.TolBnd; +options = optimset(options); + +% Maximize the log-likelihood with respect to mu, sigma, and nu. +[phat,nll,err,output] = ... 
+ fminsearch(@tls_nloglf, start, options, x, cens, freq, tolBnd); +if (err == 0) + % fminsearch may print its own output text; in any case give something + % more statistical here, controllable via warning IDs. + if output.funcCount >= options.MaxFunEvals + wmsg = 'Maximum likelihood estimation did not converge. Function evaluation limit exceeded.'; + else + wmsg = 'Maximum likelihood estimation did not converge. Iteration limit exceeded.'; + end + if phat(3) > 100 % degrees of freedom became very large + wmsg = sprintf('%s\n%s', wmsg, ... + 'The normal distribution might provide a better fit.'); + end + warning('stats:tlsfit:IterOrEvalLimit',wmsg); +elseif (err < 0) + error('stats:tlsfit:NoSolution',... + 'Unable to reach a maximum likelihood solution.'); +end + +if nargout > 1 + acov = mlecov(phat, x, 'nloglf',@tls_nloglf, 'cens',cens, 'freq',freq); + probs = [alpha/2; 1-alpha/2]; + se = sqrt(diag(acov))'; + + % Compute the CI for mu using a normal approximation for muhat. + pci(:,1) = norminv(probs, phat(1), se(1)); + + % Compute the CI for sigma using a normal approximation for + % log(sigmahat), and transform back to the original scale. + % se(log(sigmahat)) is se(sigmahat) / sigmahat. + logsigci = norminv(probs, log(phat(2)), se(2)./phat(2)); + pci(:,2) = exp(logsigci); + + % Compute the CI for nu using a normal distribution for nuhat. + pci(:,3) = norminv(probs, phat(3), se(3)); +end + + +function nll = tls_nloglf(parms, x, cens, freq, tolBnd) +%TLS_NLOGLF Objective function for t location-scale maximum likelihood. +mu = parms(1); +sigma = parms(2); +nu = parms(3); + +% Restrict sigma and nu to the open interval (0, Inf). +if nargin > 4 + if sigma < tolBnd || nu < tolBnd + nll = Inf; + return + end +end + +t = (x - mu) ./ sigma; +w = nu + (t.^2); +logw = log(w); + +L = -.5.*(nu+1).*logw + gammaln(.5.*(nu+1)) - gammaln(.5.*nu) + 0.5.*nu.*log(nu) - log(sigma) - .5.*log(pi); +ncen = sum(freq.*cens); +if ncen > 0 + cen = (cens == 1); + if nu < 1e7 % Use the standard formula + Scen = betainc(nu ./ w(cen), .5.*nu, 0.5) ./ 2; + + % Reflect for negative t. + reflect = (t(cen) < 0); + Scen(reflect) = 1 - Scen(reflect); + + else % Use a normal approximation. + Scen = log(0.5 * erfc(t(cen) ./ sqrt(2))); + end + L(cen) = log(Scen); +end +nll = -sum(freq .* L); + +% Don't yet have dbetainc, so can't compute an analytic gradient with censoring. +% +% if nargout > 1 +% dL1 = (nu+1).*t./(w.*sigma); +% dL2 = t.*dL1 - 1./sigma; +% dL3 = .5.*(-logw - (nu+1)./w + psi(.5.*(nu+1)) - psi(.5.*nu) + log(nu) + 1); +% if ncen > 0 +% % dL1(cen) = ; +% % dL2(cen) = ; +% % dL3(cen) = ; +% end +% ngrad = -[sum(freq .* dL1) sum(freq .* dL2) sum(freq .* dL3)]; +% end diff --git a/boosting/weightedstats/private/dfaddbuttons.m b/boosting/weightedstats/private/dfaddbuttons.m new file mode 100644 index 0000000..90dccbc --- /dev/null +++ b/boosting/weightedstats/private/dfaddbuttons.m @@ -0,0 +1,163 @@ +function dfaddbuttons(dffig) +%DFADDBUTTONS Add buttons to the curve fitting plot figure window + +% $Revision: 1.1.6.8 $ $Date: 2004/01/24 09:35:09 $ +% Copyright 2003-2004 The MathWorks, Inc. + +% Clear out any old stuff +h0 = findall(dffig,'Type','uicontrol','Style','pushbutton'); +if ~isempty(h0), delete(h0); end +p0 = ones(1,4); % temporary position before adjustment + +h0=uicontrol(dffig,'units','pixels','Tag','selectionframe',... + 'Style','frame','Position',p0); +h1=uicontrol(dffig,'units','pixels','Tag','displaytext',... + 'String','Display type:', 'style','text','Position',p0,... 
+ 'HorizontalAlignment','left','FontWeight','bold'); + +choices = ['Density (PDF)|Cumulative probability (CDF)|Quantile (inverse CDF)|Probability plot|'... + 'Survivor function|Cumulative hazard']; +h2=uicontrol(dffig,'units','pixels','Tag','displaylist',... + 'String',choices, 'Style','pop','BackgroundColor',ones(1,3), ... + 'Callback', @cbkfunction,'Position',p0); +setappdata(h2,'codenames',... + {'pdf' 'cdf' 'icdf' 'probplot' 'survivor' 'cumhazard'}); + +h3=uicontrol(dffig,'units','pixels','Tag','typetext',... + 'String','Distribution:', 'style','text','Position',p0,... + 'HorizontalAlignment','left','FontWeight','bold', 'Enable', 'off'); + +% Figure out which distributions could be used for probability plots, +% and remember their names and properties for later +alldist = dfgetdistributions; +dnames = {alldist.name}; +dcodes = {alldist.code}; +islocscale = [alldist.islocscale]; +choices = dnames(islocscale); +default = strmatch('Normal',choices); +if length(default)~=1 + default = 1; +end +h4=uicontrol(dffig,'units','pixels','Tag','typelist',... + 'String',choices, 'Style','pop','BackgroundColor',ones(1,3),... + 'Value',default,'Enable','off', 'Callback',@cbkfunction,... + 'Position',p0); + +% Store information about all distributions that could be used to make +% a probability plot, then separately list distributions that are ok +% given the current data. Right now they're the same, but if we read +% in negative data we'll prune the "ok" list to omit positive +% distributions. +setappdata(h4,'allfullnames',choices); +setappdata(h4,'allcodenames',dcodes(islocscale)); +setappdata(h4,'alldistspec',alldist(islocscale)); +setappdata(h4,'okcodenames',dcodes(islocscale)); + +hvec = [h0 h1 h2 h3 h4]; +setappdata(dffig,'selectioncontrols',hvec); + +% Define information for the buttons +strings = {xlate('Data...') xlate('New Fit...') xlate('Manage Fits...') xlate('Evaluate...') xlate('Exclude...')}; +tips = {xlate('Import, view, rename, plot and delete data') ... + xlate('Add a fitted distribution') ... + xlate('Edit, view, plot and rename fits') ... + xlate('Evaluate fits to compute a table of results') ... + xlate('Define rules for excluding data from a fit')}; +cbacks = {@cbkdata @cbknewfit @cbkmanfit @cbkeval @cbkexclude}; + +% Add the buttons to the figure +tags = {'dfdata' 'dfnewfit' 'dfmanfit' 'dfevaluate' 'dfexclude'}; +n = length(strings); +h = zeros(1,n); +for j=1:length(strings) + h(j) = uicontrol(dffig,'Units','pixels', ... + 'Position',[j,1,1,1],... + 'String',strings{j}, 'TooltipString',tips{j}, ... + 'Callback',cbacks{j}, 'Tag',tags{j}); +end +setappdata(dffig,'buttoncontrols',h); + +% ---------------------- callback for Import button +function cbkdata(varargin) +%CBKDATA Callback for Data button + +delete(findall(gcbf,'Tag','dfstarthint')); +com.mathworks.toolbox.stats.Data.showData; + +% ---------------------- callback for New Fit button +function cbknewfit(varargin) +%CBKNEWFIT Callback for New Fit button + +com.mathworks.toolbox.stats.Fitting.getFitting.showNewFit; + + +% ---------------------- callback for Manage Fit button +function cbkmanfit(varargin) +%CBKMANFIT Callback for Manage Fit button + +com.mathworks.toolbox.stats.FitManager.showFitManager; + +% ---------------------- callback for Evaluate button +function cbkeval(varargin) +%CBKEVAL Callback for Evaluate button + +% Get the current horizontal axis limits of the main figure. We'll set the +% default points at which to evaluate to a "pretty" colon expression that +% spans those limits, with 10 steps. 
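% (Editor's illustration, not part of the original patch: with hypothetical
% axis limits of [3 18], xmag = 3, so rounder = 1, xmin = 3, xmax = 18, and
% the step below works out to roughly 1.5, giving an evaluation string close
% to '3:1.5:18', i.e. ten steps spanning the current x-axis limits.)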
+xlims = dfgetset('xminmax'); + +if isempty(xlims) + xstr = ''; +else + % Start out by choosing a rounding that will give the smaller (in + % magnitude) of min(x) and max(x) a single sig digit, but at most two sig + % digits in the larger. The smaller may round to zero. If the min and max + % round to the same thing, use more digits until we get rounded numbers + % that differ. + xmag = max(min(abs(xlims)), max(abs(xlims))/10); + rounder = 10^floor(log10(xmag)); + while true + xmin = round(xlims(1)./rounder) * rounder; + xmax = round(xlims(2)./rounder) * rounder; + if xmin < xmax, break; end + rounder = rounder/10; + end + + % Create 10 steps, where the step size will have three more significant + % digits than the endpoints. + stepRounder = rounder ./ 1000; + step = floor((xmax-xmin)./(10*stepRounder)) * stepRounder; + + % Figure out how many digits we need display in order to distinguish the + % endpoints. That's the number of digits to the left of the decimal, plus + % however many we've rounded to on the right. Allow for at least four so + % that we'll get, e.g., "4000", and not "4e+03". + xminDigits = max(round(log10(max(abs(xmin),1))-log10(rounder)),4); + xmaxDigits = max(round(log10(max(abs(xmax),1))-log10(rounder)),4); + xstr = sprintf('%0.*g:%g:%0.*g', xminDigits, xmin, step, xmaxDigits, xmax); +end + +% Get the current plot type of the main figure, we'll set the default +% function type to evaluate based on that. +ftype = dfgetset('ftype'); +if strcmp(ftype,'probplot') + ftype = 'cdf'; % can't evaluate a prob plot, use cdf instead +% else {'pdf' 'cdf' 'survivor' 'icdf' 'cumhazard' 'hazrate'} + % otherwise use the current setting +end + +com.mathworks.toolbox.stats.Evaluate.showEvaluate(ftype,xstr); + +% ---------------------- callback for Exclude button +function cbkexclude(varargin) +%CBKEXCLUDE Callback for Exclude button + +com.mathworks.toolbox.stats.Exclude.showExclude; + +% ---------------------- callback for display list +function cbkfunction(varargin) +%CBKFUNCTION Callback for setting function to display + +% Get the requested function and distribution types +dffig = gcbf; +dfsetplottype(dffig); diff --git a/boosting/weightedstats/private/dfaddparamfit.m b/boosting/weightedstats/private/dfaddparamfit.m new file mode 100644 index 0000000..8200908 --- /dev/null +++ b/boosting/weightedstats/private/dfaddparamfit.m @@ -0,0 +1,307 @@ +function hFit = dfaddparamfit(hFit, fitname, distname, dsname, fitframe, exclname, useestimated, fixedvals) +%DFADDPARAMFIT Add parametric fit in dfittool + +% $Revision: 1.1.6.10 $ $Date: 2004/01/24 09:35:10 $ +% Copyright 2003-2004 The MathWorks, Inc. 
+ +badfit = false; % badfit=true means fit failed or not attempted +covok = true; % covariance calculation can be done +if isempty(hFit) + newfit = true; + hFit = stats.dffit(fitname, fitframe); +else + newfit = false; +end +listeners = hFit.listeners; +set(listeners, 'Enabled', 'off'); + +frameContents = fitframe.getContentPane; +componentsVector = get(frameContents,'Components'); +fittingPanel = componentsVector(1); + +% Get data set to fit +ds=find(getdsdb,'name',dsname); +hFit.distname = distname; +hFit.dataset = dsname; +hFit.fittype = 'param'; +hFit.dshandle = ds; + +% Store some GUI values in fit +hFit.pfixedtext = fixedvals; +hFit.pestimated = useestimated; + +% Extract data from this data set +alpha = 0.05; +hExcl = dfgetexclusionrule(exclname); +[x, cens, freq] = getincludeddata(ds,hExcl); + +% Get information about the requested distribution +dist = dfgetdistributions(distname); +if length(dist)~=1 || isempty(x) + if length(dist)~=1 + emsg = 'Bad distribution name.'; + else + emsg = 'No data remaining after exclusion rule applied.'; + end + wmsg = ''; + badfit = true; +end +if length(dist)==1 + hFit.enablebounds = dist.hasconfbounds; +end + +% Perform the fit +lasterr(''); +lastwarn(''); +ws = warning('off'); +if badfit + p = []; +else + try + nparams = length(dist.pnames); + if dist.censoring + censargs = {cens freq}; + else + if ~isempty(cens) && any(cens) + error('stats:dfaddparamfit:NoCensoring',... + 'Censoring not allowed with the %s distribution', distname); + elseif ~isempty(freq) && any(freq~=1) + x = expandInput(x,freq); + freq = []; + end + censargs = {}; + end + + % How many output variables will this return? + if dist.paramvec + nparamvars = 1; + else + nparamvars = nparams; + end + + fixedparams = cell(0,1); + if any(~useestimated) + for j=1:nparams + if ~useestimated(j) + txt = deblank(fixedvals{j}); + if isempty(txt) + error('stats:dfaddparamfit:BadParam',... + 'Invalid value for parameter %s', dist.pnames{j}); + end + num = str2double(txt); + if ~isfinite(num) + error('stats:dfaddparamfit:BadParam',... 
+ 'Invalid value for parameter %s', dist.pnames{j}); + end + fixedparams{length(fixedparams)+1} = num; + end + end + end + + % Set up a cell array to receive outputs, then do the fit + pcell = cell(nparamvars,1); + [pcell{:}] = feval(dist.fitfunc, x, fixedparams{:}, alpha, censargs{:}); + + % Extract results into a single vector + if dist.paramvec + p = pcell{1}; + else + p = [pcell{:}]; + end + catch + p = []; + end +end +warning(ws); + +if ~badfit + if ~isempty(lastwarn) + wmsg = sprintf('Warning: %s',lastwarn); + else + wmsg = ''; + end + emsg = lasterr; + newmsg = ''; + if any(~isfinite(p)) + newmsg = 'Fit produced infinite parameter estimates.'; + elseif numel(p)~=numel(dist.pnames) || ~isnumeric(p) + newmsg = 'Fit function returned bad parameter values'; + end + if ~isempty(newmsg) + badfit = true; + emsg = combinemsg(emsg,newmsg); + end + + % Any type of failure so far makes the covariance calculation questionable + if ~isempty(wmsg) || ~isempty(emsg) + covok = false; + end +end + +% Try to get a likelihood value +if isempty(p) + pcov = []; + nlogl = NaN; +else + try + if ~isempty(dist.likefunc) + if covok + [nlogl,pcov] = feval(dist.likefunc, p, x, censargs{:}); + else + nlogl = feval(dist.likefunc, p, x, censargs{:}); + pcov = []; + end + else + pcov = []; + nlogl = localnlogl(num2cell(p),dist.pdffunc,dist.cdffunc,x,cens,freq); + end + newmsg = ''; + catch + newmsg = lasterr; + end + if isempty(newmsg) && (~isnumeric(nlogl) || ~isscalar(nlogl)) + newmsg = 'Result must be a numeric scalar'; + end + if isnan(nlogl) + nlogl = NaN; % explicitly set to real nan to remove imaginary part + end + if ~isempty(newmsg); + pcov = []; + nlogl = NaN; + wmsg = combinemsg(wmsg,... + sprintf('Error while evaluating likelihood:\n%s',... + newmsg)); + end + +end + +% Get the range over which to show the fit +dffig = dfgetset('dffig'); +ax = findall(dffig,'Type','axes','Tag','main'); +xlim = get(ax,'XLim'); + +% Create a fit object using the information we calculated +if badfit + resultsText = emsg; +else + try + hFit = storefitresults(hFit, dist, p, pcov, nlogl, xlim, hExcl, exclname); + resultsText = getresults(hFit); + catch + resultsText = lasterr; + badfit = true; + end +end + +resultsText = combinemsg(wmsg,resultsText); + +% Show results +hFit.resultstext = resultsText; +fittingPanel.setResults(resultsText) + +if ~isempty(hFit) + if ~newfit && ~(hFit.isgood == ~badfit) + com.mathworks.toolbox.stats.FitsManager.getFitsManager.fitIsGoodChanged(java(hFit), ~badfit); + end + hFit.isgood = ~badfit; + if newfit + hFit.plot = 1; + % Add to fit array + connect(hFit,getfitdb,'up'); + end +end + +if hFit.plot + % Determine if bounds can be shown + if ~dist.hasconfbounds + hFit.showbounds = false; + end + + % Update plotted curve + updateplot(hFit); + + % Update plot limits + dfswitchyard('dfupdatexlim'); + dfswitchyard('dfupdateylim'); +end + +set(listeners, 'Enabled', 'on'); + +if ~newfit + com.mathworks.toolbox.stats.FitsManager.getFitsManager.fitChanged(... 
+ java(hFit),fitname,fitname); +end + +% Display a more prominent warning outside the results text +if ~badfit && ~isempty(wmsg) + warndlg(wmsg,'Distribution Fitting Warning','modal'); +end + +% ---------------------------------------------- +function hFit = storefitresults(hFit, dist, p, pcov, nlogl, xlim, hExcl, exclname) +% Update its properties +hFit.distspec = dist; +hFit.params = p; +hFit.pcov = pcov; +hFit.pfixed = false(size(p)); +hFit.loglik = -nlogl; +hFit.support = dist.support; +hFit.exclusionrule = hExcl; +hFit.exclusionrulename = exclname; + +hFit.xlim = xlim; +setftype(hFit,dfgetset('ftype')); + + +% --------------------------------------------- +function nlogl = localnlogl(p, pdf, cdf, x, cens, freq) +% Calculate negative log likelihood + +% Handle defaults for option inputs +if isempty(cens) + cens = false(size(x)); +else + cens = (cens == 1); +end +if isempty(freq) + freq = ones(size(x)); +end + +% Compute for uncensored observations +nlogl = - sum(freq(~cens) .* log(feval(pdf, x(~cens), p{:}))); + +% Add component for censored observations +if any(cens) + nlogl = nlogl - sum(freq(cens) .* log(1-feval(cdf, x(cens), p{:}))); +end + +% ----------------------------------- +function msg = combinemsg(msg,newmsg) +%COMBINEMSG Combine multiple messages into a single message +if isempty(msg) + msg = newmsg; +elseif ~isempty(newmsg) + msg = sprintf('%s\n\n%s',msg,newmsg); +end + +% ----------------------------------------- +function expanded = expandInput(input,freq) +%EXPANDDATA Expand out an input vector using element frequencies. +if ~isequal(size(input),size(freq)) + error('stats:dfaddparamfit:InputSizeMismatch',... + 'Input argument sizes must match.'); +end + +% Remove points that have zero frequency +t = (freq == 0); +if any(t) + input(t) = []; + freq(t) = []; +end + +% Expand the remainder +i = cumsum(freq); +j = zeros(1, i(end)); +j(i(1:end-1)+1) = 1; +j(1) = 1; +expanded = input(cumsum(j)); diff --git a/boosting/weightedstats/private/dfaddsmoothfit.m b/boosting/weightedstats/private/dfaddsmoothfit.m new file mode 100644 index 0000000..bed4812 --- /dev/null +++ b/boosting/weightedstats/private/dfaddsmoothfit.m @@ -0,0 +1,147 @@ +function hFit = dfaddsmoothfit(hFit, fitname, kernelname, widthradio, widthtext, dsname, fitframe, supportradio, supporttext, exclname) +%DFADDSMOOTHFIT Add smooth fit in dfittool + +% $Revision: 1.1.6.7 $ $Date: 2004/01/24 09:35:11 $ +% Copyright 2003-2004 The MathWorks, Inc. 
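A worked trace (editor's addition, example numbers made up) of the cumsum indexing trick in expandInput above, which replicates each observation according to its frequency without an explicit loop:

input = [10 20 30];  freq = [2 1 3];
i = cumsum(freq);            % [2 3 6]
j = zeros(1, i(end));        % [0 0 0 0 0 0]
j(i(1:end-1)+1) = 1;         % [0 0 1 1 0 0]
j(1) = 1;                    % [1 0 1 1 0 0]
expanded = input(cumsum(j))  % [10 10 20 30 30 30]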
+ +frameContents = fitframe.getContentPane; +componentsVector = get(frameContents,'Components'); +fittingPanel = componentsVector(1); +isgood = true; + +if isempty(hFit) + newfit = true; + % Create the fit + hFit = stats.dffit(fitname, fitframe); +else + newfit = false; +end + +listeners = hFit.listeners; +set(listeners, 'Enabled', 'off'); + +if supportradio == 2 + support = supporttext; + try + L = str2double(support(1)); + U = str2double(support(2)); + catch + L = NaN; + U = NaN; + end + if isnan(L) || isnan(U) || L>U + emsg = 'Invalid values for specified domain bounds.'; + fittingPanel.setResults(emsg); + hFit.resultstext = emsg; + errordlg(emsg,'Domain Invalid'); + if ~isempty(hFit) + isgood = false; + end + end + support = [L U]; +elseif supportradio == 1 + support = 'positive'; + L = 0; + U = Inf; +else %unbounded + support = 'unbounded'; + L = -Inf; + U = Inf; +end + +% Get data set to fit +ds=find(getdsdb,'name',dsname); + +if widthradio == 0 + width = []; +else + width = str2num(widthtext); +end + +% Get the range over which to show the fit +dffig = dfgetset('dffig'); +ax = findall(dffig,'Type','axes','Tag','main'); +xlim = get(ax,'XLim'); + +% Make sure the data are within range +hExcl = dfgetexclusionrule(exclname); +ydata = getincludeddata(ds,hFit.exclusionrule); +if isempty(ydata) + emsg = 'No data remaining after exclusion rule applied.'; + fittingPanel.setResults(emsg); + hFit.resultstext = emsg; + isgood = false; +end + +if isgood && ((min(ydata)<=L) || (max(ydata)>=U)) + emsg = 'Data out of range of specified domain bounds.'; + fittingPanel.setResults(emsg); + hFit.resultstext = emsg; + errordlg(emsg,'Domain Invalid'); + isgood = false; +end + +if ~newfit && ~(hFit.isgood == isgood) + sendIsGoodChangeNotification = true; +else + sendIsGoodChangeNotification = false; +end + +try + % Update its properties + hFit.dshandle = ds; + hFit.dataset=dsname; + hFit.bandwidth = width; + hFit.bandwidthtext = widthtext; + hFit.bandwidthradio = widthradio; + hFit.kernel = kernelname; + hFit.xlim = xlim; + hFit.fittype = 'smooth'; + hFit.support = support; + hFit.supportlower = supporttext{1}; + hFit.supportupper = supporttext{2}; + hFit.supportradio = supportradio; + hFit.exclusionrule = hExcl; + hFit.exclusionrulename = exclname; + hFit.isgood = isgood; + hFit.enablebounds = 0; + setftype(hFit,dfgetset('ftype')); + success = true; +catch + success = false; +end + +if success + if newfit + % Add to fit array + hFit.plot = 1; + connect(hFit,getfitdb,'up'); + end + + if sendIsGoodChangeNotification + com.mathworks.toolbox.stats.FitsManager.getFitsManager.fitIsGoodChanged(java(hFit), isgood); + end + + if ~newfit + com.mathworks.toolbox.stats.FitsManager.getFitsManager.fitChanged(... 
+ java(hFit),fitname,fitname); + end +end + +if hFit.plot + % Update plotted curve + updateplot(hFit); + + % Update plot limits + dfswitchyard('dfupdatexlim'); + dfswitchyard('dfupdateylim'); +end + +if isgood + % Show results, must be done after bandwidth is filled in during plotting + resultsText = getresults(hFit); + hFit.resultstext = resultsText; + fittingPanel.setResults(resultsText); +end + +set(listeners, 'Enabled', 'on'); diff --git a/boosting/weightedstats/private/dfadjustlayout.m b/boosting/weightedstats/private/dfadjustlayout.m new file mode 100644 index 0000000..a0b57e2 --- /dev/null +++ b/boosting/weightedstats/private/dfadjustlayout.m @@ -0,0 +1,95 @@ +function dfadjustlayout(dffig,showctrl) +%ADJUSTLAYOUT Adjust layout of buttons and graph in figure window + +% $Revision: 1.1.6.5 $ $Date: 2004/01/24 09:35:12 $ +% Copyright 2003-2004 The MathWorks, Inc. + +% Get some measurements +fpos = get(dffig,'Position'); +fwidth = fpos(3); +fheight = max(1,fpos(4)); + +% Adjust selection controls at top +hsel = getappdata(dffig,'selectioncontrols'); +lmargin = 0; +if ~isempty(hsel) + % If there are no selection controls, don't try to compute their height + emax = zeros(1,4); + allpos = get(hsel, 'Position'); + allpos = vertcat(allpos{:}); + allextent = get(hsel, 'Extent'); + allextent = vertcat(allextent{:}); + maxheight = max(allextent(2:end,4)); % max height of non-frame controls +end +for j=2:2:length(hsel) + % Adjust label + p = allpos(j,:); + e = allextent(j,:); + p(1) = lmargin + 5; + p(2) = fheight-1.45*maxheight; + p(3) = e(3); + p(4) = maxheight; + lmargin = p(1) + p(3); + set(hsel(j),'Position',p); + emax = max(emax,e); + labelwidth = e(3); + + % Adjust drop-down + p = get(hsel(j+1),'Position'); + e = get(hsel(j+1),'Extent'); + p(1) = lmargin + 5; + p(2) = fheight-1.25*maxheight; + p(3) = max(e(3),2.25*labelwidth); + p(4) = maxheight; + lmargin = p(1) + p(3) + 20; + set(hsel(j+1),'Position',p); + emax = max(emax,e); +end +if ~isempty(hsel) + % Get position of the base of the frame containing these controls + framebase = max(1, fheight-1.6*maxheight); + p = [1, framebase, fwidth, 1.6*maxheight]; + set(hsel(1),'Position',p); +else + % Say the base of this frame is right at the top of the figure + framebase = fheight; +end + +% Adjust all button positions below selection controls +hbuttons = getappdata(dffig,'buttoncontrols'); +if isempty(hbuttons) + % If there are no buttons, say their base is right at the frame + buttonbase = framebase; +else + % If there are buttons, compute the height of their base + nbuttons = length(hbuttons); + extents = get(hbuttons,'Extent'); + extents = vertcat(extents{:}); + bheight = 1.5 * extents(1,4); % 1.5 * text height + gutter = bheight/4; % between buttons + margin = bheight/2; % around text within button + bwidth = extents(:,3)' + 2*margin; + totalwidth = sum(bwidth) + gutter*(nbuttons-1); + startpos = max(0, (fwidth/2) - (totalwidth/2)); + bleft = startpos + [0, cumsum(bwidth + gutter)]; + buttonbase = max(1, framebase-bheight-gutter); + for j=1:nbuttons + pos = [bleft(j), buttonbase, bwidth(j), bheight]; + set(hbuttons(j), 'Position',pos); + end +end + +% Position the axes in the remaining area +ax = get(dffig,'CurrentAxes'); +axbase = 0.11*fheight; +p1 = max(1, [.13*fwidth, axbase, .775*fwidth, .9*buttonbase-axbase]); +set(ax,'Units','pixels','Position',p1); +set(ax,'Units','normalized'); + +if nargin<2 + showctrl = dfgetset('showaxlimctrl'); +end +if isequal(showctrl,'on') + dfaxlimctrl(dffig,'off'); + dfaxlimctrl(dffig,'on'); +end diff 
--git a/boosting/weightedstats/private/dfadjustmenu.m b/boosting/weightedstats/private/dfadjustmenu.m new file mode 100644 index 0000000..d36adf5 --- /dev/null +++ b/boosting/weightedstats/private/dfadjustmenu.m @@ -0,0 +1,121 @@ +function dfadjustmenu(dffig) +%DFADJUSTMENU Adjust contents of curve fit plot menus + +% $Revision: 1.1.6.9 $ $Date: 2004/01/24 09:35:13 $ +% Copyright 2003-2004 The MathWorks, Inc. + +% Remove some menus entirely +h = findall(dffig, 'Type','uimenu', 'Parent',dffig); +h0 = findall(h,'flat', 'Label','&Edit'); +if (~isempty(h0)) + j = find(h==h0); + delete(h0); + h(j) = []; +end +h0 = findall(h,'flat', 'Label','&Insert'); +if (~isempty(h0)) + j = find(h==h0); + delete(h0); + h(j) = []; +end + +% Add or remove some items from other menus +% Fix FILE menu +h0 = findall(h,'flat', 'Label','&File'); +h1 = findall(h0, 'Type','uimenu', 'Parent',h0); +m4 = []; +m2 = []; +for j=length(h1):-1:1 + mlabel = get(h1(j),'Label'); + if ~isempty(findstr(mlabel,'Close')) + m7 = h1(j); + set(m7,'Label','&Close Distribution Fitting') + elseif ~isempty(findstr(mlabel,'Print...')) + m5 = h1(j); + else + delete(h1(j)); + h1(j) = []; + end +end +uimenu(h0, 'Label','&Import Data...', 'Position',1,... + 'Callback','dfittool(''import data'')'); +uimenu(h0, 'Label','Clea&r Session','Position',2,... + 'Callback','dfittool(''clear session'')','Separator','on'); +uimenu(h0, 'Label','&Load Session...', 'Position',3,... + 'Callback','dfittool(''load session'')'); +uimenu(h0, 'Label','&Save Session...', 'Position',4,... + 'Callback','dfittool(''save session'')'); +uimenu(h0, 'Label','Generate &M File...', 'Position',5,... + 'Callback','dfittool(''generate code'')'); + +uimenu(h0, 'Label','&Define Custom Distributions...','Position',6,... + 'Callback',{@dfcustomdist,'define'}','Separator','on'); +uimenu(h0, 'Label','I&mport Custom Distributions...', 'Position',7, ... + 'Callback',{@dfcustomdist,'import'},'Tag','importcustom'); +uimenu(h0, 'Label','Cl&ear Custom Distributions...', 'Position',8,... + 'Callback',{@dfcustomdist,'clear'},'Tag','clearcustom'); + + +set(m5,'Position',9,'Separator','on'); +uimenu(h0, 'Label','Print to &Figure', 'Position',10,... + 'Callback','dfittool(''duplicate'')'); +set(m7,'Position',11,'Separator','on'); + +% Fix VIEW menu +h0 = findall(h,'flat', 'Label','&View'); +h1 = findall(h0, 'Type','uimenu', 'Parent',h0); +delete(h1); +uimenu(h0, 'Label','&Legend', 'Position',1,'Separator','off',... + 'Callback','dfittool(''togglelegend'')', 'Checked','on',... + 'Tag','showlegend'); +dfgetset('showlegend','on'); +uimenu(h0, 'Label','&Grid', 'Position',2,... + 'Callback','dfittool(''togglegrid'')', 'Checked','off', ... + 'Tag','showgrid'); +dfgetset('showgrid','off'); +h1 = uimenu(h0, 'Label','C&onfidence Level','Position',3,'Separator','on'); +uimenu(h1, 'Label','9&0%', 'Position',1, ... + 'Callback','dfittool(''setconflev'',.90)','Tag','conflev'); +uimenu(h1, 'Label','9&5%', 'Position',2, 'Checked','on',... + 'Callback','dfittool(''setconflev'',.95)','Tag','conflev'); +uimenu(h1, 'Label','9&9%', 'Position',3, ... + 'Callback','dfittool(''setconflev'',.99)','Tag','conflev'); +uimenu(h1, 'Label','&Other...', 'Position',4, ... + 'Callback','dfittool(''setconflev'',[])','Tag','conflev'); +dfgetset('conflev',0.95); +uimenu(h0, 'Label','&Clear Plot', 'Position',4,... 
+ 'Callback','dfittool(''clear plot'')'); + +% Fix TOOLS menu +h0 = findall(h,'flat', 'Label','&Tools'); +h1 = findall(h0, 'Type','uimenu', 'Parent',h0); +for j=length(h1):-1:1 + mlabel = get(h1(j),'Label'); + if isempty(findstr(mlabel,'Zoom')) && isempty(findstr(mlabel,'Pa&n')) + delete(h1(j)); + h1(j) = []; + else + set(h1(j),'Separator','off'); + end +end +uimenu(h0, 'Label','&Axis Limit Control', 'Position',4, 'Separator','on', ... + 'Callback','dfittool(''toggleaxlimctrl'')', 'Checked','off', ... + 'Tag','showaxlimctrl'); +dfgetset('showaxlimctrl','off'); +uimenu(h0, 'Label','&Default Axis Limits', 'Position',5, ... + 'Callback','dfittool(''defaultaxes'')'); +uimenu(h0, 'Label','Set Default &Bin Rules', 'Position',6, 'Separator','on', 'Callback', ... + 'com.mathworks.toolbox.stats.BinWidth.getBinWidth.displayBinWidth', ... + 'Tag','setbinrules'); + + +% Fix HELP menu +h0 = findall(h,'flat', 'Label','&Help'); +h1 = findall(h0, 'Type','uimenu', 'Parent',h0); +delete(h1); +uimenu(h0, 'Label','Statistics &Toolbox Help', 'Position',1,'Callback',... + 'doc stats'); +uimenu(h0, 'Label', 'Distribution &Fitting Tool Help', 'Position',2,'Callback',... + 'dfswitchyard(''dfhelpviewer'', ''distribution_fitting'', ''dfittool'')'); +uimenu(h0, 'Label','&Demos', 'Position',3,'Separator','on','Callback',... + 'demo toolbox stat'); diff --git a/boosting/weightedstats/private/dfadjusttoolbar.m b/boosting/weightedstats/private/dfadjusttoolbar.m new file mode 100644 index 0000000..0abd20b --- /dev/null +++ b/boosting/weightedstats/private/dfadjusttoolbar.m @@ -0,0 +1,52 @@ +function dfadjusttoolbar(dffig) +%DFADJUSTTOOLBAR Adjust contents of distribution fitting plot toolbar + +% $Revision: 1.1.6.5 $ $Date: 2004/01/24 09:35:14 $ +% Copyright 2003-2004 The MathWorks, Inc. + +h0 = findall(dffig,'Type','uitoolbar'); +h1 = findall(h0,'Parent',h0); +czoom = []; +for j=length(h1):-1:1 + mlabel = get(h1(j),xlate('TooltipString')); + if ~isempty(findstr(mlabel,'Zoom')) || ~isempty(findstr(mlabel,'Pan')) + czoom(end+1) = h1(j); + elseif isempty(findstr(mlabel,'Print')) + delete(h1(j)); + h1(j) = []; + else + c1 = h1(j); + end +end + +% Add more icons especially for distribution fitting +if exist('dficons.mat','file')==2 + icons = load('dficons.mat','icons'); + state = dfgetset('showlegend'); + if isempty(state), state = 'on'; end + try + % Try to get the default MATLAB legend icon + legicon = load([matlabroot '/toolbox/matlab/icons/legend.mat']); + cdata = legicon.cdata; + catch + cdata = icons.icons.legend; % in case of trouble, use older icon + end + c2 = uitoggletool(h0, 'CData',cdata,... + 'State',state,... + 'TooltipString', 'Legend On/Off',... + 'Separator','on',... + 'ClickedCallback','dfittool(''togglelegend'')',... + 'Tag','showlegend'); + state = dfgetset('showgrid'); + if isempty(state), state = 'off'; end + c3 = uitoggletool(h0, 'CData',icons.icons.grid,... + 'State',state,... + 'TooltipString', ('Grid On/Off'),... + 'Separator','off',... + 'ClickedCallback','dfittool(''togglegrid'')',... 
+ 'Tag','showgrid'); + c = get(h0,'Children'); + cnew = [c1 czoom c2 c3]'; + + set(h0,'Children',cnew(end:-1:1)); +end diff --git a/boosting/weightedstats/private/dfasksavesession.m b/boosting/weightedstats/private/dfasksavesession.m new file mode 100644 index 0000000..c3fd625 --- /dev/null +++ b/boosting/weightedstats/private/dfasksavesession.m @@ -0,0 +1,29 @@ +function ok = dfasksavesession(dffig) +%DFASKSAVESESSION Ask whether current session should be saved + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:15 $ +% Copyright 2003-2004 The MathWorks, Inc. + +dsdb = getdsdb; +fitdb = getfitdb; + +% Offer to save session unless there's nothing to save +if isempty(down(dsdb)) && isempty(down(fitdb)) + resp = 'No'; +else + resp = questdlg('Save this Distribution Fitting session?', ... + 'Distribution Fitting', 'Yes', 'No', 'Cancel', 'Yes'); +end + +if isempty(resp) + resp = 'Cancel'; +end + +if isequal(resp,'Yes') + ok = dfsession('save'); + if ~ok + resp = 'Cancel'; + end +end + +ok = ~isequal(resp,'Cancel'); diff --git a/boosting/weightedstats/private/dfaxlimctrl.m b/boosting/weightedstats/private/dfaxlimctrl.m new file mode 100644 index 0000000..0fe1561 --- /dev/null +++ b/boosting/weightedstats/private/dfaxlimctrl.m @@ -0,0 +1,401 @@ +function dfaxlimctrl(dffig,onoff) +%DFAXLIMCTRL Turn on or off the controls for adjusting axis limits + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:16 $ +% Copyright 2001-2004 The MathWorks, Inc. + +% Remove controls from figure if requested +if isequal(onoff,'off') + a = findall(dffig,'Tag','axlimctrl'); + delete(a); + return +end + +% Add controls to figure. +% First find all axes in figure and their limits +ax1 = get(dffig,'CurrentAxes'); +lims = get(ax1,'XLim'); +lims(3:4) = get(ax1,'YLim'); + +% Create an arrow for labeling button controls +fcolor = get(dffig,'Color'); +ar = ... +[1 1 1 1 1 1 1 1 + 1 0 1 1 1 1 1 1 + 1 0 0 1 1 1 1 1 + 1 0 0 0 1 1 1 1 + 1 0 0 0 0 1 1 1 + 1 0 0 0 0 0 1 1 + 1 0 0 0 0 0 0 1 + 1 0 0 0 0 0 1 1 + 1 0 0 0 0 1 1 1 + 1 0 0 0 1 1 1 1 + 1 0 0 1 1 1 1 1 + 1 0 1 1 1 1 1 1 + 1 1 1 1 1 1 1 1]; +ar = repmat(ar,[1 1 3]); +ar(:,:,1) = min(ar(:,:,1),fcolor(1)); +ar(:,:,2) = min(ar(:,:,2),fcolor(2)); +ar(:,:,3) = min(ar(:,:,3),fcolor(3)); + +% Find axes position in pixel units +oldaxunits = get(ax1,'Units'); +set(ax1,'Units','pixel'); +axpos = get(ax1,'Position'); +axbottom = ax1; +axbottompos = axpos; + +% Compute the dimensions to use for text fields + +% First get the longest axis limit and measure it in an edit control +samptxt= ''; +for j=1:length(lims) + newtxt = num2str(lims(j)); + if length(newtxt)>length(samptxt) + samptxt = newtxt; + end +end +if length(samptxt)<7 + samptxt = samptxt(min(length(samptxt),1:7)); +end +temph = uicontrol(dffig,'style','edit','string',samptxt,... + 'position',[1 1 300 10],'Visible','off'); +extent = get(temph,'extent'); +delete(temph); + +% Next measure the y axis tick labels in a text control +ytxt = get(ax1,'YTickLabel'); +ytxt(:,end+1) = ' '; +a = uicontrol(dffig,'style','text','string',ytxt,... 
+ 'position',[1 1 300 10],'visible','off'); +e = get(a,'Extent'); +tickwidth = e(3); +delete(a); + +% Reserve room for the controls +oldunits = get(axbottom,'Units'); +set(axbottom,'Units','pixel'); +bht = ceil(extent(4)/2); % arrow button height +bwd = ceil(1.5*bht); % arrow button width +if axbottompos(2)<6*bht + oldtop = axbottompos(2)+axbottompos(4); + axbottompos(2) = 6*bht; + axbottompos(4) = max(1,oldtop - axbottompos(2)); + set(axbottom,'Position',axbottompos); + axpos = axbottompos; +end +leftlim = 1 + extent(3) + bwd + tickwidth; +if axbottompos(1)=lims(2)) | (limindex==2 & curval<=lims(1)) + return + end + +else + % Button press + + % If tick locations are not automatic, extend with more locations on each end + if ~fixedlabels + if logscale + % For log scale there are major and minor ticks, but we have no access + % to the minor tick labels or locations. Deal with this by creating + % arrays of tick labels likely to match the union of the major and + % minor ticks. + majorlo = min(locs); + while(majorlo/10 >= lims(1)) + majorlo = majorlo/10; + end + majorhi = max(locs); + while(majorhi*10 <= lims(2)) + majorhi = majorhi*10; + end + locs = [majorlo.*[(1:9)/10, (1:9)], ... + majorhi.*[(2:9)/10, (1:10)]]'; + locs = sort(locs); + locs(diff(locs)==0) = []; + else + % For linear scale just add two more entries on each end, + % one in case we move past the end and another in case the + % end tick is just beyond the end by a small amount + delta = locs(2)-locs(1); + locs = [locs(1)-2*delta, locs(1)-delta, locs, ... + locs(end)+delta, locs(end)+2*delta]; + end + end + + % Get current value of the most extreme tick label within bounds + if logscale + small = sqrt(eps); + else + small = max(abs(lims))*sqrt(eps); + end + if opt(2)=='h' + limindex = 2; + if logscale + jcurtick = sum(locs0 + return +end + +% Get handle to control containing the distribution list +if nargin<2 + dffig = dfgetset('dffig'); +end +hsel = getappdata(dffig,'selectioncontrols'); + +% No problem unless we have a probability plot +h = hsel(3); % handle of display type control +choice = get(hsel(3),'Value'); +ftypes = getappdata(hsel(3),'codenames'); +ftype = ftypes{choice}; +if ~isequal(ftype, 'probplot') + return +end + +% No problem unless distribution has support that excludes some data +ax = get(dffig,'CurrentAxes'); +distspec = getappdata(ax,'DistSpec'); +lobnd = distspec.support(1); +strict = ~distspec.closedbound(1); +if strict && lobnd>=xmin + ok = false; +elseif ~strict && lobnd>xmin + ok = false; +end diff --git a/boosting/weightedstats/private/dfcbkclear.m b/boosting/weightedstats/private/dfcbkclear.m new file mode 100644 index 0000000..bbfbe13 --- /dev/null +++ b/boosting/weightedstats/private/dfcbkclear.m @@ -0,0 +1,25 @@ +function dfcbkclear +%DFCBKCLEAR Callback for Clear button + +% $Revision: 1.1.6.3 $ $Date: 2004/01/24 09:35:20 $ +% Copyright 2003-2004 The MathWorks, Inc. 
+ +% Clear all saved fits from the plot and notify fits manager +fitdb = getfitdb; +fit = down(fitdb); +while(~isempty(fit)) + fit.plot = 0; + fit = right(fit); +end + +% Clear all datasets from the plot and notify data sets manager +dsdb = getdsdb; +ds = down(dsdb); +while(~isempty(ds)) + ds.plot = 0; + ds = right(ds); +end + +dfupdatexlim; +dfupdateallplots; +dfupdateylim; diff --git a/boosting/weightedstats/private/dfcheckselections.m b/boosting/weightedstats/private/dfcheckselections.m new file mode 100644 index 0000000..85ebe17 --- /dev/null +++ b/boosting/weightedstats/private/dfcheckselections.m @@ -0,0 +1,136 @@ +function [err,d,c,f]=dfcheckselections(data,censoring,frequency,dval,cval,fval) + +% For use by DFITTOOL + +% $Revision: 1.1.6.6 $ $Date: 2004/01/24 09:35:21 $ +% Copyright 2003-2004 The MathWorks, Inc. + +err = ''; +d_l = 0; +c_l = 0; +f_l = 0; +NONE = '(none)'; +d = []; +c = []; +f = []; + +if isequal(data, NONE) + err = sprintf('Invalid Data Choice: %s\n', NONE); +else + if isempty(data) + dataname = 'data variable'; + else + dataname = sprintf('"%s"',data); + end + try + if nargin<4 + d=evalin('base',data); + else + d = dval; + end + if isvector(d) && (length(d) > 1) + if any(isinf(d)) + err = sprintf('%s cannot contain Inf or -Inf\n', dataname); + elseif ~isreal(d) + err = sprintf('%s cannot be complex\n', dataname); + else + d_l = length(d); + end + else + err = sprintf('%s is not a vector\n', dataname); + end + catch + err = [err sprintf('Invalid expression: %s\n %s\n', dataname, lasterr)]; + end +end + +if (nargin<5) && (isempty(censoring) || isequal(censoring, NONE)) + c_l = -1; +else + if isempty(censoring) + censname = 'censoring variable'; + else + censname = sprintf('"%s"',censoring); + end + try + if nargin<5 + c=evalin('base',censoring); + else + c = cval; + end + if isempty(c) + c_l = -1; + elseif isvector(c) && (length(c) > 1) + if ~all(ismember(c, 0:1)) + err = [err sprintf('%s must be a logical vector.\n',censname)]; + elseif any(isinf(c)) + err = [err sprintf('%s cannot contain Inf or -Inf\n', censname)]; + elseif ~isreal(c) + err = [err sprintf('%s cannot be complex\n', censname)]; + else + c_l = length(c); + end + else + err = [err sprintf('%s is not a vector\n', censname)]; + end + catch + err = [err sprintf('Invalid expression: %s\n %s\n', censname, lasterr)]; + end +end + +if (nargin<6) && (isempty(frequency) || isequal(frequency, NONE)) + f_l = -1; +else + if isempty(frequency) + freqname = 'frequency variable'; + else + freqname = sprintf('"%s"',frequency); + end + try + if nargin<6 + f=evalin('base',frequency); + else + f = fval; + end + if isempty(f) + f_l = -1; + elseif isvector(f) && (length(f) > 1) + if any(f<0) || any(f~=round(f) & ~isnan(f)) + err = [err sprintf('%s values must be non-negative integers.\n',freqname)]; + elseif any(isinf(f)) + err = [err sprintf('%s cannot contain Inf or -Inf\n', freqname)]; + elseif ~isreal(f) + err = [err sprintf('%s cannot be complex\n', freqname)]; + else + f_l = length(f); + end + else + err = [err sprintf('%s is not a vector\n', freqname)]; + end + catch + err = [err sprintf('Invalid expression: %s\n %s\n', freqname, lasterr)]; + end +end + +% Check lengths if no other errors +if isequal(err, '') + if ((c_l ~= -1) && (c_l ~= d_l)) || ((f_l ~= -1) && (f_l ~= d_l)) + err = sprintf('Vector lengths must be equal\n'); + err = [err sprintf(' Data length: %d\n', d_l)]; + if (c_l ~= -1) && (c_l ~= d_l) + err = [err sprintf(' Censoring length: %d\n', c_l)]; + end + if (f_l ~= -1) && (f_l ~= d_l) + err = [err 
sprintf(' Frequency length: %d\n', f_l)]; + end + end +end + +% Must have some non-censored data +if isempty(err) && c_l~=-1 + if (f_l==-1 && all(c==1)) || (f_l~=-1 && all(c(f>0)==1)) + err = 'Cannot have all observations censored'; + end +end + + \ No newline at end of file diff --git a/boosting/weightedstats/private/dfcopyexrule.m b/boosting/weightedstats/private/dfcopyexrule.m new file mode 100644 index 0000000..c92b656 --- /dev/null +++ b/boosting/weightedstats/private/dfcopyexrule.m @@ -0,0 +1,43 @@ +function [newname, dataset, yl, yh, yle, yhe] = dfcopyexrule(name) +%DFCOPYEXRULE GUI helper to create a copy of an exclusion rule + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:22 $ +% Copyright 2003-2004 The MathWorks, Inc. + +exrule=find(getoutlierdb,'name',name); + +%create copy name +COPY = ' copy '; +index = strfind(name, COPY); +if isempty(index) + sourcename = sprintf('%s%s', name, COPY); +else + sourcename = sprintf('%s%s', name(1:index-1), COPY); +end +cn = 1; +newname = sprintf('%s%d', sourcename, cn); +%loop until unique name is found +while true + if isempty(find(getoutlierdb,'name',newname)) + break; + else + cn = cn+1; + newname = sprintf('%s%d', sourcename, cn); + end +end + +%make sure dataset still exists +if isempty(find(getdsdb, 'name', exrule.dataset)); + dataset='(none)'; +else + dataset=exrule.dataset; +end + +yl = exrule.YLow; +yh = exrule.YHigh; +yle = exrule.YLowLessEqual; +yhe = exrule.YHighGreaterEqual; + + + + diff --git a/boosting/weightedstats/private/dfcreatecopy.m b/boosting/weightedstats/private/dfcreatecopy.m new file mode 100644 index 0000000..eda89c7 --- /dev/null +++ b/boosting/weightedstats/private/dfcreatecopy.m @@ -0,0 +1,10 @@ +function [new, fittype]=dfcreatecopy(original); + +% $Revision: 1.1.6.3 $ $Date: 2004/01/24 09:35:23 $ +% Copyright 2003-2004 The MathWorks, Inc. + +fittype = original.fittype; + +new = copyfit(original); +new = java(new); + diff --git a/boosting/weightedstats/private/dfcreatedataset.m b/boosting/weightedstats/private/dfcreatedataset.m new file mode 100644 index 0000000..50edcfc --- /dev/null +++ b/boosting/weightedstats/private/dfcreatedataset.m @@ -0,0 +1,95 @@ +function [ds, err, dsname, plotok] = dfcreatedataset(varargin) +%DFCREATEDATASET Create dfittool data set +% DFCREATEDATASET(YEXPR,CEXPR,FEXPR,DSNAME) creates a data set named DSNAME +% using the data obtained by evaluating the expressions entered in the gui +% for Y, CENSORING, and FREQUENCY. +% +% DFCREATEDATASET(YDATA,CDATA,FDATA,DSNAME,YNAME,CNAME,FNAME) creates a data +% set named DSNAME using the y, censoring, and frequency data passed in via +% the command line. The "name" arguments are the names of these data +% variables, or empty if they are not simple variable names. + +% $Revision: 1.1.6.7 $ $Date: 2004/01/24 09:35:24 $ +% Copyright 2003-2004 The MathWorks, Inc. + +% Determine if we had data passed directly in from the command line +if nargin>=1 && isnumeric(varargin{1}) + fromgui = false; +else + fromgui = true; +end + +% Check for a data set name (we used to check for duplicate names +% before calling this function). 
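For reference, a hypothetical direct call to the dfcheckselections validator defined above (editor's sketch; the vectors are made up, and in the tool this function is normally reached from dfcreatedataset below): passing values as the fourth through sixth arguments skips the base-workspace evaluation of the expressions named in the first three.

d = [1.2 3.4 2.2 5.6];   % data
c = [0 0 1 0];           % censoring flags (third observation censored)
f = [1 2 1 1];           % non-negative integer frequencies
err = dfcheckselections('y', 'cens', 'freq', d, c, f);
% err comes back empty: all vectors are real, finite, and the same length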
+ +if nargin>=4 + dsname = varargin{4}; + if fromgui + % Get the array of data sets + dsdb = getdsdb; + dset = down(dsdb); + while(~isempty(dset)) + if strcmp(dset.name, dsname); + err = 'datanamethesame'; + ds = []; + plotok = false; + return; + end + dset = right(dset); + end + end +else + dsname = ''; +end + +n = min(length(varargin),3); +nameargs = cell(1,3); +if ~fromgui + % Input data entered at the command line, maybe input names in args 5-7 + nameargs(:) = {''}; + valargs = cell(1,3); + for j=1:n + valargs{j} = varargin{j}; + end + for j=5:nargin + nameargs{j-4} = varargin{j}; + end +else + % Input expressions from gui, get values by evaluating them + nameargs = varargin(1:n); + valargs = cell(1,0); +end + +% Check input expressions or data vectors +outvecs = cell(1,3); +[err,outvecs{:}] = dfcheckselections(nameargs{:}, valargs{:}); +if ~isequal(err, '') + ds = []; + return; +end + +% Make a new data set +ds = stats.dfdata(nameargs{:},dsname,outvecs{:}); +dsname = ds.name; + +% Set the function type before plotting +setftype(ds,dfgetset('ftype')); +ds.conflev = dfgetset('conflev'); + +% Plot the histogram or other empirical curve if possible +dffig = dfgetset('dffig'); +if dfcanplotdata(ds,dffig) + ds.plot = 1; + ds.plotok = 1; % this was the default when we created ds + plotok = 'true'; % use text; boolean causes problems in java caller +else + % Update the java dialog to show current flag state + % ds.plot = 0; % this was the default when we created ds + ds.plotok = 0; + plotok = 'false'; +end + +% Update the plot +dfupdatexlim; +dfupdateylim; +dfupdatelegend(dffig); diff --git a/boosting/weightedstats/private/dfcreateexclusionrule.m b/boosting/weightedstats/private/dfcreateexclusionrule.m new file mode 100644 index 0000000..f40a15f --- /dev/null +++ b/boosting/weightedstats/private/dfcreateexclusionrule.m @@ -0,0 +1,24 @@ +function err = dfcreateexclusionrule(name, dataset, yl, yh, yle, yhe) +%DFCREATEEXCLUSIONRULE Create dfittool exclusion rule + +% $Revision: 1.1.6.4 $ $Date: 2004/01/24 09:35:25 $ +% Copyright 2003-2004 The MathWorks, Inc. + +err = ''; +% check for duplicate name + outlierdb = getoutlierdb; + outlier = down(outlierdb); + + while(~isempty(outlier)) + if strcmp(outlier.name, name) + err = 'outliernamethesame'; + return; + end + outlier = right(outlier); + end + +% Make a new exclusion rule +stats.outlier(name, dataset, yl, yh, yle, yhe); + + + diff --git a/boosting/weightedstats/private/dfcreateplot.m b/boosting/weightedstats/private/dfcreateplot.m new file mode 100644 index 0000000..b3692fa --- /dev/null +++ b/boosting/weightedstats/private/dfcreateplot.m @@ -0,0 +1,145 @@ +function dffig = dfcreateplot +%DFCREATEPLOT Create plot window for DFITTOOL + +% $Revision: 1.1.6.8 $ $Date: 2004/03/09 16:17:03 $ +% Copyright 2003-2004 The MathWorks, Inc. + +% Get some screen and figure position measurements +tempFigure=figure('visible','off','units','pixels',... 
+ 'Tag','Distribution Fitting Figure'); +dfp=get(tempFigure,'position'); +dfop=get(tempFigure,'outerposition'); +diffp = dfop - dfp; +xmargin = diffp(3); +ymargin = diffp(4); +close(tempFigure) +oldu = get(0,'units'); +set(0,'units','pixels'); +screenSize=get(0,'screensize'); +screenWidth=screenSize(3); +screenHeight=screenSize(4); +set(0,'units',oldu'); + +% Get the desired width and height +width=dfp(3)*1.2 + xmargin; +height=dfp(4)*1.2 + ymargin; +if width > screenWidth + width = screenWidth-10-xmargin; +end +if height > screenHeight; + height = screenHeight-10-ymargin; +end + +% Calculate the position on the screen +leftEdge=min((screenWidth/3)+10+xmargin/2, screenWidth-width-10-2*xmargin); +bottomEdge=(screenHeight-height)/2; + +% Make an invisible figure to start +dffig=figure('Visible','off','IntegerHandle','off',... + 'HandleVisibility','callback',... + 'color',get(0,'defaultuicontrolbackgroundcolor'),... + 'name','Distribution Fitting Tool',... + 'numbertitle','off',... + 'units','pixels',... + 'position',[leftEdge bottomEdge width height], ... + 'CloseRequestFcn',@closefig, ... + 'PaperPositionMode','auto',... + 'doublebuffer','on',... + 'Dock','off'); + +dfgetset('dffig',dffig); + +% Set default print options +pt = printtemplate; +pt.PrintUI = 0; +set(dffig,'PrintTemplate',pt) + +% Add buttons along the top +dfaddbuttons(dffig); + +% We want a subset of the usual toolbar +% Instead of calling adjusttoolbar, there is a handlegraphics bug +% that turned the toolbar off when the buttons were created, so +% we have to toggle it back on. +dftoggletoolbar(dffig,'on'); + +% We want a subset of the usual menus and some more toolbar icons +dfadjustmenu(dffig); + +% Set up axes the way we want them +ax=axes('Parent',dffig, 'box','on','Tag','main',... + 'XLimMode','manual','YLimMode','manual','ZLimMode','manual',... + 'CLimMode','manual','AlimMode','manual'); + +% Adjust layout of buttons and graph +if ~ispc % some unix platforms seem to require this + set(dffig,'Visible','on'); + drawnow; +end +dfadjustlayout(dffig); + +% Remember current position +dfgetset('oldposition',get(dffig,'Position')); +dfgetset('oldunits',get(dffig,'Units')); + +% Now make the figure visible +if ispc + set(dffig,'visible','on'); +end +set(dffig, 'ResizeFcn','dfittool(''adjustlayout'')'); +drawnow; + +% Set up some listeners +hgpkg = findpackage('hg'); +axesC = hgpkg.findclass('axes'); + +% Create context menus for data and fit lines +dfdocontext('create', dffig); + +% Listen for figure position changes if resize function is questionable +if ~ispc + list(1) = handle.listener(dffig, findprop(handle(dffig),'position'), ... + 'PropertyPostSet', 'dfittool(''adjustlayout2'')'); + dfgetset('figlistener',list); +end + + +% ---------------------- helper to verify closing of figure +function closefig(varargin) +%CLOSEFIG Verify intention to close distribution fitting figure + +dsdb = getdsdb; +fitdb = getfitdb; + +% Offer to save session unless there's nothing to save +if isempty(down(dsdb)) && isempty(down(fitdb)) + resp = 'No'; +else + resp = questdlg('Save this Distribution Fitting session?', ... 
+ 'Distribution Fitting', 'Yes', 'No', 'Cancel', 'Yes'); +end + +if isempty(resp) + resp = 'Cancel'; +end + +if isequal(resp,'Yes') + ok = dfsession('save'); + if ~ok + resp = 'Cancel'; + end +end + +% Anything but cancel means go ahead and quit +if ~isequal(resp,'Cancel') + set(gcbf,'CloseRequestFcn',''); + + % Clear current session + dfsession('clear'); + + % Delete any dfittool-related figures + h = dfgetset('evaluateFigure'); + if ~isempty(h) & ishandle(h), delete(h); end + h = gcbf; + if ~isempty(h) & ishandle(h), delete(h); end +end diff --git a/boosting/weightedstats/private/dfcustomdist.m b/boosting/weightedstats/private/dfcustomdist.m new file mode 100644 index 0000000..7f787c8 --- /dev/null +++ b/boosting/weightedstats/private/dfcustomdist.m @@ -0,0 +1,183 @@ +function dfcustomdist(ignore1,ignore2,action) +%DFCUSTOM Callbacks for menu items related to custom distributions + +% $Revision: 1.1.6.3 $ $Date: 2004/02/01 22:10:36 $ +% Copyright 2003-2004 The MathWorks, Inc. + +fnpath = which('dfittooldists.m'); +dft = com.mathworks.toolbox.stats.DistributionFitting.getDistributionFitting; + +switch(action) + % -------------------------------- + case 'clear' % clear custom definitions, revert to standard ones + % Ask for confirmation + ok = questdlg('Clear custom distributions and revert to standard ones?',... + 'Clear Custom Distributions',... + 'Yes','No','Yes'); + if ~isequal(ok,'Yes') + return; + end + dfgetset('alldistributions',''); % clear all distributions + dists = dfgetdistributions('',false); % get built-in list + dfsetdistributions(dft,dists); % save them as current + showresults({dists.name},[]); + + % -------------------------------- + case 'define' % define a file of probability distribution specs + % Determine if such a file already exists + if isempty(fnpath) + % None found, so start editing a new one with default contents + fname = fullfile(matlabroot,'toolbox','stats','private',... + 'dftoolinittemplate.m'); + txt = textread(fname,'%s','whitespace','','bufsize',1e6); + com.mathworks.mlservices.MLEditorServices.newDocument(txt{1}); + else + % Edit the existing file + edit(fnpath) + end + + % Display a helpful message about what's going on + msg = sprintf(['Define your custom distributions by editing this file and'... + '\nsaving it on your path with the name dfittooldists.m.'... + '\n\nThen use File -> Custom Distributions -> Import '... + '\nto import your distributions.']); + msgbox(msg,'Define Custom Distributions','none','modal'); + + % -------------------------------- + case 'import' % import a file of probability distribution specs + % Remember current distributions + olds = dfgetset('alldistributions'); + + % Locate the file of new distribution settings + if isempty(fnpath) + fnpath = '*.m'; + end + [fn,pn] = uigetfile(fnpath,'Select file of distributions to import'); + if isequal(fn,0) + return + end + [dirpath,fname,fext] = fileparts(fn); + if ~isequal(fext,'.m') + errordlg(sprintf(['MATLAB .m file required.\n' ... + 'Can''t import distributions from the file %s.'],... + [fname fext]),... + 'Bad Selection'); + return + end + + % Go to that file's directory and try to run it + olddir = pwd; + dists = olds; + try + cd(pn); + [dists,errid,errmsg,newrows] = dfgetuserdists(dists,fname); + catch + errmsg = lasterr; + newrows = []; + end + cd(olddir); + + % Revert to previous distribution list if anything bad happened + if ~isempty(errmsg) + dists = olds; + errordlg(sprintf('Error trying to import custom distributions:\n%s',... + errmsg),... 
+ 'Import Custom Distributions','modal'); + newrows = []; + end + + % Sort by name + lowernames = lower(strvcat(dists.name)); + [ignore, ind] = sortrows(lowernames); + dists = dists(ind); + newrows = find(ismember(ind,newrows)); + + if isempty(errmsg) + showresults({dists.name},newrows); + end + dfsetdistributions(dft,dists); +end + +% --------------------------------- +function showresults(liststring,asterisk); +%SHOWRESULTS Stripped-down version of listdlg, just to show a list + +promptstring = 'New parametric distribution list:'; + +if nargin>=2 + for j=1:length(asterisk) + liststring{asterisk(j)} = sprintf('%s *',liststring{asterisk(j)}); + end + footnote = ~isempty(asterisk); +else + footnote = false; +end + +ex = get(0,'defaultuicontrolfontsize')*1.7; % extent height per line +fp = get(0,'defaultfigureposition'); +fus = 8; % frame/uicontrol spacing +ffs = 8; % frame/figure spacing +uh = 22; % uicontrol button height +listsize = [160 300]; +if footnote + footnoteheight = 2*ex; +else + footnoteheight = 0; +end + +w = 2*(fus+ffs)+listsize(1); +h = 2*ffs+6*fus+ex+listsize(2)+uh + footnoteheight; +fp = [fp(1) fp(2)+fp(4)-h w h]; % keep upper left corner fixed + +figcol = get(0,'defaultUicontrolBackgroundColor'); +fig_props = { ... + 'name' 'Imported Distributions' ... + 'color' figcol ... + 'resize' 'off' ... + 'numbertitle' 'off' ... + 'menubar' 'none' ... + 'windowstyle' 'modal' ... + 'visible' 'off' ... + 'integerhandle' 'off' ... + 'handlevisibility' 'callback' ... + 'position' fp ... + 'closerequestfcn' 'delete(gcbf)' ... + 'Dock' 'off' ... + }; +fig = figure(fig_props{:}); + +posn = [ffs+fus fp(4)-(ffs+fus+ex) ... + listsize(1) ex]; + +uicontrol('style','text','string',promptstring,... + 'horizontalalignment','left','position',posn); + +btn_wid = (fp(3)-2*(ffs+fus)-fus)/2; +liststring=cellstr(liststring); +listbox = uicontrol('style','listbox',... + 'position',[ffs+fus ffs+uh+4*fus+footnoteheight listsize],... + 'string',liststring,... + 'backgroundcolor',figcol,... + 'max',2,... + 'tag','listbox',... + 'value',[],... + 'callback', 'set(gcbo,''value'',0)'); + +%frameh = uicontrol('style','frame',... +% 'position',[ffs+fus-1 ffs+fus-1 btn_wid+2 uh+2],... +% 'backgroundcolor','k'); +if footnote + uicontrol('style','text','string','* Imported or changed',... + 'horizontalalignment','left',... + 'position',[ffs+fus, ffs+fus+uh+footnoteheight/4, listsize(1), footnoteheight]); +end + + +ok_btn = uicontrol('style','pushbutton',... + 'string','OK',... + 'position',[ffs+fus+listsize(1)/2-btn_wid/2 ffs+fus btn_wid uh],... + 'callback','delete(gcbf)'); + +% make sure we are on screen +placetitlebar(fig) +set(fig, 'visible','on'); diff --git a/boosting/weightedstats/private/dfdeleteexrule.m b/boosting/weightedstats/private/dfdeleteexrule.m new file mode 100644 index 0000000..ab407c2 --- /dev/null +++ b/boosting/weightedstats/private/dfdeleteexrule.m @@ -0,0 +1,57 @@ +function dfdeleteexrule(names) +%DFDELETEEXRULE GUI helper to delete an exclusion rule + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:28 $ +% Copyright 2003-2004 The MathWorks, Inc. 
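% Editorial note (hedged sketch, not part of the original file): the fit and
% outlier databases used below behave like linked lists of objects --
% down(db) returns the first entry and right(obj) the next one, with []
% marking the end.  A minimal traversal, e.g. to list every stored fit name:
%
%   fit = down(getfitdb);
%   while ~isempty(fit)
%       disp(fit.name);
%       fit = right(fit);
%   end
%
% The loop below uses the same idiom to find fits that reference each
% exclusion rule selected for deletion, so the user can be warned that those
% fits will be deleted as well.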
+ +fitdb = getfitdb; +fit = down(fitdb); +OKtoDelete = true; +fitsToDelete = {}; + +if ~isempty(fit) + msg = ''; + for i=1:length(names) + fitCnt = 0; + fitnames = ''; + m=''; + fit = down(fitdb); + while(~isempty(fit)) + if strcmp(names{i}, fit.exclusionrulename) + fitsToDelete{1, end + 1} = fit.name; + if fitCnt > 0 + fitnames = [fitnames, ', ']; + end; + fitCnt = fitCnt + 1; + fitnames = [fitnames, fit.name]; + end + fit = right(fit); + end + if fitCnt == 1 + m = sprintf('If you delete "%s", the following fit will also be deleted: %s\n', names{i}, fitnames); + elseif fitCnt > 1 + m = sprintf('If you delete "%s", the following fits will also be deleted: %s\n', names{i}, fitnames); + end + msg = [msg, m]; + end + if length(msg) > 0 + button = questdlg(msg, 'Deleting exclusion rules', 'OK', 'Cancel', 'OK'); + if ~strcmp(button, 'OK') + OKtoDelete = false; + end + end +end + + +if OKtoDelete + import com.mathworks.toolbox.stats.*; + if ~isempty(fitsToDelete) + FitsManager.getFitsManager.deleteFits(fitsToDelete); + end + OutliersManager.getOutliersManager.deleteOutliers(names); +end + + + + + diff --git a/boosting/weightedstats/private/dfdelgraphexclude.m b/boosting/weightedstats/private/dfdelgraphexclude.m new file mode 100644 index 0000000..0f53f0b --- /dev/null +++ b/boosting/weightedstats/private/dfdelgraphexclude.m @@ -0,0 +1,13 @@ +function dfdelgraphexclude +%DFDELGRAPHEXCLUDE Called when the exclusion dataset changes + +% Copyright 2003-2004 The MathWorks, Inc. +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:29 $ + +% Find the exclusion graph's figure window, and delete it +t = get(0,'ShowHiddenHandles'); +set(0,'ShowHiddenHandles','on'); +c = get(0,'Child'); +f = findobj(c,'flat','Type','figure','Tag','dfexcludegraph'); +set(0,'ShowHiddenHandles',t); +delete(f); diff --git a/boosting/weightedstats/private/dfdocontext.m b/boosting/weightedstats/private/dfdocontext.m new file mode 100644 index 0000000..0bdc60e --- /dev/null +++ b/boosting/weightedstats/private/dfdocontext.m @@ -0,0 +1,252 @@ +function dfdocontext(varargin) +%DFDOCONTEXT Perform context menu actions for distribution fitting tool + +% Copyright 2001-2004 The MathWorks, Inc. +% $Revision: 1.1.6.7 $ $Date: 2004/03/09 16:17:04 $ +import com.mathworks.toolbox.stats.*; + + +% Special action to create context menus +if isequal(varargin{1},'create') + makecontextmenu(varargin{2}); + return +end + +% Get information about what invoked this function +obj = gcbo; +action = get(obj,'Tag'); +h = gco; +if isempty(h), return; end +dffig = gcbf; + +% Set up variables that define some menu items +[sizes styles markers] = getmenuitems; +styles{end+1} = 'none'; + +changed = true; % did a line property change? 
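% Editorial note (hedged sketch): the dispatch below is driven entirely by
% the Tag of the selected uimenu item, as configured in makecontextmenu at
% the bottom of this file.  For example, the numeric line-width items carry
% tags such as '2' and fall through to the final "otherwise" branch, which
% effectively does:
%
%   j = str2num(get(gcbo,'Tag'));               % e.g. '2' -> 2
%   if ~isempty(j), set(gco,'LineWidth',j); end
%
% so adding another width choice only requires extending the "sizes" vector
% returned by getmenuitems.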
+ +switch action + + % This case is triggered when we display the menu + case {'fitcontext' 'datacontext'} + % Store a handle to the object that triggered this menu + set(obj,'UserData',h); + + hObject = get(h,'UserData'); + c = findall(obj,'Type','uimenu'); + hBounds = findall(c,'flat','Tag','confbounds'); + ftype = dfgetset('ftype'); + + % Enable or disable as appropriate + hMarker = findall(c,'flat','Tag','marker'); + if isequal(action,'datacontext') + hLineStyle = findall(c,'flat','Tag','linestyle'); + hLineWidth = findall(c,'flat','Tag','linewidth'); + hBinRules = findall(c,'flat','Tag','binrules'); + if isequal(ftype,'probplot') + set(hMarker,'Enable','on'); + set(hLineStyle,'Enable','off'); + set(hLineWidth,'Enable','off'); + set(hBinRules,'Enable','off'); + elseif isequal(ftype,'pdf') + set(hMarker,'Enable','off'); + set(hLineStyle,'Enable','on'); + set(hLineWidth,'Enable','on'); + set(hBinRules,'Enable','on'); + else + set(hMarker,'Enable','off'); + set(hLineStyle,'Enable','on'); + set(hLineWidth,'Enable','on'); + set(hBinRules,'Enable','off'); + end + else + if ~hObject.iscontinuous && isequal(ftype,'pdf') + set(hMarker,'Enable','on'); + else + set(hMarker,'Enable','off'); + end + end + try + hasconfbounds = hObject.distspec.hasconfbounds; + catch + hasconfbounds = false; + end + if isequal(ftype,'pdf') || isequal(ftype,'probplot') || ... + (isequal(action,'fitcontext') && ~hasconfbounds) || ... + (isequal(action,'datacontext') && isequal(ftype,'icdf')) + set(hBounds,'Enable','off'); + else + set(hBounds,'Enable','on'); + end + + set(c,'Checked','off'); + + % Fix check mark for confidence bounds + if hObject.showbounds + set(hBounds,'Checked','on'); + end + + % Fix check marks on line width and line style cascading menus + w = get(h,'LineWidth'); + u = findall(c,'flat','Tag',num2str(w)); + if ~isempty(u) + set(u,'Checked','on'); + end + w = get(h,'LineStyle'); + u = findall(c,'flat','Tag',w); + if ~isempty(u) + set(u,'Checked','on'); + end + w = get(h,'Marker'); + u = findall(c,'flat','Tag',w); + if ~isempty(u) + set(u,'Checked','on'); + end + return + + % Remaining cases are triggered by selecting menu items + case 'confbounds' + hObject = get(h,'UserData'); + hObject.showbounds = ~hObject.showbounds; + nm = get(hObject,'name'); + htag = get(h,'Tag'); + if isequal(htag,'dfdata') + DataSetsManager.getDataSetsManager.dataSetChanged(java(hObject),nm,nm); + else + FitsManager.getFitsManager.fitChanged(java(hObject),nm,nm); + end + + case 'color' + oldcolor = get(h,'Color'); + newcolor = uisetcolor(oldcolor); + if ~isequal(oldcolor,newcolor) + set(h,'Color',newcolor); + end + + case styles + set(h,'LineStyle',action); + + case markers + if isequal(action,'point') + msize = 12; + else + msize = 6; + end + set(h,'Marker',action,'MarkerSize',msize); + + % Either delete a fit, or a hide a fit or data set + case {'hidecurve' 'deletefit'} + htag = get(h,'Tag'); + if isequal(htag,'distfit') || isequal(htag,'dfdata') + hndl = get(h,'UserData'); + if isequal(action,'hidecurve') + hndl.plot = 0; + nm = get(hndl,'name'); + else + % The delete action appears on the fit menu only, not the data set menu + FitsManager.getFitsManager.deleteFits(java(hndl)); + end + end + changed = false; + + % Edit a fit + case 'editfit' + htag = get(h,'Tag'); + if isequal(htag,'distfit') % should always be true + hndl = get(h,'UserData'); + FitsManager.getFitsManager.editFit(hndl.name); + end + changed = false; + + % Bring up the "Set Bin Width Rules" dialog for this data set + case 'binrules' + htag = 
get(h,'Tag'); + if isequal(htag,'dfdata') + hndl = get(h,'UserData'); + nm = get(hndl,'name'); + bw = com.mathworks.toolbox.stats.BinWidth.getBinWidth; % get dialog + bw.displayBinWidth(nm); % display dialog for this data set + end + changed = false; + + % If the menu item is a number, it is a line width + otherwise + j = str2num(action); + if ~isempty(j) + set(h,'LineWidth',j); + end + +end + +if changed + % Save plot info in the fit or data set object + hObject = get(h,'UserData'); + savelineproperties(hObject); + + % Update legend + dfupdatelegend(dffig); +end + + +% ---------------------- helper to make context menu +function makecontextmenu(dffig) +%MAKECONTEXTMENU Creates context menu for curve fitting figure + +% Create context menus for fits, data curve, probability plot data curves +cFit = uicontextmenu('Parent',dffig,'Tag','fitcontext','Callback',@dfdocontext); +uimenu(cFit,'Label','Color...','Tag','color','Callback',@dfdocontext); + +% Add menu items for line and marker control +uwidth = uimenu(cFit,'Label','Line &Width','Tag','linewidth'); +ustyle = uimenu(cFit,'Label','Line &Style','Tag','linestyle'); +umark = uimenu(cFit,'Label','Marker','Tag','marker','Position',2); + +% Add menu items to control confidence bounds +uimenu(cFit,'Label','Confidence &Bounds','Callback',@dfdocontext,... + 'Tag','confbounds'); + +% Get menu item labels and tags +[sizes styles markers slabels mlabels] = getmenuitems; + +for j=1:length(markers) + uimenu(umark,'Label',mlabels{j},'Callback',@dfdocontext,'Tag',markers{j}); +end + +% Sub-menus for line widths +for i = 1:length(sizes) + val = num2str(sizes(i)); + uimenu(uwidth,'Label',val,'Callback',@dfdocontext,'Tag',val); +end + +% Sub-menus for line styles +for j=1:length(styles) + uimenu(ustyle,'Label',slabels{j},'Callback',@dfdocontext,'Tag',styles{j}); +end + +% Copy the fit menu to create a data menu +cData = copyobj(cFit,dffig); +set(cData,'Tag','datacontext') + +% Add items for fit menus only +uimenu(cFit,'Label','&Hide Fit','Tag','hidecurve','Callback',@dfdocontext,... + 'Separator','on'); +uimenu(cFit,'Label','&Delete Fit','Tag','deletefit','Callback',@dfdocontext); +uimenu(cFit,'Label','&Edit Fit','Tag','editfit','Callback',@dfdocontext); + +% Add items for data menus only +uimenu(cData,'Label','&Hide Data','Tag','hidecurve',... + 'Callback',@dfdocontext,'Separator','on'); +uimenu(cData,'Label','Set Bin &Rules','Tag','binrules',... + 'Callback',@dfdocontext,'Separator','on'); + +% -------------- helper to get menu item labels +function [sizes,styles,markers,slabels,mlabels] = getmenuitems +%GETMENUITEMS Get items for curve fitting context menus +sizes = [0.5 1 2 3 4 5 6 7 8 9 10]; +styles = {'-' '--' ':' '-.'}; +markers = {'+' 'o' '*' '.' 'x' 'square' 'diamond' ... + 'v' '^' '<' '>' 'pentagram' 'hexagram'}; +slabels = {'solid' 'dash' 'dot' 'dash-dot'}; +mlabels = {'plus' 'circle' 'star' 'point' 'x-mark' 'square' 'diamond' ... + 'triangle (down)' 'triangle (up)' 'triangle (left)' ... + 'triangle (right)' 'pentagram' 'hexagram'}; diff --git a/boosting/weightedstats/private/dfdupfigure.m b/boosting/weightedstats/private/dfdupfigure.m new file mode 100644 index 0000000..09809bc --- /dev/null +++ b/boosting/weightedstats/private/dfdupfigure.m @@ -0,0 +1,51 @@ +function dfdupfigure(dffig) +%DFDUPFIGURE Make a duplicate, editable copy of the distribution fitting figure + +% $Revision: 1.1.6.5 $ $Date: 2004/03/26 13:30:58 $ +% Copyright 2003-2004 The MathWorks, Inc. 
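% Editorial usage sketch (hedged, not part of the original file): this helper
% is normally invoked on the main dfittool figure, whose handle is cached via
% dfgetset, e.g.
%
%   dffig = dfgetset('dffig');   % handle of the Distribution Fitting figure
%   dfdupfigure(dffig);          % copy its axes and legend into a new figure
%
% The duplicate can then be edited freely without affecting the tool's own
% window.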
+ +% Copy the regular axes, not the legend axes +f = figure; +ax = findall(dffig,'Type','axes','Tag','main'); +copyobj(ax,f); +newax = findall(f,'Type','axes','Tag','main'); + +% Adjust layout in new figure, but don't add axis controls +dfadjustlayout(f,'off'); + +for i=1:length(ax) + % Remove any context menus and callbacks associated with the old figure + set(findall(newax(i),'Type','line'),... + 'DeleteFcn','','UIContextMenu',[],'ButtonDownFcn',''); + + % Make a new legend based on the original, if any + [legh,unused,h0,txt] = legend(ax(i)); + if length(h0)>0 + c0 = get(ax(i),'Child'); + c1 = get(newax(i),'Child'); + h1 = h0; + for j=length(h0):-1:1 + k = find(c0==h0(j)); + if isempty(k) + h1(j) = []; + txt(j) = []; + else + % Convert to lineseries + h1(j) = hgline2lineseries(c1(k(1))); + end + end + + % It's hard to match the old legend position adequately because the + % two figure sizes may be very different. Since the original legend + % positions were TL or TR, we'll just pick whichever of these is + % closest to the current position. If the user moved the legend in + % the original figure, he or she may need to do the same here. + oldpos = get(legh,'Position'); + if oldpos(1)<.4 + newpos = 'NW'; + else + newpos = 'NE'; + end + legend(newax(i),h1,txt,'Location', newpos); + end +end diff --git a/boosting/weightedstats/private/dfevaluate.m b/boosting/weightedstats/private/dfevaluate.m new file mode 100644 index 0000000..59ee0bf --- /dev/null +++ b/boosting/weightedstats/private/dfevaluate.m @@ -0,0 +1,77 @@ +function [errmsg,x,values] = dfevaluate(fitNames,x,fun,wantBounds,confLevel,plotFun,dum) +%DFEVALUATE Evaluate fits for DFITTOOL + +% $Revision: 1.1.6.3 $ $Date: 2004/01/24 09:35:32 $ +% Copyright 1993-2004 The MathWorks, Inc. + + +% If the function is th empty string, clear the plot (if there is one) and +% clear any saved data. +if isempty(fitNames) + dfevaluateplot(false,false); % closes the plot window + dfgetset('evaluateResults', []); + return +end + +nfits = length(fitNames); +try + x = sprintf('[ %s ]', x); % allow an unbracketed list of numbers to work + x = eval(x); + confLevel = eval(confLevel) ./ 100; +catch + x = []; + values = zeros(0, nfits*(1+2*wantBounds)); + errmsg = sprintf('Invalid MATLAB expression: %s',lasterr); + h = []; + return +end +errmsg = ''; + +x = x(:); +n = length(x); + +% % This is enforced by the evaluate panel. +% switch fun +% case {'pdf' 'hazrate' 'condmean'} +% % No bounds allowed for pdf, hazrate, or conditional mean. +% wantBounds = false; +% otherwise % {'cdf' 'icdf' 'survivor' 'cumhazard' 'probplot'} +% % bounds are allowed +% end + +% Output table will have first column for data, then for each fit, one +% column for function, two columns for bounds (if requested). +values = repmat(NaN, n, nfits*(1+2*wantBounds)); + +fitdb = getfitdb; +for i = 1:nfits + fit = find(fitdb, 'name', fitNames{i}); + % Cannot compute bounds for kernel smooths and certain parametric fits. + getBounds = wantBounds && ... + (~isequal(fit.fittype, 'smooth') && fit.distspec.hasconfbounds); + + % Evaluate the requested function for this fit. + if getBounds + [y,ylo,yup] = eval(fit,x,fun,confLevel); + values(:,3*i-2) = y; + values(:,3*i-1) = ylo; + values(:,3*i) = yup; + else + y = eval(fit,x,fun); + if wantBounds + values(:,3*i-2) = y; + else + values(:,i) = y; + end + end +end + +% Save the results for SaveToWorkSpace. The plot function can be called +% directly from java, so it uses those saved results as well. 
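% Editorial note on the saved layout (derived from the loop above): when
% bounds are requested, fit i occupies columns 3*i-1 .. 3*i+1 of the saved
% matrix [x,values], so each row has the form
%
%   [x, y_1, ylo_1, yup_1, y_2, ylo_2, yup_2, ...]
%
% and without bounds simply [x, y_1, y_2, ...], one column per fit.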
+dfgetset('evaluateResults', [x,values]); + +% Save information about the fits that we've evaluated for the plot function. +dfgetset('evaluateFun', fun); +dfgetset('evaluateInfo', struct('fitNames',{fitNames}, 'wantBounds',wantBounds)); + +dfevaluateplot(plotFun,dum); diff --git a/boosting/weightedstats/private/dfevaluateplot.m b/boosting/weightedstats/private/dfevaluateplot.m new file mode 100644 index 0000000..971e606 --- /dev/null +++ b/boosting/weightedstats/private/dfevaluateplot.m @@ -0,0 +1,170 @@ +function dfevaluateplot(plotFun,dum) +%DFEVALUATEPLOT Plot data and evaluated fits for DFITTOOL + +% $Revision: 1.1.6.3 $ $Date: 2004/01/24 09:35:33 $ +% Copyright 1993-2004 The MathWorks, Inc. + +plotfig = dfgetset('evaluateFigure'); + +% If no plotting selected, delete the existing figure if there is one +if ~plotFun % && ~plotData + if ~isempty(plotfig) && ishandle(plotfig) +% h = findobj(allchild(plotfig),'flat','serializable','on'); +% delete(h); +% plotaxes = axes('Visible','on', 'XTick',[], 'YTick',[], 'Parent',plotfig); +% text(.5, .5, xlate('Press "Apply" to create a new plot'), ... +% 'Parent',plotaxes, 'HorizontalAlignment','center'); + delete(plotfig); + dfgetset('evaluateFigure',plotfig); + end + return; +end + +% Get the current evaluated results +evaluateResults = dfgetset('evaluateResults'); +x = evaluateResults(:,1); +values = evaluateResults(:,2:end); + +% Get the current information about the evaluated results +evaluateInfo = dfgetset('evaluateInfo'); +fitNames = evaluateInfo.fitNames; +wantBounds = evaluateInfo.wantBounds; +fun = dfgetset('evaluateFun'); + +nfits = length(fitNames); + +% Create plotting figure if it does not yet exist +if isempty(plotfig) || ~ishandle(plotfig) + plotfig = figure('Visible','on', ... + 'IntegerHandle','off',... + 'HandleVisibility','callback',... + 'name','Distribution Fitting Evaluate',... + 'numbertitle','off',... + 'PaperPositionMode','auto',... + 'doublebuffer','on',... + 'CloseRequestFcn',@closefig); + dfgetset('evaluateFigure',plotfig); +end + +% New or old, prepare figure by removing old contents +h = findobj(allchild(plotfig),'flat','serializable','on'); +delete(h); + +plotaxes = axes('Parent',plotfig); + +% Will save fit line handles for legend, but not conf bound handles. +lineHndls = repmat(NaN,nfits,1); + +% % Need to save fit/dataset names and line handles for legend +% nhndls = nfits * (plotFun + plotData); % might multiple count some datasets +% lineHndls = repmat(NaN,nhndls,1); +% lgndNames = cell(nhndls,1); +% lgndOrder = zeros(nhndls,1); +% nfitHndls = nfits * plotFun; % how many fits will be plotted +% lineCnt = nfitHndls; % keep track of how many (unique) legend items + +% If there's only one point to plot, make it more visible. +if isscalar(x) + marker = '.'; +else + marker = 'none'; +end + +fitdb = getfitdb; +for i = 1:nfits + fit = find(fitdb, 'name', fitNames{i}); + getBounds = wantBounds && ... + (~isequal(fit.fittype, 'smooth') && fit.distspec.hasconfbounds); + + if wantBounds + y = values(:,3*i-2); + if getBounds + ylo = values(:,3*i-1); + yup = values(:,3*i); + end + else + y = values(:,i); + end + + % Plot the function (and bounds) for this fit. + if plotFun +% lgndNames{i} = fit.name; + color = fit.ColorMarkerLine{1}; + lineHndls(i) = line(x,y, 'LineStyle','-', ... + 'Marker',marker, 'Color',color, 'Parent',plotaxes); + if getBounds + line(x,ylo, 'LineStyle','--', ... + 'Marker',marker, 'Color',color, 'Parent',plotaxes); + line(x,yup, 'LineStyle','--', ... 
+ 'Marker',marker, 'Color',color, 'Parent',plotaxes); + end + +% % If data are not being plotted, the fits will appear in the legend +% % in their natural order from fitNames. Otherwise, lgndOrder will +% % be modified below so that they will appear under their dataset. +% lgndOrder(i) = i; + end + +% % Plot the empirical function (and bounds) for the data. +% if plotData +% % First figure out if this dataset has already been plotted +% ds = fit.dshandle; +% dataIdx = strmatch(ds.name, lgndNames((nfitHndls+1):lineCnt),'exact'); +% if isempty(dataIdx) +% lineCnt = lineCnt + 1; % one more line (dataset) has been plotted +% dataIdx = lineCnt - nfitHndls; +% lgndNames{lineCnt} = ds.name; +% color = ds.ColorMarkerLine{1}; +% if wantBounds && ~isequal(fun,'icdf') +% % *** this uses the current conf level saved in the dataset, not +% % *** the one requested from the evaluate panel +% [xdata,ydata,ydatabounds] = getplotdata(ds,fun,false); +% lineHndls(lineCnt) = line(xdata,ydata, 'LineStyle','-', ... +% 'Marker','none', 'Color',color, 'Parent',plotaxes); +% line(xdata,ydatabounds(:,1), 'LineStyle','--', ... +% 'Marker','none', 'Color',color, 'Parent',plotaxes); +% line(xdata,ydatabounds(:,2), 'LineStyle','--', ... +% 'Marker','none', 'Color',color, 'Parent',plotaxes); +% else +% [xdata,ydata] = getplotdata(ds,fun); +% lineHndls(lineCnt) = line(xdata,ydata, 'LineStyle','-', ... +% 'Marker','none', 'Color',color, 'Parent',plotaxes); +% end +% +% % Datasets will appear in the legend in their natural order +% % from fitNames, and the corresponding fits will appear +% % directly below. If more than one fit use the same dataset, +% % that dataset will appear only once. +% lgndOrder(lineCnt) = 10000*dataIdx; +% end +% +% % Put the current fit below its dataset in the legend. +% if plotData +% lgndOrder(i) = 10000*dataIdx + i; +% end +% end +end + +if plotFun % || plotData +% set(plotaxes, 'XLim', [min(x), max(x)]); +% [dum,ord] = sort(lgndOrder(1:lineCnt)); +% legend(plotaxes,lineHndls(ord),lgndNames(ord),0); + legend(plotaxes,lineHndls,fitNames,0); + figure(plotfig); +end + +% ------------------------------ +function closefig(varargin) +%CLOSEFIG Close this figure, but also update check box in evaluate panel + +% Delete the figure containing the evaluate plot +h = gcbf; +if ~isempty(h) && ishandle(h) + delete(h); +end + +% Update the checkbox +evp = com.mathworks.toolbox.stats.Evaluate.getEvaluatePanel; +if ~isempty(evp) && ~isequal(evp,0) + evp.setPlotCB(false); +end diff --git a/boosting/weightedstats/private/dfexport2workspace.m b/boosting/weightedstats/private/dfexport2workspace.m new file mode 100644 index 0000000..e64aeee --- /dev/null +++ b/boosting/weightedstats/private/dfexport2workspace.m @@ -0,0 +1,11 @@ +function dfexport2workspace() +% DFEXPORT2WORKSPACE Helper file for the Distribution Fitting tool +% DFEXPORT2WORKSPACE gets the saved evaluated results and passes them to +% export2wsdlg + +% Copyright 2003-2004 The MathWorks, Inc. +% $Revision: 1.1.6.2 $ + +results = dfgetset('evaluateResults'); +export2wsdlg({'Save evaluate results to a MATLAB variable named:'}, ... 
+ {'evaluateresults'}, {results}); diff --git a/boosting/weightedstats/private/dffig2m.m b/boosting/weightedstats/private/dffig2m.m new file mode 100644 index 0000000..251be92 --- /dev/null +++ b/boosting/weightedstats/private/dffig2m.m @@ -0,0 +1,880 @@ +function dffig2m(dffig,outfilename) +%DFFIG2M Turn figure into an M file that can produce the figure + +% $Revision: 1.1.6.12 $ $Date: 2004/03/22 23:55:33 $ +% Copyright 2003-2004 The MathWorks, Inc. + +dsdb = getdsdb; +fitdb = getfitdb; +if isempty(down(dsdb)) && isempty(down(fitdb)) + emsg = 'Cannot save M file when no datasets or fits exist.'; + errordlg(emsg,'Error Saving M File','modal'); + return +end + +if nargin<1 + dffig = dfgetset('dffig'); +end + +if nargin<2 + % Get file name to use, remember the directory name + olddir = dfgetset('dirname'); + filespec = [olddir '*.m']; + [outfilename,pn] = uiputfile(filespec,'Save M File'); + if isequal(outfilename,0) || isequal(pn,0) + return + end + if ~ismember('.',outfilename) + outfilename = [outfilename '.m']; + end + dfgetset('dirname',pn); + outfilename = sprintf('%s%s',pn,outfilename); +end + +% Get M file name with .m suffix, and get corresponding function name +if length(outfilename)<2 || ~isequal(outfilename(end-1:end),'.m') + outfilename = sprintf('%s.m',outfilename); +end +fcnname = outfilename(1:end-2); +k = max(find(fcnname(1:end-1)=='\')); +if ~isempty(k) + fcnname = fcnname(k+1:end); +end +k = max(find(fcnname(1:end-1)=='/')); +if ~isempty(k) + fcnname = fcnname(k+1:end); +end + + +% Set up some variables for later +allprop = {'Color' 'Marker' 'LineStyle' 'LineWidth' 'MarkerSize'}; +showlegend = isequal(dfgetset('showlegend'),'on'); +ftype = dfgetset('ftype'); +alpha = 1 - dfgetset('conflev'); + +% Create arrays to receive code text +blkc = cell(0,1); % block of comment lines +blks = cell(0,1); % block of setup lines +blkd = cell(0,1); % block of data-related lines +blkf = cell(0,1); % block of fit-related lines +blke = cell(0,1); % block of lines at end + +% Write introduction to dataset section, including figure +% preparation code +blks{end+1} = '% Set up figure to receive datasets and fits'; +blks{end+1} = 'f_ = clf;'; +blks{end+1} = 'figure(f_);'; +if showlegend + blks{end+1} = 'legh_ = []; legt_ = {}; % handles and text for legend'; +end + +% Process each dataset +exprlist = {}; % names and expressions of the data, censoring, frequency +arglist = {}; % variable names to use for each expression +ds = down(dsdb); +numds = 0; +while(~isempty(ds)) + numds = numds + 1; + [blkc,blkd,exprlist,arglist,showbounds,onplot] = ... + writedset(blkc,blkd,ds,exprlist,arglist,allprop,alpha); + + if onplot && showlegend + blkd{end+1} = 'legh_(end+1) = h_;'; + blkd{end+1} = sprintf('legt_{end+1} = ''%s'';',quotedtext(ds.name)); + if showbounds + blkd{end+1} = 'legh_(end+1) = hb_;'; + blkd{end+1} = sprintf('legt_{end+1} = ''%g%% confidence bounds'';',... 
+ 100*(1-alpha)); + end + end + ds = right(ds); +end + +% Set up for plotting fits +anycontinuous = false; +anydiscrete = false; +ft = down(fitdb); +while(~isempty(ft)) + if ft.iscontinuous + anycontinuous = true; + else + anydiscrete = true; + end + ft = right(ft); +end + +% Create a suitable X vector, may depend on whether it's discrete +if ~isequal(ftype,'pdf') || ~anydiscrete + blkf{end+1} = sprintf('x_ = linspace(xlim_(1),xlim_(2),100);'); +elseif ~anycontinuous + blkf{end+1} = 'incr_ = max(1,floor((xlim_(2)-xlim_(1))/100));'; + blkf{end+1} = 'x_ = floor(xlim_(1)):incr_:ceil(xlim_(2));'; +else + blkf{end+1} = sprintf('xc_ = linspace(xlim_(1),xlim_(2),100);'); + blkf{end+1} = 'incr_ = max(1,floor((xlim_(2)-xlim_(1))/100));' + blkf{end+1} = 'xd_ = floor(xlim_(1)):incr_:ceil(xlim_(2));'; +end + +% Process each fit +numfit = 0; +ft = down(fitdb); +anySmoothFits = false; +while(~isempty(ft)) + numfit = numfit+1; + fitname = ft.name; + + % Create code to re-create this fit + blkf{end+1} = sprintf('\n%% --- Create fit "%s"',fitname); + + % Call subfunction to generate code for each type + if isequal(getfittype(ft),'param') + [blkf,showbounds,onplot] = writepfit(blkf,ft,alpha,allprop,... + anycontinuous,anydiscrete,exprlist,arglist); + else + anySmoothFits = true; + [blkf,onplot] = writenpfit(blkf,ft,alpha,allprop,... + anycontinuous,anydiscrete,exprlist,arglist); + showbounds = false; + end + + % Add legend if requested + if onplot && showlegend + blkf{end+1} = 'legh_(end+1) = h_;'; + blkf{end+1} = sprintf('legt_{end+1} = ''%s'';',quotedtext(ft.name)); + if showbounds + blkf{end+1} = 'legh_(end+1) = hb_;'; + blkf{end+1} = sprintf('legt_{end+1} = ''%g%% confidence bounds'';',... + 100*(1-alpha)); + end + end + ft = right(ft); +end + +% In setup section, create empty axes and set some properties +if ~isequal(ftype,'probplot') + blks{end+1} = 'ax_ = newplot;'; +else + dtype = dfgetset('dtype'); + if ischar(dtype) + blks{end+1} = sprintf('probplot(''%s'');', dtype); + else + blks{end+1} = sprintf(... + 'dist_ = dfswitchyard(''dfgetdistributions'',''%s'');',... + dtype.distspec.code); + blks{end+1} = sprintf('probplot({dist_,%s});',... 
+ cell2text(num2cell(dtype.params))); + end + blks{end+1} = 'ax_ = gca;'; + blks{end+1} = 'title(ax_,'''');'; +end + +blks{end+1} = 'set(ax_,''Box'',''on'');'; +if isequal(dfgetset('showgrid'),'on') + blks{end+1} = 'grid(ax_,''on'');'; +end +blks{end+1} = 'hold on;'; + +% At end of data set section, set x axis limits +blkd{end+1} = sprintf('\n%% Nudge axis limits beyond data limits'); +blkd{end+1} = 'xlim_ = get(ax_,''XLim'');'; +blkd{end+1} = 'if all(isfinite(xlim_))'; +blkd{end+1} = ' xlim_ = xlim_ + [-1 1] * 0.01 * diff(xlim_);'; +blkd{end+1} = ' set(ax_,''XLim'',xlim_)'; +blkd{end+1} = 'end'; + + +% Finish up +blke{end+1} = 'hold off;'; +if showlegend + axold = get(dffig,'CurrentAxes'); + legh = legend('-find',axold); + if isempty(legh) || ~ishandle(legh) + oldpos = [.8 .8 .1 .1]; % close to TR position + else + oldpos = get(legh,'Position'); + end + if oldpos(1)<.4 + newpos = 'NorthWest'; + else + newpos = 'NorthEast'; + end + + blke{end+1} = sprintf('legend(ax_,legh_, legt_, ''Location'',''%s'');',newpos); +end + +% Write code into m file +if length(arglist)==0 + argtext = ''; +else + argtext = sprintf('%s,',arglist{:}); + argtext = sprintf('(%s)',argtext(1:end-1)); +end +[fid,msg] = fopen(outfilename,'w'); +if fid==-1 + emsg = sprintf('Error trying to write to %s:\n%s',outfilename,msg); + errordlg(emsg,'Error Saving M File','modal'); + return +end +fprintf(fid,'function %s%s\n',fcnname,argtext); +fprintf(fid,'%%%s Create plot of datasets and fits\n',upper(fcnname)); +fprintf(fid,'%% %s%s\n',upper(fcnname),upper(argtext)); +fprintf(fid,'%% Creates a plot, similar to the plot in the main distribution fitting\n'); +fprintf(fid,'%% window, using the data that you provide as input. You can\n'); +fprintf(fid,'%% apply this function to the same data you used with dfittool\n'); +fprintf(fid,'%% or with different data. You may want to edit the function to\n'); +fprintf(fid,'%% customize the code and this help message.\n'); +fprintf(fid,'%%\n'); +fprintf(fid,'%% Number of datasets: %d\n',numds); +fprintf(fid,'%% Number of fits: %d\n',numfit); +fprintf(fid,'\n'); +fprintf(fid,'%% This function was automatically generated on %s\n',... 
+ datestr(now)); +for j=1:length(blkc) + fprintf(fid,'%s\n',xlate(blkc{j})); +end +fprintf(fid,'\n'); +for j=1:length(blks) + fprintf(fid,'%s\n',xlate(blks{j})); +end +fprintf(fid,'\n'); +for j=1:length(blkd) + fprintf(fid,'%s\n',xlate(blkd{j})); +end +fprintf(fid,'\n'); +for j=1:length(blkf) + fprintf(fid,'%s\n',xlate(blkf{j})); +end +fprintf(fid,'\n'); +for j=1:length(blke) + fprintf(fid,'%s\n',xlate(blke{j})); +end + +% Create sub function to be used to support a functionline fit on a probability plot +if anySmoothFits && isequal(ftype,'probplot') + fprintf(fid,'\n\n%% -----------------------------------------------\n'); + fprintf(fid,'function f=cdfnp(x,y,cens,freq,support,kernel,width)\n'); + fprintf(fid,'%%CDFNP Compute cdf for non-parametric fit, used in probability plot\n\n'); + fprintf(fid,'f = ksdensity(y,x,''cens'',cens,''weight'',freq,''function'',''cdf'',...\n'); + fprintf(fid,' ''support'',support,''kernel'',kernel,''width'',width);\n'); +end + +fclose(fid); + +% ------------------- double up quotes in text string +function a = quotedtext(b) +if ischar(b) + a = strrep(b,'''',''''''); +else + a = sprintf('%.13g',b); +end + +% ------------------- create text to re-create cell or numeric array +function a = cell2text(b) + +if ~iscell(b) + if ischar(b) + a = sprintf('''%s''',quotedtext(b)); + elseif length(b)==1 + a = sprintf('%.13g',b); + else + numtext = num2str(b,'%.13g '); + if size(numtext,1)>1 + numtext = [numtext repmat(';',size(numtext,1),1)]'; + numtext = numtext(:)'; + numtext = numtext(1:end-1); + end + a = sprintf('[%s]',numtext); + end + return +end + +if length(b)>0 + bj = b{1}; + if ischar(bj) + a = sprintf('''%s''',quotedtext(bj)); + else + a = sprintf('%.13g',bj); + end + for j=2:length(b) + bj = b{j}; + if ischar(bj) + a = sprintf('%s, ''%s''',a,quotedtext(bj)); + else + a = sprintf('%s, %.13g',a,bj); + end + end +else + a = ''; +end +a = sprintf('[%s]',a); + + +% ----------------- add censoring and frequency args to code block +function blk = addcensfreq(blk,censname,freqname) + +if ~isempty(censname) && ~isequal(censname,'[]') + blk{end+1} = sprintf(' ,''cens'',%s...',censname); +end +if ~isempty(freqname) && ~isequal(freqname,'[]') + blk{end+1} = sprintf(' ,''freq'',%s...',freqname); +end + + +% ---------------- write code for parametric fit +function [blkf,showbounds,onplot] = ... + writepfit(blkf,ft,alpha,allprop,anycontinuous,anydiscrete,exprlist,arglist) + +ds = ft.ds; +yname = expression2name(ds.yname,exprlist,arglist); +dist = ft.distspec; +ftype = ft.ftype; +showbounds = false; +onplot = true; + +blkf{end+1} = sprintf('\n%% Fit this distribution to get parameter values'); +[censname,freqname] = getcensfreqname(ds,exprlist,arglist); +shortform = isempty(censname) & isempty(freqname); + +% Exclude data if necessary +if ~isempty(ft.exclusionrule) + [blkf,yname,censname,freqname] = applyexclusion(blkf,ft.exclusionrule,... 
+ yname,censname,freqname); +end + +if isempty(censname) + censname = '[]'; +end +if isempty(freqname) + freqname = '[]'; +end + +% Helpful note about using old results instead of fitting new data +if isequal(getfittype(ft),'param') + blkf{end+1} = sprintf('%% To use parameter estimates from the original fit:'); + blkf{end+1} = sprintf('%% p_ = %s;', cell2text(num2cell(ft.params))); +end + +nparams = length(dist.pnames); + +if shortform + arglist = sprintf('%s, %g',yname,alpha); +else + arglist = sprintf('%s, %g, %s, %s',yname,alpha,censname,freqname); +end + +fname = func2str(dist.fitfunc); +onpath = exist(fname); +if onpath + rhs = sprintf('%s(%s);',fname,arglist); +else + rhs = sprintf('mle(''%s'',%s); %% Fit %s distribution',... + dist.code,arglist,dist.name); +end +if dist.paramvec + blkf{end+1} = sprintf('p_ = %s;',rhs); +else + blkf{end+1} = sprintf('pargs_ = cell(1,%d);',nparams); + blkf{end+1} = sprintf('[pargs_{:}] = %s',rhs); + blkf{end+1} = 'p_ = [pargs_{:}];'; +end + +pargs = 'p_(1)'; +if nparams>1 + pargs = [pargs, sprintf(', p_(%d)',2:nparams)]; +end + +% Get covariance matrix if we need confidence bounds +if ft.showbounds && ismember(ftype,{'cdf' 'survivor' 'cumhazard' 'icdf'}) + showbounds = true; +else + showbounds = false; +end + +% Sometimes we need the structure that describes the distribution +if ~onpath && (showbounds || isequal(ftype,'probplot')) + blkf{end+1} = sprintf(... + '\n%% Get a description of the %s distribution',... + dist.name); + blkf{end+1} = sprintf(... + 'dist_ = dfswitchyard(''dfgetdistributions'',''%s'');\n',... + dist.code); +end + +if showbounds + if onpath + blkf{end+1} = sprintf('[NlogL_,pcov_] = %s(p_,%s,%s,%s);',... + func2str(dist.likefunc),yname, censname, freqname); + else + blkf{end+1} = sprintf(... + '[NlogL_,pcov_] = feval(dist_.likefunc,p_,%s,%s,%s);',... + yname, censname, freqname); + end +end + +% Plot the fit and bounds if the original figure had them plotted +if isempty(ft.line) || ~ishandle(ft.line) + blkf{end+1} = '% This fit does not appear on the plot'; + onplot = false; + return; +end + +propvals = get(ft.line,allprop); +[c,m,l,w,s] = deal(propvals{:}); + +switch(ftype) + case {'pdf'} + if anycontinuous && anydiscrete + if ft.iscontinuous + blkf{end+1} = 'x_ = xc_;'; + else + blkf{end+1} = 'x_ = xd_;'; + end + end + if onpath + blkf{end+1} = sprintf('y_ = %s(x_,%s);',func2str(dist.pdffunc),pargs); + else + blkf{end+1} = sprintf('y_ = pdf(''%s'',x_,%s);',dist.code,pargs); + end + + blkf{end+1} = sprintf('h_ = plot(x_,y_,''Color'',[%g %g %g],...',... + c(1),c(2),c(3)); + blkf{end+1} = sprintf(' ''LineStyle'',''%s'', ''LineWidth'',%d,...',l,w); + blkf{end+1} = sprintf(' ''Marker'',''%s'', ''MarkerSize'',%d);',m,s); + + case {'cdf' 'survivor' 'cumhazard' 'icdf'} + if isequal(ftype,'icdf') + if onpath + prefix = sprintf('%s(',func2str(dist.invfunc)); + else + prefix = sprintf('icdf(''%s'',',dist.code); + end + else + if onpath + prefix = sprintf('%s(',func2str(dist.cdffunc)); + else + prefix = sprintf('cdf(''%s'',',dist.code); + end + end + + if showbounds + blkf{end+1} = sprintf('[y_,yL_,yU_] = %sx_,%s,pcov_,%g); %% cdf and bounds',... + prefix,pargs,alpha); + else + blkf{end+1} = sprintf('y_ = %sx_,%s); %% compute cdf',... 
+ prefix,pargs); + end + + if isequal(ftype,'survivor') + blkf{end+1} = 'y_ = 1 - y_; % convert to survivor function'; + if showbounds + blkf{end+1} = 'tmp_ = yL_;'; + blkf{end+1} = 'yL_ = 1 - yU_;'; + blkf{end+1} = 'yU_ = 1 - tmp_;'; + end + elseif isequal(ftype,'cumhazard') + blkf{end+1} = 't_ = (y_ < 1); % only where the hazard is finite'; + blkf{end+1} = 'x_ = x_(t_);'; + blkf{end+1} = 'y_ = -log(1 - y_(t_));'; + if showbounds + blkf{end+1} = 'if ~isempty(yL_)'; + blkf{end+1} = ' tmp_ = yL_;'; + blkf{end+1} = ' yL_ = -log(1 - yU_(t_));'; + blkf{end+1} = ' yU_ = -log(1 - tmp_(t_));'; + blkf{end+1} = 'end'; + end + end + + blkf{end+1} = sprintf('h_ = plot(x_,y_,''Color'',[%g %g %g],...',... + c(1),c(2),c(3)); + blkf{end+1} = sprintf(' ''LineStyle'',''%s'', ''LineWidth'',%d,...',l,w); + blkf{end+1} = sprintf(' ''Marker'',''%s'', ''MarkerSize'',%d);',m,s); + + if showbounds + blkf{end+1} = 'if ~isempty(yL_)'; + blkf{end+1} = sprintf(' hb_ = plot([x_(:); NaN; x_(:)], [yL_(:); NaN; yU_(:)],''Color'',[%g %g %g],...',... + c(1),c(2),c(3)); + blkf{end+1} = ' ''LineStyle'','':'', ''LineWidth'',1,...'; + blkf{end+1} = ' ''Marker'',''none'');'; + blkf{end+1} = 'end'; + end + + case 'probplot' + if onpath + stmt = sprintf('h_ = probplot(ax_,@%s,p_);', ... + func2str(dist.cdffunc)); + else + stmt = 'h_ = probplot(ax_,dist_.cdffunc,p_);'; + end + blkf{end+1} = stmt; + blkf{end+1} = sprintf('set(h_,''Color'',[%g %g %g],''LineStyle'',''%s'', ''LineWidth'',%d);', ... + c(1),c(2),c(3),l,w); +end + + +% ---------------- write code for nonparametric fit +function [blkf,onplot] = ... + writenpfit(blkf,ft,alpha,allprop,anycontinuous,anydiscrete,exprlist,arglist) + +ds = ft.ds; +yname = expression2name(ds.yname,exprlist,arglist); +ftype = ft.ftype; + +[censname,freqname] = getcensfreqname(ds,exprlist,arglist); +shortform = isempty(censname) & isempty(freqname); + +% Exclude data if necessary +if ~isempty(ft.exclusionrule) + [blkf,yname,censname,freqname] = applyexclusion(blkf,ft.exclusionrule,... + yname,censname,freqname); +end + +if isempty(censname) + censname = '[]'; +end +if isempty(freqname) + freqname = '[]'; +end + +kernel = sprintf('''%s''',ft.kernel); +if ft.bandwidthradio == 0 + width = '[]'; +else + width = ft.bandwidthtext; +end +if ischar(ft.support) + spt = sprintf('''%s''',ft.support); +else + spt = sprintf('[%g, %g]',ft.support); +end + +% Plot the fit and bounds if the original figure had them plotted +if isempty(ft.line) || ~ishandle(ft.line) + blkf{end+1} = '% This fit does not appear on the plot'; + onplot = false; + return; +end +onplot = true; + +propvals = get(ft.line,allprop); +[c,m,l,w,s] = deal(propvals{:}); + +switch(ftype) + case {'pdf' 'icdf' 'cdf' 'survivor' 'cumhazard'} + if isequal(ftype,'pdf') && anycontinuous && anydiscrete + blkf{end+1} = 'x_ = xc_;'; + end + + blkf{end+1} = sprintf('y_ = ksdensity(%s,x_,''kernel'',%s,...',... + yname,kernel); + if ~shortform + blkf{end+1} = sprintf(' ''cens'',%s,''weight'',%s,...',... + censname,freqname); + end + + if ~isequal(ft,'unbounded') + blkf{end+1} = sprintf(' ''support'',%s,...',spt); + end + if ~isequal(width,'[]') + blkf{end+1} = sprintf(' ''width'',%s,...',width); + end + + blkf{end+1} = sprintf(' ''function'',''%s'');',ftype); + blkf{end+1} = sprintf('h_ = plot(x_,y_,''Color'',[%g %g %g],...',... 
+ c(1),c(2),c(3)); + blkf{end+1} = sprintf(' ''LineStyle'',''%s'', ''LineWidth'',%d,...',l,w); + blkf{end+1} = sprintf(' ''Marker'',''%s'', ''MarkerSize'',%d);',m,s); + + + case 'probplot' + blkf{end+1} = sprintf('npinfo_ = {%s %s %s %s %s %s};',... + yname,censname,freqname,spt,kernel,width); + + blkf{end+1} = 'h_ = probplot(ax_,@cdfnp,npinfo_);'; + blkf{end+1} = sprintf('set(h_,''Color'',[%g %g %g],''LineStyle'',''%s'', ''LineWidth'',%d);', ... + c(1),c(2),c(3),l,w); +end + + +% --------------- write code for data set +function [blkc,blkd,exprlist,arglist,showbounds,onplot] = ... + writedset(blkc,blkd,ds,exprlist,arglist,allprop,alpha) + +dsname = ds.name; +yname = ds.yname; +[censname,freqname] = getcensfreqname(ds); +newnames = {yname censname freqname}; +newvars = false(1,3); +ftype = ds.ftype; +showbounds = false; +onplot = true; + +% Create comment text associating dataset with variable names +blkc{end+1} = ' '; +blkc{end+1} = sprintf('%% Data from dataset "%s":',dsname); + +% Each non-empty variable name becomes a function argument, +% except expressions that are not valid variable names have +% to be replaced by a variable name that we will select here +descrtext = {'Y' 'Censoring' 'Frequency'}; +for j=1:3 + exprj = newnames{j}; + if isempty(exprj) + continue; + end + exprnum = strmatch(exprj,exprlist,'exact'); + if isempty(exprnum); + exprnum = length(exprlist) + 1; + exprlist{exprnum} = exprj; + if isvarname(exprj) + namej = exprj; + else + namej = sprintf('arg_%d',exprnum); + end + arglist{exprnum} = namej; + newvars(j) = true; + else + namej = arglist{exprnum}; + end + if isequal(namej,exprj) + suffix = ''; + else + suffix = sprintf(' (originally %s)',exprj); + end + + blkc{end+1} = sprintf('%% %s = %s%s',descrtext{j},namej,suffix); + newnames{j} = namej; +end + +yname = newnames{1}; +censname = newnames{2}; +freqname = newnames{3}; +havecens = ~isempty(censname); +if ~havecens + censname = '[]'; +end +havefreq = ~isempty(freqname); +if ~havefreq + freqname = '[]'; +end + +blkc{end+1} = ' '; +blkc{end+1} = '% Remove missing values'; +blkc{end+1} = sprintf('t_ = ~isnan(%s);', yname); +if havecens + blkc{end+1} = sprintf('if ~isempty(%s), t_ = t_ & ~isnan(%s); end',... + censname, censname); +end +if havefreq + blkc{end+1} = sprintf('if ~isempty(%s), t_ = t_ & ~isnan(%s); end',... + freqname, freqname); +end +blkc{end+1} = sprintf('%s = %s(t_);', yname, yname); +if havecens + blkc{end+1} = sprintf('if ~isempty(%s), %s = %s(t_); end',... + censname, censname, censname); +end +if havefreq + blkc{end+1} = sprintf('if ~isempty(%s), %s = %s(t_); end',... 
+ freqname, freqname, freqname); +end + + +% Create code to plot this dataset into the figure we have created +blkd{end+1} = ' '; +blkd{end+1} = sprintf('%% --- Plot data originally in dataset "%s"',dsname); +for j=1:3 + if newvars(j) + blkd{end+1} = sprintf('%s = %s(:);',newnames{j},newnames{j}); + end +end +dsline = ds.line; +if isempty(dsline) || ~ishandle(dsline) + blkd{end+1} = '% This dataset does not appear on the plot'; + onplot = false; + return; +end + +propvals = get(dsline,allprop); +[c,m,l,w,s] = deal(propvals{:}); +switch(ftype) + case 'pdf' + % Generate code to compute the empirical cdf + blkd{end+1} = sprintf('[F_,X_] = ecdf(%s,''Function'',''cdf''...', yname); + if havecens + blkd{end+1} = sprintf(' ,''cens'',%s...',censname); + end + if havefreq + blkd{end+1} = sprintf(' ,''freq'',%s...',freqname); + end + blkd{end+1} = ' ); % compute empirical cdf'; + + % Generate code to duplicate the current histogram bin width selection + bininfo = ds.binDlgInfo; + if isempty(bininfo) % use default in case this is empty + bininfo.rule = 1; + end + blkd{end+1} = sprintf('Bin_.rule = %d;', bininfo.rule); + + switch bininfo.rule + case 3 + blkd{end+1} = sprintf('Bin_.nbins = %d;',bininfo.nbins); + + case 5 + blkd{end+1} = sprintf('Bin_.width = %g;',bininfo.width); + blkd{end+1} = sprintf('Bin_.placementRule = %d;',bininfo.placementRule); + if bininfo.placementRule ~= 1 + blkd{end+1} = sprintf('Bin_.anchor = %g;',bininfo.anchor); + end + end + + blkd{end+1} = sprintf('[C_,E_] = dfswitchyard(''dfhistbins'',%s,%s,%s,Bin_,F_,X_);',... + yname,censname,freqname); + + % Generate code to produce the histogram + blkd{end+1} = '[N_,C_] = ecdfhist(F_,X_,''edges'',E_); % empirical pdf from cdf'; + blkd{end+1} = 'h_ = bar(C_,N_,''hist'');'; + blkd{end+1} = sprintf('set(h_,''FaceColor'',''none'',''EdgeColor'',[%g %g %g],...', ... + c(1),c(2),c(3)); + blkd{end+1} = sprintf(' ''LineStyle'',''%s'', ''LineWidth'',%d);', ... + l,w); + blkd{end+1} = 'xlabel(''Data'');'; + blkd{end+1} = 'ylabel(''Density'')'; + + case {'cdf' 'survivor' 'cumhazard'} + showbounds = ds.showbounds; + if showbounds + blkd{end+1} = sprintf('[Y_,X_,yL_,yU_] = ecdf(%s,''Function'',''%s'',''alpha'',%g...',... + yname, ftype,alpha); + else + blkd{end+1} = sprintf('[Y_,X_] = ecdf(%s,''Function'',''%s''...',... + yname, ftype); + end + blkd = addcensfreq(blkd,censname,freqname); + blkd{end+1} = ' ); % compute empirical function'; + blkd{end+1} = 'h_ = stairs(X_,Y_);'; + blkd{end+1} = sprintf('set(h_,''Color'',[%g %g %g],''LineStyle'',''%s'', ''LineWidth'',%d);', ... + c(1),c(2),c(3),l,w); + if showbounds + blkd{end+1} = '[XX1_,YY1_] = stairs(X_,yL_);'; + blkd{end+1} = '[XX2_,YY2_] = stairs(X_,yU_);'; + blkd{end+1} = 'hb_ = plot([XX1_(:); NaN; XX2_(:)], [YY1_(:); NaN; YY2_(:)],...'; + blkd{end+1} = sprintf(' ''Color'',[%g %g %g],''LineStyle'','':'', ''LineWidth'',1);', ... + c(1),c(2),c(3)); + end + blkd{end+1} = 'xlabel(''Data'');'; + switch(ftype) + case 'cdf', blkd{end+1} = 'ylabel(''Cumulative probability'')'; + case 'survivor', blkd{end+1} = 'ylabel(''Survivor function'')'; + case 'cumhazard', blkd{end+1} = 'ylabel(''Cumulative hazard'')'; + end + + case 'icdf' + blkd{end+1} = sprintf('[Y_,X_] = ecdf(%s,''Function'',''cdf''...', yname); + blkd = addcensfreq(blkd,censname,freqname); + blkd{end+1} = ' ); % compute empirical cdf'; + blkd{end+1} = 'h_ = stairs(Y_,[X_(2:end);X_(end)]);'; + blkd{end+1} = sprintf('set(h_,''Color'',[%g %g %g],''LineStyle'',''%s'', ''LineWidth'',%d);', ... 
+ c(1),c(2),c(3),l,w); + blkd{end+1} = 'xlabel(''Probability'');'; + blkd{end+1} = 'ylabel(''Quantile'')'; + + case 'probplot' + blkd{end+1} = sprintf('h_ = probplot(ax_,%s...', yname); + blkd = addcensfreq(blkd,censname,freqname); + blkd{end+1} = ' ,''noref''); % add to probability plot'; + blkd{end+1} = sprintf('set(h_,''Color'',[%g %g %g],''Marker'',''%s'', ''MarkerSize'',%d);', ... + c(1),c(2),c(3),m,s); + blkd{end+1} = 'xlabel(''Data'');'; + blkd{end+1} = 'ylabel(''Probability'')'; +end + + +% ----------------------------- +function [blkf,yname,censname,freqname]=applyexclusion(blkf,exclrule,... + yname,censname,freqname); +%APPLYEXCLUSION Change var names to use indexing to apply exclusion rule + +% Create expressions for inclusion rules +if isempty(exclrule.ylow) + e1 = ''; +else + ylow = str2double(exclrule.ylow); + if exclrule.ylowlessequal==1 + e1 = sprintf('%s > %g', yname, ylow); + else + e1 = sprintf('%s >= %g', yname, ylow); + end +end +if isempty(exclrule.yhigh) + e2 = ''; +else + yhigh = str2double(exclrule.yhigh); + if exclrule.yhighgreaterequal==1 + e2 = sprintf('%s < %g', yname, yhigh); + else + e2 = sprintf('%s <= %g', yname, yhigh); + end +end + +% Combine exclusion expressions +if isempty(e1) + if isempty(e2) + etxt = ''; + else + etxt = e2; + end +else + if isempty(e2) + etxt = e1; + else + etxt = sprintf('%s & %s',e1,e2); + end +end + +% Create code to generate index vector and reduce all variables +if ~isempty(etxt) + blkf{end+1} = sprintf('\n%% Create vector for exclusion rule ''%s''',... + exclrule.name); + blkf{end+1} = '% Vector indexes the points that are included'; + blkf{end+1} = sprintf('excl_ = (%s);\n', etxt); + + yname = sprintf('%s(excl_)',yname); + if ~isempty(censname) + censname = sprintf('%s(excl_)',censname); + end + if ~isempty(freqname) + freqname = sprintf('%s(excl_)',freqname); + end +end + +% ----------------------------------------- +function [censname,freqname] = getcensfreqname(ds,exprlist,arglist) +%GETCENSFREQNAME Get censoring and freqency names + +censname = ds.censname; +freqname = ds.freqname; +if strcmp(censname,'(none)') + censname = ''; +end +if strcmp(freqname,'(none)') + freqname = ''; +end + +if nargin>=3 + censname = expression2name(censname,exprlist,arglist); + freqname = expression2name(freqname,exprlist,arglist); +end + + +% ------------------------------------------- +function nm = expression2name(expr,exprlist,arglist) +%EXPRESSION2NAME Find out what name we're using in place of this expression + +nm = expr; +if ~isempty(expr) + j = strmatch(expr,exprlist,'exact'); + if isscalar(j) + nm = arglist{j}; + end +end + \ No newline at end of file diff --git a/boosting/weightedstats/private/dfgetbinwidthdefaults.m b/boosting/weightedstats/private/dfgetbinwidthdefaults.m new file mode 100644 index 0000000..ccc4700 --- /dev/null +++ b/boosting/weightedstats/private/dfgetbinwidthdefaults.m @@ -0,0 +1,14 @@ +function [v0, v1, v2, v3, v4, v5, v6] = dfgetbinwidthdefaults() +% DFGETBINWIDTHDEFAULTS Helper function for the dfittool set default bin width panel + +% Copyright 2003-2004 The MathWorks, Inc. 
+% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:35 $ + +binDlgInfo = dfgetset('binDlgInfo'); +v0 = binDlgInfo.rule; +v1 = binDlgInfo.nbinsExpr; +v2 = binDlgInfo.widthExpr; +v3 = binDlgInfo.placementRule; +v4 = binDlgInfo.anchorExpr; +v5 = binDlgInfo.applyToAll; +v6 = binDlgInfo.setDefault; diff --git a/boosting/weightedstats/private/dfgetdistributions.m b/boosting/weightedstats/private/dfgetdistributions.m new file mode 100644 index 0000000..08291ca --- /dev/null +++ b/boosting/weightedstats/private/dfgetdistributions.m @@ -0,0 +1,343 @@ +function [s,errid] = dfgetdistributions(distname,douser) +%DFGETDISTRIBUTIONS Get structure defining the distributions supported by dfittool + +% $Revision: 1.1.6.8 $ $Date: 2004/01/24 09:35:36 $ +% Copyright 2003-2004 The MathWorks, Inc. + +errid = ''; + +% If a struct was passed in, store this for later use +if nargin>0 && isstruct(distname) + dfgetset('alldistributions',distname); + return +end + +% Get old value if already created and stored +s = dfgetset('alldistributions'); + +% If not created yet, create it now +if isempty(s) + % Get built-in distributions + s = getbuiltins; + + if nargin<2 || douser + % Get user-defined distributions (won't be done if we already + % had a distribution list created before this function was called) + [s,errid,errmsg] = dfgetuserdists(s); + if ~isempty(errid) + errordlg(errmsg,'DFITTOOL User-Defined Distributions','modal'); + end + end + + % Sort by name + lowernames = lower(strvcat(s.name)); + [ignore, ind] = sortrows(lowernames); + s = s(ind); + + % Store it for next time + dfgetset('alldistributions',s); +end + +if nargin>0 && ~isempty(distname) + % Return only the distribution(s) requested, not all of them + allnames = {s.code}; + distnum = strmatch(lower(distname), allnames); + s = s(distnum); +end + + +% ------------------------------------ +function s = getbuiltins +%GETBUILTINS Get distributions functions provided off the shelf + +ndists = 11; % to be updated if distributions added or removed +s(ndists).name = ''; + +% Exponential distribution +j = 1; +s(j).name = 'Exponential'; % distribution name +s(j).code = 'exponential'; % distribution code name +s(j).pnames = {'mu'}; % parameter names +s(j).pdescription = {'scale'}; % parameter descriptions +s(j).prequired = false; % is a value required for this parameter? 
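+% function handles and capability flags consumed by dfittool: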
+s(j).fitfunc = @expfit; % fitting function +s(j).likefunc = @explike; % likelihood (and covariance) function +s(j).cdffunc = @expcdf; % cdf function +s(j).pdffunc = @exppdf; % pdf function +s(j).invfunc = @expinv; % inverse cdf function +s(j).statfunc = @expstat; % function to compute mean and var +s(j).loginvfunc = []; % inverse cdf function on log scale, if any +s(j).logcdffunc = []; % cdf function on log scale, if any +s(j).hasconfbounds = true; % supports conf bnds for cdf and inverse +s(j).censoring = true; % supports censoring +s(j).paramvec = true; % returns fitted parameters as a vector +s(j).support = [0 Inf]; % range of x with positive density +s(j).closedbound = [true false];% is x at this boundary point acceptable +s(j).iscontinuous = true; % is continuous, not discrete +s(j).islocscale = true; % is location/scale family, no shape param +s(j).uselogpp = false; % use log scale for probability plot + +% Extreme value +j = j+1; +s(j).name = 'Extreme value'; +s(j).code = 'extreme value'; +s(j).pnames = {'mu' 'sigma'}; +s(j).pdescription = {'location' 'scale'}; +s(j).prequired = [false false]; +s(j).fitfunc = @evfit; +s(j).likefunc = @evlike; +s(j).cdffunc = @evcdf; +s(j).pdffunc = @evpdf; +s(j).invfunc = @evinv; +s(j).statfunc = @evstat; +s(j).loginvfunc = []; +s(j).logcdffunc = []; +s(j).hasconfbounds = true; +s(j).censoring = true; +s(j).paramvec = true; +s(j).support = [-Inf Inf]; +s(j).closedbound = [false false]; +s(j).iscontinuous = true; +s(j).islocscale = true; +s(j).uselogpp = false; + +% Gamma +j = j+1; +s(j).name = 'Gamma'; +s(j).code = 'gamma'; +s(j).pnames = {'a' 'b'}; +s(j).pdescription = {'shape' 'scale'}; +s(j).prequired = [false false]; +s(j).fitfunc = @gamfit; +s(j).likefunc = @gamlike; +s(j).cdffunc = @gamcdf; +s(j).pdffunc = @gampdf; +s(j).invfunc = @gaminv; +s(j).statfunc = @gamstat; +s(j).loginvfunc = []; +s(j).logcdffunc = []; +s(j).hasconfbounds = true; +s(j).censoring = true; +s(j).paramvec = true; +s(j).support = [0 Inf]; +s(j).closedbound = [false false]; +s(j).iscontinuous = true; +s(j).islocscale = false; +s(j).uselogpp = false; + +% Lognormal +j = j+1; +s(j).name = 'Lognormal'; +s(j).code = 'lognormal'; +s(j).pnames = {'mu' 'sigma'}; +s(j).pdescription = {'log location' 'log scale'}; +s(j).prequired = [false false]; +s(j).fitfunc = @lognfit; +s(j).likefunc = @lognlike; +s(j).cdffunc = @logncdf; +s(j).pdffunc = @lognpdf; +s(j).invfunc = @logninv; +s(j).statfunc = @lognstat; +s(j).loginvfunc = @norminv; +s(j).logcdffunc = @normcdf; +s(j).hasconfbounds = true; +s(j).censoring = true; +s(j).paramvec = true; +s(j).support = [0 Inf]; +s(j).closedbound = [false false]; +s(j).iscontinuous = true; +s(j).islocscale = true; +s(j).uselogpp = true; + +% Normal +j = j+1; +s(j).name = 'Normal'; +s(j).code = 'normal'; +s(j).pnames = {'mu' 'sigma'}; +s(j).pdescription = {'location' 'scale'}; +s(j).prequired = [false false]; +s(j).fitfunc = @normfit; +s(j).likefunc = @normlike; +s(j).cdffunc = @normcdf; +s(j).pdffunc = @normpdf; +s(j).invfunc = @norminv; +s(j).statfunc = @normstat; +s(j).loginvfunc = []; +s(j).logcdffunc = []; +s(j).hasconfbounds = true; +s(j).censoring = true; +s(j).paramvec = false; +s(j).support = [-Inf Inf]; +s(j).closedbound = [false false]; +s(j).iscontinuous = true; +s(j).islocscale = true; +s(j).uselogpp = false; + +% Weibull +j = j+1; +s(j).name = 'Weibull'; +s(j).code = 'weibull'; +s(j).pnames = {'a' 'b'}; +s(j).pdescription = {'scale' 'shape'}; +s(j).prequired = [false false]; +s(j).fitfunc = @wblfit; +s(j).likefunc = @wbllike; +s(j).cdffunc 
= @wblcdf; +s(j).pdffunc = @wblpdf; +s(j).invfunc = @wblinv; +s(j).statfunc = @wblstat; +s(j).loginvfunc = @evinv; +s(j).logcdffunc = @evcdf; +s(j).hasconfbounds = true; +s(j).censoring = true; +s(j).paramvec = true; +s(j).support = [0 Inf]; +s(j).closedbound = [false false]; +s(j).iscontinuous = true; +s(j).islocscale = true; +s(j).uselogpp = true; + +% Rayleigh +j = j+1; +s(j).name = 'Rayleigh'; +s(j).code = 'rayleigh'; +s(j).pnames = {'b'}; +s(j).pdescription = {'scale'}; +s(j).prequired = false; +s(j).fitfunc = @raylfit; +s(j).likefunc = []; +s(j).cdffunc = @raylcdf; +s(j).pdffunc = @raylpdf; +s(j).invfunc = @raylinv; +s(j).statfunc = @raylstat; +s(j).loginvfunc = []; +s(j).logcdffunc = []; +s(j).hasconfbounds = false; +s(j).censoring = false; +s(j).paramvec = true; +s(j).support = [0 Inf]; +s(j).closedbound = [false false]; +s(j).iscontinuous = true; +s(j).islocscale = true; +s(j).uselogpp = false; + +% Poisson +j = j+1; +s(j).name = 'Poisson'; +s(j).code = 'poisson'; +s(j).pnames = {'lambda'}; +s(j).pdescription = {'mean'}; +s(j).prequired = false; +s(j).fitfunc = @poissfit; +s(j).likefunc = []; +s(j).cdffunc = @poisscdf; +s(j).pdffunc = @poisspdf; +s(j).invfunc = @poissinv; +s(j).statfunc = @poisstat; +s(j).loginvfunc = []; +s(j).logcdffunc = []; +s(j).hasconfbounds = false; +s(j).censoring = false; +s(j).paramvec = true; +s(j).support = [0 Inf]; +s(j).closedbound = [true false]; +s(j).iscontinuous = false; +s(j).islocscale = false; +s(j).uselogpp = false; + +% Negative binomial +j = j+1; +s(j).name = 'Negative Binomial'; +s(j).code = 'negative binomial'; +s(j).pnames = {'r' 'p'}; +s(j).pdescription = {'' ''}; +s(j).prequired = [false false]; +s(j).fitfunc = @nbinfit; +s(j).likefunc = @nbinlike; +s(j).cdffunc = @nbincdf; +s(j).pdffunc = @nbinpdf; +s(j).invfunc = @nbininv; +s(j).statfunc = @nbinstat; +s(j).loginvfunc = []; +s(j).logcdffunc = []; +s(j).hasconfbounds = false; +s(j).censoring = false; +s(j).paramvec = true; +s(j).support = [0 Inf]; +s(j).closedbound = [true false]; +s(j).iscontinuous = false; +s(j).islocscale = false; +s(j).uselogpp = false; + +% Beta +j = j+1; +s(j).name = 'Beta'; +s(j).code = 'beta'; +s(j).pnames = {'a' 'b'}; +s(j).pdescription = {'' ''}; +s(j).prequired = [false false]; +s(j).fitfunc = @betafit; +s(j).likefunc = @betalike; +s(j).cdffunc = @betacdf; +s(j).pdffunc = @betapdf; +s(j).invfunc = @betainv; +s(j).statfunc = @betastat; +s(j).loginvfunc = []; +s(j).logcdffunc = []; +s(j).hasconfbounds = false; +s(j).censoring = false; +s(j).paramvec = true; +s(j).support = [0 1]; +s(j).closedbound = [false false]; +s(j).iscontinuous = true; +s(j).islocscale = false; +s(j).uselogpp = false; + +% Binomial +j = j+1; +s(j).name = 'Binomial'; +s(j).code = 'binomial'; +s(j).pnames = {'N' 'p'}; +s(j).pdescription = {'trials' 'probability'}; +s(j).prequired = [true false]; +s(j).fitfunc = @localbinofit; +s(j).likefunc = []; +s(j).cdffunc = @binocdf; +s(j).pdffunc = @binopdf; +s(j).invfunc = @binoinv; +s(j).statfunc = @binostat; +s(j).loginvfunc = []; +s(j).logcdffunc = []; +s(j).hasconfbounds = false; +s(j).censoring = false; +s(j).paramvec = true; +s(j).support = [0 Inf]; +s(j).closedbound = [true false]; +s(j).iscontinuous = false; +s(j).islocscale = false; +s(j).uselogpp = false; + +s = addbisa(s); +s = addinvg(s); +s = addlogi(s); +s = addnaka(s); +s = addtls(s); +s = addrice(s); + +% ------------ binomial function is a special case +function [phat,pci] = localbinofit(x,N,alpha) +%LOCALBINOFIT Version of binofit that operates on vectors + +nx = length(x); +sumx = 
sum(x); +sumN = nx * N; +if nargout==2 + [phat,pci] = binofit(sumx,sumN,alpha); +else + phat = binofit(sumx,sumN,alpha); +end + +phat = [N phat]; +if nargout==2 + pci = [NaN NaN; pci]'; +end diff --git a/boosting/weightedstats/private/dfgetexclusionrule.m b/boosting/weightedstats/private/dfgetexclusionrule.m new file mode 100644 index 0000000..dc4f77e --- /dev/null +++ b/boosting/weightedstats/private/dfgetexclusionrule.m @@ -0,0 +1,9 @@ +function hExcl = dfgetexclusionrule(ename) +%GETEXCLUSIONRULE Get an exclusion rule by name + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:37 $ +% Copyright 2003-2004 The MathWorks, Inc. + +db = getoutlierdb; +hExcl = find(db,'name',ename); + diff --git a/boosting/weightedstats/private/dfgetfitname.m b/boosting/weightedstats/private/dfgetfitname.m new file mode 100644 index 0000000..2d9ee18 --- /dev/null +++ b/boosting/weightedstats/private/dfgetfitname.m @@ -0,0 +1,20 @@ +function name = dfgetfitname() + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:38 $ +% Copyright 2003-2004 The MathWorks, Inc. + +count=dfgetset('fitcount'); +if isempty(count) + count = 1; +end +taken = 1; +while taken + name=sprintf('fit %i', count); + if isempty(find(getfitdb,'name',name)) + taken = 0; + else + count=count+1; + end +end +dfgetset('fitcount',count+1); + diff --git a/boosting/weightedstats/private/dfgetupdateinfo.m b/boosting/weightedstats/private/dfgetupdateinfo.m new file mode 100644 index 0000000..9f50798 --- /dev/null +++ b/boosting/weightedstats/private/dfgetupdateinfo.m @@ -0,0 +1,26 @@ +function [name, distname, dataset, exrule, results] = dfgetupdateinfo(fit) +%DFGETUPDATEINFO GUI helper to delete an exclusion rule + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:39 $ +% Copyright 2003-2004 The MathWorks, Inc. + +name = fit.name; +fittype = fit.fittype; + +if strcmp(fittype, 'smooth') + distname = 'nonparametric'; +else %parametric + distname = fit.distname; +end + +dataset = fit.dataset; +exrule = fit.exclusionrulename; +results = fit.resultstext; + + + + + + + + diff --git a/boosting/weightedstats/private/dfgetuserdists.m b/boosting/weightedstats/private/dfgetuserdists.m new file mode 100644 index 0000000..46eb789 --- /dev/null +++ b/boosting/weightedstats/private/dfgetuserdists.m @@ -0,0 +1,376 @@ +function [news,errid,errmsg,newrows]=dfgetuserdists(olds,userfun) +%GETUSERDISTS Get user-defined distributions for dfittool +% [NEWS,ERRID,ERRMSG,NEWROWS]=GETUSERDISTS(OLDS) appends user-defined +% distribution information to the existing distribution information +% in the structure OLDS and returns the combined information in the +% structure NEWS. Any error id or message is also returned. NEWROWS +% is a vector of the indices that are new or have changed. +% +% [...]=GETUERDISTS(OLDS,USERFUN) uses the function USERFUN in +% place of the default @dfittooldists. + +% $Revision: 1.1.6.3 $ $Date: 2004/03/02 21:49:25 $ +% Copyright 2003-2004 The MathWorks, Inc. + +% If no user-defined function given, use the default if it exists +if nargin<2 + if exist('dfittooldists','file') + userfun = @dfittooldists; + else + userfun = []; + end +end + +errid = ''; +errmsg = ''; +news = olds; +newrows = []; + +if isempty(userfun) + return +end + +% First try running the user's function +if isa(userfun,'function_handle') + userfunname = func2str(userfun); +else + userfunname = userfun; +end +try + s = feval(userfun); +catch + errid = 'stats:dfittool:BadUserDistributions'; + errmsg = sprintf(... + 'Error running %s to get user-defined distributions:\n%s',... 
+ userfunname, lasterr); + return +end +if ~isempty(errmsg) || isempty(s) + return; +end + +% Next make sure the result is a structure +if ~isstruct(s) + errid = 'stats:dfittool:StructureRequired'; + errmsg = sprintf('%s not return a structure',userfunname);; + return; +end + +newfields = fieldnames(s); +numnewdists = length(s); +numolddists = length(olds); +requiredfields = {'name' 'pnames' 'cdffunc' 'pdffunc' 'invfunc'}; + +% Next make sure the result has all required fields +for j=1:length(requiredfields) + if isempty(strmatch(requiredfields{j},newfields,'exact')) + errid = 'stats:dfittool:MissingField'; + errmsg = sprintf('Missing field ''%s'' in %s structure',... + requiredfields{j},userfunname); + return + end +end + +% Make sure the field values are as expected +checked = cell(1,numnewdists); +for j=1:numnewdists + sj = s(j); + [errid,errmsg,sj] = checkfields(sj); + if ~isempty(errid) + return + end + checked{j} = sj; +end + +% See if we are overwriting existing fields +if numolddists>0 + oldnames = {olds.name}; + oldcodes = {olds.code}; +else + oldnames = cell(0); + oldcodes = cell(0); +end + +for j=1:numnewdists + % See if the proposed name or code is in use already + sj = checked{j}; + newfields = fieldnames(sj); % may need updating since previous assignment + name = sj.name; + code = sj.code; + oldnrow = strmatch(name,oldnames,'exact'); + oldcrow = strmatch(code,oldcodes,'exact'); + + if isempty(oldcrow) + if ~isempty(oldnrow) + newrow = oldnrow; % replace distribution with same name + else + newrow = numolddists+1; % new distribution + end + else + % Trying to re-define an existing distribution + if ~isempty(oldnrow) && ~isequal(oldcrow,oldnrow) + errid = 'stats:dfittool:DuplicateName'; + errmsg = sprintf(... + ['Distribution with code ''%s'' has a name duplicating that ' ... + 'of another distribution.'],code); + return + end + newrow = oldcrow; + end + + % Update fields in old structure. + % Can't concatenate with [] if field names differ. + for fieldnum = 1:length(newfields) + fieldname = newfields{fieldnum}; + olds(newrow).(fieldname) = sj.(fieldname); + end + + % Update arrays to guard against duplicates within the new structure + if newrow>numolddists + oldnames = [oldnames {name}]; + oldcodes = [oldcodes {code}]; + numolddists = numolddists+1; + end + newrows = [newrows newrow]; +end + +% Return updated structure as new structure +news = olds; + +% ------------------------------------------ +function [errid,errmsg,sj] = checkfields(sj) +%CHECKFIELDS Check that a distribution structure's fields are all valid + +% Check required fields +testnames = {'name'}; +for j=1:length(testnames) + field = testnames{j}; + [errid,errmsg,val] = checkstring(sj,field,'',false); + if ~isempty(errmsg) + return + end + sj.(field) = val; +end + +field = 'pnames'; +[errid,errmsg,val] = checktext(sj,field,'',false,[]); +if ~isempty(errmsg) + return +end +sj.(field) = val; +nparams = length(val); + +testnames = {'pdffunc' 'cdffunc' 'invfunc'}; +for j=1:length(testnames) + field = testnames{j}; + [errid,errmsg,val] = checkfunc(sj,field,false); + if ~isempty(errmsg) + return + end + sj.(field) = val; +end + +% Check optional fields and fill in defaults +testnames = {'code'}; +defaults = {lower(sj.name)}; +for j=1:length(testnames) + field = testnames{j}; + [errid,errmsg,val] = checkstring(sj,field,defaults{j},true); + if ~isempty(errmsg) + return + end + sj.(field) = val; +end + +testnames = {'hasconfbounds' 'iscontinuous' 'islocscale' 'uselogpp' ... 
+ 'censoring' 'paramvec'}; +defaults = {false true false false ... + false true}; +for j=1:length(testnames) + field = testnames{j}; + default = defaults{j}; + [errid,errmsg,val] = checklogical(sj,field,default); + if ~isempty(errmsg) + return + end + sj.(field) = val; +end + +testnames = {'likefunc' 'logcdffunc' 'loginvfunc'}; +for j=1:length(testnames) + field = testnames{j}; + [errid,errmsg,val] = checkfunc(sj,field,true); + if ~isempty(errmsg) + return + end + sj.(field) = val; +end + +field = 'prequired'; +[errid,errmsg,val] = checklogical(sj,field,false(1,nparams),nparams); +if ~isempty(errmsg) + return +end +sj.(field) = val; + +field = 'pdescription'; +[errid,errmsg,val] = checktext(sj,field,{},true,nparams); +if ~isempty(errmsg) + return +end +sj.(field) = val; + +field = 'closedbound'; +[errid,errmsg,val] = checklogical(sj,field,[false false],2); +if ~isempty(errmsg) + return +end +sj.(field) = val; + +field = 'support'; +[errid,errmsg,val] = checksupport(sj); +if ~isempty(errmsg) + return +end +sj.(field) = val; + + +% ------------------------------------------ +function [errid,errmsg,val] = checklogical(s,field,default,nvals) +%CHECKLOGICAL Check that a field has a valid logical value + +if nargin<4 + nvals = 1; +end + +val = []; +errid = ''; +errmsg = ''; +if ~isfield(s,field) || isempty(s.(field)) + val = default; +else + val = s.(field); +end + +if (numel(val) ~= nvals) + errid = 'stats:dfittool:WrongSize'; + errmsg = sprintf(... + 'The ''%s'' field must contain %d element(s).',field,nvals); +elseif ~(islogical(val) || isnumeric(val)) + errid = 'stats:dfittool:NotLogical'; + errmsg = sprintf(... + 'The ''%s'' field must be true or false.',field); +else + val = (val ~= 0); +end + + +% ------------------------------------------ +function [errid,errmsg,val]=checktext(s,field,default,optional,nvals) +%CHECKTEXT Check that a field has a value that is an array of strings + +if nargin<5 + nvals = 1; +end + +errid = ''; +errmsg = ''; +val = ''; +if ~isfield(s,field) || isempty(s.(field)) + if optional + val = default; + return + else + errid = 'stats:dfittool:EmptyNotAllowed'; + errmsg = sprintf('The ''%s'' field must not be empty.',field); + return + end +end +val = s.(field); +if iscellstr(val) + if ~isempty(nvals) && numel(val)~=nvals + errid = 'stats:dfittool:BadSize'; + errmsg = sprintf(... + 'The ''%s'' field must contain %d string(s).',field,nvals); + end +elseif ischar(val) + if ~isempty(nvals) && size(val,1)~=nvals + errid = 'stats:dfittool:BadSize'; + errmsg = sprintf(... 
+ 'The ''%s'' field must contain %d string(s).',field,nvals); + else + val = cellstr(val); + end +else + errid = 'stats:dfittool:NotCharacter'; + errmsg = sprintf('Value in ''%s'' field must be a character array or cell array of strings.',field); +end + +% ------------------------------------------ +function [errid,errmsg,val]=checkstring(s,field,default,optional) +%CHECKSTRING Check that a field has a valid string + +errid = ''; +errmsg = ''; +val = ''; +if ~isfield(s,field) || isempty(s.(field)) + if optional + val = default; + return + else + errid = 'stats:dfittool:EmptyNotAllowed'; + errmsg = sprintf('The ''%s'' field must not be empty.',field); + return + end +end +val = s.(field); +if ~ischar(val) || (~isequal(size(val), [1,length(val)])) + errid = 'stats:dfittool:NotCharacter'; + errmsg = sprintf('Value in ''%s'' field must be a character string.',field); +end + + +% ------------------------------------------ +function [errid,errmsg,val] = checkfunc(s,field,optional) +%CHECKFUNC Check that a field has a valid function value + + +val = ''; +errid = ''; +errmsg = ''; +if ~isfield(s,field) || isempty(s.(field)) + if ~optional + errid = 'stats:dfittool:EmptyNotAllowed'; + errmsg = sprintf('The ''%s'' field must not be empty.',field); + end + return +end + +val = s.(field); +if ~isa(val,'function_handle') || ~isscalar(val) + errid = 'stats:dfittool:NotFunctionHandle'; + errmsg = sprintf(... + 'The ''%s'' field must contain a single function handle.',field); +end + +% ------------------------------------------ +function [errid,errmsg,val] = checksupport(s,default) +%CHECKLOGICAL Check that a field has a valid logical value + +val = []; +errid = ''; +errmsg = ''; +default = [-Inf Inf]; +field = 'support'; +if ~isfield(s,field) || isempty(s.(field)) + val = default; + return; +end +val = s.(field); +if ~isnumeric(val) || numel(val)~=2 + errid = 'stats:dfittool:BadSupport'; + errmsg = 'The ''support'' value must contain a two-element vector.'; +elseif (val(1)>=val(2)) || any(isnan(val)) + errid = 'stats:dfittool:BadSupport'; + errmsg = 'The ''support'' values must be increasing and non-NaN.'; +end diff --git a/boosting/weightedstats/private/dfgraphexclude.m b/boosting/weightedstats/private/dfgraphexclude.m new file mode 100644 index 0000000..ae71a6e --- /dev/null +++ b/boosting/weightedstats/private/dfgraphexclude.m @@ -0,0 +1,449 @@ +function dfgraphexclude(dsname,xlo,xhi) +%DFGRAPHEXCLUDE Create graph for selecting (x,y) pairs to exclude +% DFGRAPHEXCLUDE(EXCLUDEPANEL,DSNAME,LOBND,UPBND) creates a graph +% tied to the Java exclusion panel EXCLUDEPANEL, for dataset DSNAME, with +% current lower and upper bounds LOBND and UPBND. It provides a graphical +% way to modify those bounds. + +% Copyright 2001-2004 The MathWorks, Inc. 
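+%
+% An existing figure tagged 'dfexcludegraph' is reused if present; otherwise a
+% new one is created from the named dataset, and any finite lower/upper bounds
+% passed in are drawn as draggable patches.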
+% $Revision: 1.1.6.8 $ $Date: 2004/02/01 22:10:38 $ + + +% Use old figure if any, or create a new one with a plot of the data +t = get(0,'ShowHiddenHandles'); +set(0,'ShowHiddenHandles','on'); +c = get(0,'Child'); +f = findobj(c,'flat','Type','figure','Tag','dfexcludegraph'); +set(0,'ShowHiddenHandles',t); +if ~isempty(f) + subfig = f; +else + subfig = setupfigure(dsname,xlo,xhi); +end +if isempty(subfig) + return +end + +figure(subfig) + +% Adjust the patches to show the desired +ax = get(subfig,'CurrentAxes'); +xlim = get(ax,'XLim'); + +% If bounds already exist, put them onto the graph +if nargin>=2 && ~isempty(xlo) && ~isinf(xlo) && xlo > xlim(1) + addremovepatch(subfig,'lower',xlo,'add') +end +if nargin>=3 && ~isempty(xhi) && ~isinf(xhi) && xhi < xlim(2) + addremovepatch(subfig,'upper',xhi,'add') +end + +set(ax,'XLimMode','manual','YLimMode','manual'); +set(subfig,'HandleVisibility','callback'); +dfgetset('dfsubfig',subfig); + +return + + +% -------------- Create figure +function subfig = makefigure(x,y,dolegend,xlo,xhi) + +figcolor = get(0,'defaultuicontrolbackgroundcolor'); +subfig = figure('IntegerHandle','off','Units','pixels',... + 'HandleVisibility','callback',... + 'name','Define Boundary for Exclusion Rule',... + 'numbertitle','off',... + 'color',figcolor,... + 'Tag','dfexcludegraph',... + 'DeleteFcn', @closefig,... + 'doublebuffer','on',... + 'Dock','off'); + +% Remove menus +set(subfig,'Menubar','none'); + +% Restore toolbar but keep only zoom tools +set(subfig,'toolbar','figure'); +h = findall(subfig,'Type','uitoolbar'); +h1 = findall(h); % Get all children +h1(h1==h) = []; % Not including the toolbar itself +h2 = findall(h1,'flat','TooltipString','Zoom In'); +h1(h2==h1) = []; +h2 = findall(h1,'flat','TooltipString','Zoom Out'); +h1(h2==h1) = []; +delete(h1); + +% Add axes, for now in default position +xlim = [min(x) max(x)]; +if isfinite(xlo) + xlim(1) = min(xlim(1), xlo); +end +if isfinite(xhi) + xlim(2) = max(xlim(2), xhi); +end + +xlim = xlim + .1 * [-1 1] * diff(xlim); +ax = axes('Parent',subfig,'Box','on','HitTest','off',... + 'XLim',xlim,'YLim',[0, max(y)+1]); + +% Place buttons where required +p = get(subfig,'Position'); + +pbutton = [5 5 50 20]; +a1=uicontrol(subfig,'Units','pixels','Position',pbutton,... + 'Tag','close','Callback',@done,... + 'String','Close'); +e = get(a1,'Extent'); +pbutton(3:4) = 1.5 * e(3:4); +set(a1,'Position',pbutton); + +margin = 15; + +% Use longer string to get extent, then replace with shorter one +a3 = uicontrol(subfig,'Units','pixels','Position',pbutton,... + 'Tag','upper','Callback',@buttoncallback,... + 'String','Remove Upper Limit'); +e = get(a3,'Extent'); +pbutton(3) = 1.25 * e(3); +pbutton(1) = max(1,pbutton(1) - margin - pbutton(3)); +set(a3,'Position',pbutton,'String','Add Upper Limit'); + +pbutton(1) = max(5, pbutton(1)-margin-pbutton(3)); +a4=uicontrol(subfig,'Units','pixels','Position',pbutton,... + 'Tag','lower','Callback',@buttoncallback,... + 'String','Add Lower Limit'); +setappdata(subfig,'buttons',[a1 a3 a4]); + +% Place text as well +if dolegend + p = [5 5 10 10]; + a = uicontrol(subfig, 'Units','pixels', 'Position',p,... + 'Style','text','String','Observed = filled circles'); + e = get(a,'Extent'); + p(3:4) = e(3:4); + set(a,'Position',p); + p(2) = p(2) + p(4) + 5; + a = uicontrol(subfig, 'Units','pixels', 'Position',p,... 
+ 'Style','text','String','Censored = open circles'); +end + +% Leave axis units as pixels, and finally set resize function +set(ax,'Units','pixels'); +resize(subfig); +set(subfig,'ResizeFcn',@resize,'WindowButtonMotionFcn',@fixcursor); + +% ------------------- helper function to set up figure +function subfig = setupfigure(dsname,xlo,xhi) +%SETUPFIGURE Set up figure to do graphical exclusion +% We're excluding based on data in one dataset +dsdb = dfswitchyard('getdsdb'); +a = down(dsdb); +ds = []; +while(~isempty(a)) + if isequal(dsname,a.name) + ds = a; + break; + end + a = right(a); +end +if isempty(ds) + subfig = []; + return +end + +[ydata,cens,freq] = getincludeddata(ds,[]); % get data w/o NaNs +if isempty(cens) + cens = zeros(size(ydata)); +end +if isempty(freq) + freq = ones(size(ydata)); +end + +% Sort y and carry along the rest +[ydata,i] = sort(ydata); +cens = cens(i); +freq = freq(i); + +% Create x and y vectors to plot +n = sum(freq); +x = zeros(n,1); +y = zeros(n,1); +g = zeros(n,1); +j = 1; +x(1:freq(1)) = ydata(1); +y(1:freq(1)) = (1:freq(1))'; +g(1:freq(1)) = cens(1); +i = freq(1)+1; +for k=2:length(ydata) + for j=1:freq(k) + x(i) = ydata(k); + g(i) = cens(k); + if (i>1) && (x(i)==x(i-1)) + y(i) = y(i-1) + 1; + else + y(i) = 1; + end + i = i+1; + end +end + +% Make a figure to receive graph +dolegend = any(g==0) & any(g==1); +subfig = makefigure(x,y,dolegend, xlo, xhi); +ax = get(subfig,'CurrentAxes'); + +% Place data points into graph +t = (g==0); +if any(t) + line('XData',x(t),'YData',y(t),'HitTest','off',... + 'Color','b','Marker','.','LineStyle','none',... + 'MarkerSize',24,'Parent',ax,'Tag','observed'); +end +t = (g==1); +if any(t) + line('XData',x(t),'YData',y(t),'HitTest','off',... + 'Color','b','Marker','o','LineStyle','none',... + 'Parent',ax,'Tag','censored'); +end + +% ----------------------- +function resize(subfig,varargin) + +if ~isempty(subfig) && ~ishandle(subfig) ... 
+ && ~isequal(get(subfig,'type'),'figure') + subfig = gcbf; +end +pFig = get(subfig,'Position'); +ax = get(subfig,'CurrentAxes'); +oldpos = getappdata(subfig,'oldpos'); +if isequal(oldpos,pFig) + return +end + +% Position buttons against the edge +paxes = get(ax,'Position'); +margin = 15; +hButtons = getappdata(subfig,'buttons'); +base = pFig(3); +for j=1:length(hButtons) + p = get(hButtons(j),'Position'); + p(1) = base - p(3) - margin; + set(hButtons(j),'Position',p); + base = p(1); +end + +% Move axes above buttons +paxes(1) = 75; +paxes(3) = max(1,pFig(3)-150); +paxes(2) = p(2) + p(4) + 3*margin; +paxes(4) = max(1,pFig(4) - margin - paxes(2)); +set(ax,'Position',paxes); +setappdata(subfig,'oldpos',pFig); + +% -------------------------------- +function buttoncallback(varargin) + +% Get some handles and dimensions +button = gcbo; +fig = gcbf; +ax = get(fig,'CurrentAxes'); +xlim = get(ax,'XLim'); +ylim = get(ax,'YLim'); +dx = 0.05 * diff(xlim); +buttontag = get(button,'Tag'); +addremovepatch(fig,buttontag); +updateGUI; + +% -------------------------------- +function addremovepatch(fig,whichbound,xbnd,addremove) + +% Get some handles and dimensions +ax = get(fig,'CurrentAxes'); +xlim = get(ax,'XLim'); +ylim = get(ax,'YLim'); +dx = 0.05 * diff(xlim); +if nargin>=4 && isequal(addremove,'add') + forceadd = true; +else + forceadd = false; +end + +if nargin<3 + if isequal(whichbound,'lower') + xbnd = xlim(1) + dx; + else + xbnd = xlim(2) - dx; + end +end + +% Carry out requested action +if isequal(whichbound,'lower') + hPatch = findall(fig,'Tag','lowerpatch'); + if isempty(hPatch) || forceadd + otherpatch = findall(fig,'Tag','upperpatch'); + if ~isempty(otherpatch) + % Never put new limit beyond the other limit + otherx = get(otherpatch,'XData'); + otherx = otherx(2); + xbnd = min(xbnd, xlim(1) + .9*(otherx-xlim(1))); + end + x = [xlim(1), xbnd, xbnd, xlim(1), xlim(1)]; + y = [ylim(1), ylim(1), ylim(2), ylim(2), ylim(1)]; + if isempty(hPatch) + patch(x,y,[.9 .9 .9],'Parent',ax,'Tag','lowerpatch',... + 'FaceAlpha',0.6,'ButtonDownFcn',@startselect); + else + set(hPatch,'XData',x,'YData',y); + end + + newtxt = 'Remove Lower Limit'; + else + delete(hPatch); + newtxt = 'Add Lower Limit'; + end +else + hPatch = findall(fig,'Tag','upperpatch'); + if isempty(hPatch) || forceadd + otherpatch = findall(fig,'Tag','lowerpatch'); + if ~isempty(otherpatch) + % Never put new limit beyond the other limit + otherx = get(otherpatch,'XData'); + otherx = otherx(2); + xbnd = max(xbnd, xlim(2) - .9*(xlim(2)-otherx)); + end + x = [xlim(2), xbnd, xbnd, xlim(2), xlim(2)]; + y = [ylim(1), ylim(1), ylim(2), ylim(2), ylim(1)]; + if isempty(hPatch) + patch(x,y,[.9 .9 .9],'Parent',ax,'Tag','upperpatch',... 
+ 'FaceAlpha',0.6,'ButtonDownFcn',@startselect); + else + set(hPatch,'XData',x,'YData',y); + end + newtxt = 'Remove Upper Limit'; + else + delete(hPatch); + newtxt = 'Add Upper Limit'; + end +end + +% Update button text +button = findobj(fig,'Tag',whichbound); +set(button,'String',newtxt); + +% ------------- function to initiate graphical selection +function startselect(varargin) + +% Get figure and axis handles, define functions to do and end selection +subfig = gcbf; +ax = get(subfig,'CurrentAxes'); + +% Get current exclusion limits, use axis limits if none +lims = get(ax,'XLim'); +hPatch = findall(subfig,'Tag','lowerpatch'); +if ~isempty(hPatch) + x = get(hPatch,'XData'); + lims(1) = max(x(:)); +end +hPatch = findall(subfig,'Tag','upperpatch'); +if ~isempty(hPatch) + x = get(hPatch,'XData'); + lims(2) = min(x(:)); +end + +% Save information for other functions +hPatch = gcbo; +set(subfig,'WindowButtonMotionFcn',{@movepatch hPatch},... + 'WindowButtonUpFcn',@endmove); +setappdata(ax,'limits',lims); +setappdata(ax,'objmoving',hPatch); + + +% ------------- function to update GUI +function updateGUI() +subfig = gcbf; + +hPatch = findall(subfig,'Tag','lowerpatch'); +if isempty(hPatch) + xl = ''; +else + xl = get(hPatch,'XData'); + xl = num2str(xl(2)); +end +hPatch = findall(subfig,'Tag','upperpatch'); +if isempty(hPatch) + xh = ''; +else + xh = get(hPatch,'XData'); + xh = num2str(xh(2)); +end + +com.mathworks.toolbox.stats.Exclude.getExcludePanel.updateBoundsFields(xl, xh); + + +% ------------- function to complete graphical selection +function endmove(varargin) + +% Turn off window functions to end selection +subfig = gcbf; +set(subfig,'WindowButtonMotionFcn',@fixcursor, 'WindowButtonUpFcn',[]); +updateGUI; + +% ------------- move patch boundary +function movepatch(ignore1,ignore2,hPatch) + +varargin +subfig = gcbf; +ax = get(subfig,'CurrentAxes'); + +% Get exclusion limits and axis limits +lims = getappdata(ax,'limits'); +xlim = get(ax,'XLim'); +delta = .01 * abs(xlim(2) - xlim(1)); + +% Extend patch to the current point, but within limits +cp = get(gca,'CurrentPoint'); +x = cp(1); +if isequal(get(hPatch,'Tag'),'lowerpatch') + lobnd = xlim(1) + min(delta, .9*(lims(2)-xlim(1))); + upbnd = min(xlim(2),lims(2)) - delta; + x = max(lobnd, min(x,upbnd)); + lims(1) = x; +else + lobnd = max(xlim(1),lims(1)) + delta; + upbnd = xlim(2) - min(delta, .9*(xlim(2)-lims(1))); + x = min(upbnd, max(lobnd,x)); + lims(2) = x; +end + +% Update saved limits, and x data for this patch +setappdata(ax,'limits',lims); +xdata = get(hPatch,'XData'); +xdata(2:3) = x; +set(hPatch,'XData',xdata); + +% --------------- set cursor if we're on something that can move +function fixcursor(varargin) +ptr = get(gcbf,'Pointer'); +onpatch = isequal(get(hittest,'Type'),'patch'); +if isequal(ptr,'arrow') + if onpatch + set(gcbf,'Pointer','left'); + end +else + if ~onpatch + set(gcbf,'Pointer','arrow'); + end +end + +% --------------- close figure +function done(varargin) + +delete(gcbf); + +% ---------------------- helper to notify GUI that figure is closing +function closefig(varargin) +%CLOSEFIG + +com.mathworks.toolbox.stats.Exclude.getExcludePanel.setGraphExcludeFlag(false); +closereq; + diff --git a/boosting/weightedstats/private/dfhelpviewer.m b/boosting/weightedstats/private/dfhelpviewer.m new file mode 100644 index 0000000..fac7c68 --- /dev/null +++ b/boosting/weightedstats/private/dfhelpviewer.m @@ -0,0 +1,27 @@ +function dfhelpviewer(topic, errorname) +% DFHELPVIEWER is a helper file for the Distribution Fitting Toolbox 
+% DFHELPVIEWER Displays help for Distriubtion Fitting TOPIC. If the map file +% cannot be found, an error is displayed using ERRORNAME + +% Copyright 2003-2004 The MathWorks, Inc. +% $Revision: 1.1.6.2 $ + +import java.io.*; + +error = false; +mapfilename = [docroot '/toolbox/stats/stats.map']; +f = File(mapfilename); +if f.exists + try + helpview(mapfilename, topic); + catch + error = true; + end +else + error = true; +end +if error + message = sprintf('Unable to display help for %s\n', ... + errorname); + errordlg(message); +end diff --git a/boosting/weightedstats/private/dfhistbins.m b/boosting/weightedstats/private/dfhistbins.m new file mode 100644 index 0000000..6aa6861 --- /dev/null +++ b/boosting/weightedstats/private/dfhistbins.m @@ -0,0 +1,259 @@ +function [centers,edges] = dfhistbins(data,cens,freq,binInfo,F,x) +%DFHISTBINS Compute bin centers for a histogram +% [CENTERS,EDGES] = DFHISTBINS(DATA,CENS,FREQ,BININFO,F,X) computes +% histogram bin centers and edges for the rule specified in BININFO. For +% the Freedman-Diaconis rule, DFHISTBINS uses the empirical distribution +% function F evaluated at the values X to compute the IQR. When there is +% censoring, DFHISTBINS cannot compute the Scott rule, and F-D is +% substituted. + +% $Revision: 1.1.6.3 $ $Date: 2004/01/24 09:35:43 $ +% Copyright 2001-2004 The MathWorks, Inc. + +xmin = min(data); +xmax = max(data); +xrange = xmax - xmin; +if isempty(freq) + n = length(data); +else + n = sum(freq); +end + +rule = binInfo.rule; +% Can't compute the variance for the Scott rule when there is censoring, +% use F-D instead. +if (rule == 2) && ~isempty(cens) && any(cens) + rule = 1; % Freedman-Diaconis +end + +switch rule +case 1 % Freedman-Diaconis + % Get "quartiles", which may not actually be the 25th and 75th points + % if there is a great deal of censoring, and compute the IQR. + iqr = diff(interp1q([F;1], [x;x(end)], [.25; .75])); + + % Guard against too small an IQR. This may be because most + % observations are censored, or because there are some extreme + % outliers. + if iqr < xrange ./ 10 + iqr = xrange ./ 10; + end + + % Compute the bin width proposed by Freedman and Diaconis, and the + % number of bins needed to span the data. Use approximately that + % many bins, placed at nice locations. + [centers,edges] = binpicker(xmin, xmax, 'FD', n, iqr); + +case 2 % Scott + if isempty(freq) + s = sqrt(var(data)); + else + s = sqrt(var(data,freq)); + end + + % Compute the bin width proposed by Scott, and the number of bins + % needed to span the data. Use approximately that many bins, + % placed at nice locations. + [centers,edges] = binpicker(xmin, xmax, 'Scott', n, s); + +case 3 % number of bins given + % Do not create more than 1000 bins. + [centers,edges] = binpicker(xmin, xmax, min(binInfo.nbins,1000)); + +case 4 % bins centered on integers + xscale = max(abs([xmin xmax])); + % If there'd be more than 1000 bins, center them on an appropriate + % power of 10 instead. + if xrange > 1000 + step = 10^ceil(log10(xrange/1000)); + xmin = step*round(xmin/step); % make the edges bin width multiples + xmax = step*round(xmax/step); + + % If a bin width of 1 is effectively zero relative to the magnitude of + % the endpoints, use a bigger power of 10. + elseif xscale*eps > 1; + step = 10^ceil(log10(xscale*eps)); + + else + step = 1; + end + centers = floor(xmin):step:ceil(xmax); + edges = (floor(xmin)-.5*step):step:(ceil(xmax)+.5*step); + +case 5 % bin width given + % Do not create more than 1000 bins. 
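+ % (the requested width is widened to xrange/1000 if it would produce more)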
+ binWidth = max(binInfo.width, xrange/1000); + if (binInfo.placementRule == 1) % automatic placement: anchored at zero + anchor = 0; + else % anchored + anchor = binInfo.anchor; + end + leftEdge = anchor + binWidth*floor((xmin-anchor) ./ binWidth); + nbins = max(1,ceil((xmax-leftEdge) ./ binWidth)); + edges = leftEdge + (0:nbins) .* binWidth; % get exact multiples + centers = edges(2:end) - 0.5 .* binWidth; +end + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +function [centers,edges] = binpicker(xmin, xmax, nbins, nobs, extraArg) +%BINPICKER Generate pleasant bin locations for a histogram. +% CENTERS = BINPICKER(XMIN,XMAX,NBINS) computes centers for histogram +% bins spanning the range XMIN to XMAX, with extremes of the bins at +% locations that are a multiple of 1, 2, 3, or 5 times a power of 10. +% +% CENTERS = BINPICKER(XMIN,XMAX,'FD',N,IQR) uses the Freedman-Diaconis +% rule for bin width to compute the number of bins. N is the number of +% data points, and IQR is the sample interquartile range of the data. +% +% CENTERS = BINPICKER(XMIN,XMAX,'Scott',N,STD) uses Scott's rule for the +% bin width to compute the number of bins. N is the number of data +% points, and STD is the sample standard deviation of the data. Scott's +% rule is appropriate for "normal-like" data. +% +% CENTERS = BINPICKER(XMIN,XMAX,'Sturges',N) uses Sturges' rule for the +% number of bins. N is the number of data points. Sturges' rule tends +% to give fewer bins than either F-D or Scott. +% +% For the Freedman-Diaconis, Scott's, or Sturges' rules, BINPICKER +% automatically generates "nice" bin locations, where the bin width is 1, +% 2, 3, or 5 times a power of 10, and the bin edges fall on multiples of +% the bin width. Thus, the actual number of bins will often differ +% somewhat from the number defined by the requested rule. +% +% [CENTERS,EDGES] = BINPICKER(...) also returns the bin edges. + +% References: +% [1] Freedman, D. and P. Diaconis (1981) "On the histogram as a +% density estimator: L_2 theory", Zeitschrift fur +% Wahrscheinlichkeitstheorie und verwandte Gebiete, 57:453–476. +% [2] Scott, D.W. (1979) "On optimal and data-based histograms", +% Biometrika, 66:605-610. +% [3] Sturges, H.A. (1926) "The choice of a class interval", +% J.Am.Stat.Assoc., 21:65-66. + +if nargin < 3 + error('stats:binpicker:TooFewInputs', ... + 'Requires at least three inputs.'); +elseif xmax < xmin + error('stats:binpicker:MaxLessThanMin', ... + 'XMAX must be greater than or equal to XMIN.'); +end + +% Bin width rule specified +if ischar(nbins) + ruleNames = ['fd '; 'scott '; 'sturges']; + rule = strmatch(lower(nbins),ruleNames); % 1, 2, or 3 + if isempty(rule) + error('stats:binpicker:UnknownRule', ... + 'RULE must be one of ''FD'', ''Scott'', or ''Sturges''.'); + elseif nobs < 1 + nbins = 1; % give 1 bin for zero-length data + rule = 0; + end + +% Number of bins specified +else + if nbins < 1 || round(nbins) ~= nbins + error('stats:binpicker:NegativeNumBins', ... + 'NBINS must be a positive integer.'); + end + rule = 0; +end + +xscale = max(abs([xmin,xmax])); +xrange = xmax - xmin; + +switch rule +case 1 % Freedman-Diaconis rule + % Use the interquartile range to compute the bin width proposed by + % Freedman and Diaconis, and the number of bins needed to span the + % data. Use approximately that many bins, placed at nice + % locations. 
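+ % Freedman-Diaconis raw width: 2*IQR / N^(1/3); extraArg carries the IQR here.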
+ iqr = extraArg; + rawBinWidth = 2*iqr ./ nobs.^(1/3); + +case 2 % Scott's rule + % Compute the bin width proposed by Scott, and the number of bins + % needed to span the data. Use approximately that many bins, + % placed at nice locations. + s = extraArg; + rawBinWidth = 3.49*s ./ nobs.^(1/3); + +case 3 % Sturges' rule for nbins + nbins = 1 + log2(nobs); + rawBinWidth = xrange ./ nbins; + +otherwise % number of bins specified + rawBinWidth = xrange ./ nbins; +end + +% Make sure the bin width is not effectively zero. Otherwise it will never +% amount to anything, which is what we knew all along. +rawBinWidth = max(rawBinWidth, eps*xscale); +% it may _still_ be zero, if data are all zeroes + +% If the data are not constant, place the bins at "nice" locations +if xrange > max(sqrt(eps)*xscale, realmin) + % Choose the bin width as a "nice" value. + powOfTen = 10.^floor(log10(rawBinWidth)); % next lower power of 10 + relSize = rawBinWidth ./ powOfTen; % guaranteed in [1, 10) + if relSize < 1.5 + binWidth = 1*powOfTen; + elseif relSize < 2.5 + binWidth = 2*powOfTen; + elseif relSize < 4 + binWidth = 3*powOfTen; + elseif relSize < 7.5 + binWidth = 5*powOfTen; + else + binWidth = 10*powOfTen; + end + + % Automatic rule specified + if rule > 0 + % Put the bin edges at multiples of the bin width, covering x. The + % actual number of bins used may not be exactly equal to the requested + % rule. Always use at least two bins. + leftEdge = binWidth*floor(xmin ./ binWidth); + nbinsActual = max(2, ceil((xmax-leftEdge) ./ binWidth)); + + % Number of bins specified + else + % Put the extreme bin edges at multiples of the bin width, covering x. + % Then recompute the bin width to make the actual number of bins used + % exactly equal to the requested number. + leftEdge = binWidth*floor(xmin ./ binWidth); + rightEdge = binWidth*ceil(xmax ./ binWidth); + binWidth = (rightEdge - leftEdge) ./ nbins; + nbinsActual = nbins; + end + +else % the data are nearly constant + % For automatic rules, use a single bin. + if rule > 0 + nbins = 1; + end + + % There's no way to know what scale the caller has in mind, just create + % something simple that covers the data. + if xscale > realmin + % Make the bins cover a unit width, or as small an integer width as + % possible without the individual bin width being zero relative to + % xscale. Put the left edge on an integer or half integer below + % xmin, with the data in the middle 50% of the bin. Put the left + % edge similarly above xmax. + binRange = max(1, ceil(nbins*eps*xscale)); + leftEdge = floor(2*(xmin-binRange./4))/2; + rightEdge = ceil(2*(xmax+binRange./4))/2; + else + leftEdge = -0.5; + rightEdge = 0.5; + end + binWidth = (rightEdge - leftEdge) ./ nbins; + nbinsActual = nbins; +end + +edges = leftEdge + (0:nbinsActual) .* binWidth; % get exact multiples +centers = edges(2:end) - 0.5 .* binWidth; diff --git a/boosting/weightedstats/private/dficons.mat b/boosting/weightedstats/private/dficons.mat new file mode 100644 index 0000000..3517c4f Binary files /dev/null and b/boosting/weightedstats/private/dficons.mat differ diff --git a/boosting/weightedstats/private/dfpreview.m b/boosting/weightedstats/private/dfpreview.m new file mode 100644 index 0000000..27012ff --- /dev/null +++ b/boosting/weightedstats/private/dfpreview.m @@ -0,0 +1,100 @@ +function [err, imsource] = dfpreview(dexpr, cexpr, fexpr, width, height, ds, binInfo) +% For use by DFITTOOL + +% $Revision: 1.1.6.6 $ $Date: 2004/01/24 09:35:44 $ +% Copyright 2001-2004 The MathWorks, Inc. 
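+%
+% ERR is an error message string (empty on success) and IMSOURCE is an image
+% source for the preview panel. With fewer than six inputs the data, censoring,
+% and frequency expressions are evaluated in the base workspace; otherwise they
+% are taken from the dataset object DS.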
+ +if nargin<6 + err = dfcheckselections(dexpr, cexpr, fexpr); +else + err = ''; +end +if ~isequal(err, '') + imsource = []; + return; +end + +NONE='(none)'; + +if nargin<6 + data=evalin('base',dexpr); +else + data = ds.y; +end + +if nargin<6 + if isempty(cexpr) || isequal(cexpr, NONE) + censoring=[]; + else + censoring=evalin('base',cexpr); + end +else + censoring = ds.censored; +end + +if nargin<6 + if isempty(fexpr) || isequal(fexpr, NONE) + frequency=[]; + else + frequency=evalin('base',fexpr); + end +else + frequency=ds.frequency; +end + +if nargin < 5 + width = 200; + height = 200; +end + +tempfigure=figure('units','pixels','position',[0 0 width height], ... + 'handlevisibility','callback', ... + 'integerhandle','off', ... + 'visible','off', ... + 'paperpositionmode', 'auto', ... + 'color','w'); +tempaxes=axes('position',[.05 .05 .9 .9], ... + 'parent',tempfigure, ... + 'box','on', ... + 'visible','off'); + +% If data has a complex part, it will spit a warning to the command line, so +% turn off warnings before plotting. +warnstate=warning('off', 'all'); + + +if nargin < 6 + binInfo = dfgetset('binDlgInfo'); +elseif nargin < 7 + binInfo = ds.binDlgInfo; +else + % binInfo passed in +end + +% If we're working on expressions rather than data in an existing data set, +% we may need to remove NaNs +[ignore1,ignore2,data,censoring,frequency] = statremovenan(data,censoring,frequency); + +% Compute the bin centers using the ecdf +% to allow a quartile computation even when there is censoring. +[fstep, xstep] = ecdf(data, 'censoring', censoring, 'frequency', frequency); +[dum,binEdges] = dfhistbins(data,censoring,frequency,binInfo,fstep,xstep); + +set(0,'CurrentFigure', tempfigure); +set(tempfigure,'CurrentAxes', tempaxes); + +% Plot a histogram from ecdf using the computed number of bins +ecdfhist(tempaxes, fstep, xstep, 'edges', binEdges); +set(tempaxes, 'xtick',[],'ytick',[]); +axis(tempaxes,'tight'); +allchildren = get(tempaxes, 'children'); +patchchildren = findobj(allchildren,'flat','Type','patch'); +set(patchchildren, 'facecolor', [.9 .9 .9]); +warning(warnstate); + +x=hardcopy(tempfigure,'-dzbuffer','-r0'); +% give the image a black edge +x(1,:,:)=0; x(end,:,:)=0; x(:,1,:)=0; x(:,end,:)=0; +imsource=im2mis(x); + +delete(tempfigure); diff --git a/boosting/weightedstats/private/dfsectionpreview.m b/boosting/weightedstats/private/dfsectionpreview.m new file mode 100644 index 0000000..8c2c6b2 --- /dev/null +++ b/boosting/weightedstats/private/dfsectionpreview.m @@ -0,0 +1,45 @@ +function imsource = dfsectionpreview(outlier, width, height) +% For use by DFITTOOL + +% $Revision: 1.1.6.3 $ +% Copyright 2003-2004 The MathWorks, Inc. + +if nargin < 3 + width = 180; + height = 180; +end + +tempfigure=figure('units','pixels','position',[0 0 width height], ... + 'handlevisibility','off', ... + 'integerhandle','off', ... + 'visible','off', ... + 'paperpositionmode', 'auto', ... + 'color','w'); + +xlim = [0 4]; +ylim = [0 4]; +ax=axes('position',[.05 .05 .9 .9], ... + 'parent',tempfigure, ... + 'xtick',[],'ytick',[], ... + 'box','on', ... 
+ 'visible','off', 'XLim',xlim,'YLim',ylim); + +gr = [.9 .9 .9]; +o = handle(outlier); + +xlo = o.YLow; +if ~isempty(xlo) + patch([0 1 1 0], [0 0 4 4], gr,'LineStyle','none','Parent',ax); +end + +xhi = o.YHigh; +if ~isempty(xhi) + patch([3 4 4 3], [0 0 4 4], gr,'LineStyle','none','Parent',ax); +end + +x=hardcopy(tempfigure,'-dzbuffer','-r0'); +% give the image a black edge +x(1,:,:)=0; x(end,:,:)=0; x(:,1,:)=0; x(:,end,:)=0; +imsource=im2mis(x); + +delete(tempfigure); diff --git a/boosting/weightedstats/private/dfsession.m b/boosting/weightedstats/private/dfsession.m new file mode 100644 index 0000000..9ecdc7a --- /dev/null +++ b/boosting/weightedstats/private/dfsession.m @@ -0,0 +1,310 @@ +function ok=dfsession(action,fn) +%DFSESSION Clear, load, or save a Distribution Fitting session + +% $Revision: 1.1.6.7 $ $Date: 2004/02/01 22:10:39 $ +% Copyright 2003-2004 The MathWorks, Inc. + +% Create a structure with version information +str.ftype = 'Distribution Fitting session'; % type of file +str.version = 1; % the most current version +str.allversions = [1]; % all supported versions +str.properties = {'Color' 'LineStyle' 'LineWidth' 'Marker' 'MarkerSize'}; + +% Variables we save, and the number required to be in a saved file +varnames = {'ftype' 'version' 'allds' 'dsinfo' 'allfits' ... + 'fitinfo' 'alldists' 'outliers' 'guistate'}; +nrequired = 9; + +if nargin<2 + fn = ''; +end + +% Wrap real function with a try/catch block so we can control the legend +oldleg = dfgetset('showlegend'); +if isequal(oldleg,'on') + dfgetset('showlegend','off'); +end + +ok = true; +%try + switch(action) + case 'save' + ok = savesession(fn,str,varnames); + + case 'load' + ok = loadsession(fn,str,varnames,nrequired); + + case 'clear' + ok = clearsession; + + otherwise + ok = false; + end +%catch +%end + +if isequal(oldleg,'on') + dfgetset('showlegend','on'); + dfupdatelegend(dfgetset('dffig')); +end + +% ---------------------------------------------------------------- +function ok=savesession(fn,str,varnames) +%DFSAVESESSION Callback to save a distribution fitting session to a file + +% Extract some variables from the input structure +ftype = str.ftype; +version = str.version; +guistate.ftype = dfgetset('ftype'); +guistate.dtype = dfgetset('dtype'); +guistate.binDlgInfo = dfgetset('binDlgInfo'); + +% Get all M data set object instances and some properties +dsdb = getdsdb; +allds = find(dsdb); +allds(allds==dsdb) = []; +nds = length(allds); +dsinfo = cell(nds,1); +for j=1:length(allds) + % Save all datasets + dj = allds(j); + dsinfo{j,1} = dj.plot; % remember this flag separately from object +end + +% Get all M fit object instances and some properties +fitdb = getfitdb; +allfits = find(fitdb); +allfits(allfits==fitdb) = []; +nfits = length(allfits); +fitinfo = cell(nfits,1); +for j=1:nfits + % Save all fit objects separately + fj = allfits(j); + fitinfo{j,1} = fj.plot; % remember this flag separately from object +end + +% get the user-defined distributions +alldists = dfgetset('alldistributions'); + +% Get the outliers (excluded sets) +outdb = getoutlierdb; +outliers = find(outdb); +outliers(outliers==outdb) = []; + +% Get file name to use, remember the directory name +olddir = dfgetset('dirname'); +filespec = [olddir '*.dfit']; +if isempty(fn) + [fn,pn] = uiputfile(filespec,'Save Session'); + if isequal(fn,0) || isequal(pn,0) + ok = false; + return + end + if ~ismember('.',fn) + fn = [fn '.dfit']; + end + dfgetset('dirname',pn); + fn = [pn fn]; +end + +% Select a file and save the session variables +le = lasterr; 
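+% Clear lasterr so a failure inside SAVE can be detected and reported below,
+% then restore the previous message afterwards.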
+lasterr(''); +try + save(fn, varnames{:}, '-mat'); +catch + uiwait(errordlg(sprintf('Error saving session file:\n%s', lasterr),... + 'Save Error','modal')) + ok = false; + return +end +lasterr(le); +ok = true; + +% ---------------------------------------------------------------- +function ok=loadsession(fn,str,varnames,nrequired) +%DFLOADSESSION Callback to load a saved distribution fitting session + +import com.mathworks.toolbox.stats.*; + +ok = true; +ftype = str.ftype; +version = str.version; +allversions = str.allversions; +properties = str.properties; + +% Get file name and load from it, remember the directory name +olddir = dfgetset('dirname'); +filespec = [olddir '*.dfit']; + +if isempty(fn) + [fn,pn] = uigetfile(filespec,'Load Session'); + if isequal(fn,0) || isequal(pn,0) + return + end + if ~ismember('.',fn) + fn = [fn '.cfit']; + end + dfgetset('dirname',pn); + fn = [pn fn]; +end + +% Clear current session +clearsession; + +% Get file contents without adding them to the data bases automatically +dsmgr = DataSetsManager.getDataSetsManager; +fmgr = FitsManager.getFitsManager; +dsmgr.turnOffUDDListener; +fmgr.turnOffUDDListener; +try + s = load('-mat',fn); +catch + uiwait(errordlg(sprintf('Error loading session file:\n%s', lasterr),... + 'Load Error','modal')) + dsmgr.turnOnUDDListener; + return +end +dsmgr.turnOnUDDListener; +fmgr.turnOnUDDListener; + +for j=1:nrequired + if ~isfield(s,varnames{j}) + uiwait(errordlg('Not a valid Distribution Fitting session file',... + 'File Invalid','modal')) + return + end +end +if ~isequal(s.ftype,ftype) + uiwait(errordlg('Not a valid Distribution Fitting session file',... + 'File Invalid','modal')) + return +end + +if ~ismember(s.version,allversions) + uiwait(errordlg('Bad version number in Distribution Fitting session file',... + 'Invalid Version','modal')) + return +end + +% Install the saved distribution definitions +dft = DistributionFitting.getDistributionFitting; +try + dfsetdistributions(dft,s.alldists); +catch + uiwait(errordlg(sprintf('Error loading saved session:\n%s',lasterr),... + 'File Invalid','modal')); +end + +% Reset some properties of the gui state +dffig = dfgetset('dffig'); +dfsetplottype(dffig, s.guistate.ftype, s.guistate.dtype); +if isfield(s.guistate,'binDlgInfo') + dfgetset('binDlgInfo',s.guistate.binDlgInfo); +end + +hFunctionList = findall(dffig, 'Tag', 'displaylist'); +ftypes = getappdata(hFunctionList,'codenames'); +value = strmatch(s.guistate.ftype, ftypes, 'exact'); +set(hFunctionList, 'Value', value); + +isprobplot = isequal(s.guistate.ftype, 'probplot'); +if isprobplot + hDistributionList = findall(dffig, 'Tag', 'typelist'); + dtypes = getappdata(hDistributionList,'okcodenames'); + value = strmatch(s.guistate.dtype, dtypes, 'exact'); + set(hDistributionList, 'Value', value); +end + +% Make sure distribution list is accurate +dfupdateppdists(dffig); + +% Plot datasets that are flagged for plotting +for j=1:length(s.allds); + dj = s.allds(j); + + % Now set the plot flag. Some non-serializable information + % (line handles, listeners, etc.) will be re-created at this point. 
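+ % Discard the stale line handle restored from the file; setting plot=1 below
+ % recreates the plotted line and its listeners.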
+ dj.line = []; + if s.dsinfo{j} + dj.plot = 1; + end + dsmgr.addDataSet(java(dj), dj.name); +end + +% Get exclusion rules (outliers) +omgr = OutliersManager.getOutliersManager; +omgr.init; + +% Fix up fit objects +fitdb = getfitdb; +outdb = getoutlierdb; +for j=1:length(s.allfits) + fj = s.allfits(j); + + % Restore all dataset handles + dsname = fj.dataset; + for k=1:length(s.allds) + if isequal(dsname,s.allds(k).name) + fj.dshandle = s.allds(k); + break; + end + end + + % Restore all exclusion rule handles + ername = fj.exclusionrulename; + if ~isempty(ername) + erhandle = find(outdb,'name',ername); + fj.exclusionrule = erhandle; + end + + % Connect this fit to the fit data base and add to fits manager + connect(fj,fitdb,'up'); + + % Now set the plot flag. Some non-serializable information + % (line handles, listeners, etc.) will be re-created at this point. + fj.line = []; + if s.fitinfo{j,1} + fj.plot = 1; + end +end + + +% ---------------------------------------------------------------- +function ok=clearsession +%DFCLEARSESSION Callback to clear distribution fitting session + +ok = true; + +% Trigger java listeners to clear all saved java content +import com.mathworks.toolbox.stats.*; +DFToolClearManager.getDFToolClearManager.listenerTrigger; + +% Delete all udd fit object instances +fitdb = getfitdb; +ft = down(fitdb); +while(~isempty(ft)) + ftnew = right(ft); + delete(ft); + ft = ftnew; +end + +% Delete all udd data set object instances +dsdb = getdsdb; +dj = down(dsdb); +while(~isempty(dj)) + if dj.plot + dj.plot = 0; + end + djnew = right(dj); + delete(dj); + dj = djnew; +end + +% Delete all udd outlier object instances +outdb = getoutlierdb; +outliers = find(outdb); +outliers(outliers==outdb) = []; +delete(outliers); + +%init (behind the scenes) analysis and plot diff --git a/boosting/weightedstats/private/dfsetbinwidthrules.m b/boosting/weightedstats/private/dfsetbinwidthrules.m new file mode 100644 index 0000000..ba145cf --- /dev/null +++ b/boosting/weightedstats/private/dfsetbinwidthrules.m @@ -0,0 +1,41 @@ +function dfsetbinwidthrules(dataset, v0, v1, v2, v3, v4, v5, v6) +% DFSETBINWIDTHRULES Helper function for the dfittool set default bin width panel + +% Copyright 2003-2004 The MathWorks, Inc. +% $Revision: 1.1.6.3 $ $Date: 2004/01/24 09:35:47 $ + +binDlgInfo.rule = v0; +binDlgInfo.nbinsExpr = v1; +binDlgInfo.nbins = str2num(v1); +binDlgInfo.widthExpr = v2; +binDlgInfo.width = str2num(v2); +binDlgInfo.placementRule = v3; +binDlgInfo.anchorExpr = v4; +binDlgInfo.anchor = str2num(v4); +binDlgInfo.applyToAll = v5; +binDlgInfo.setDefault = v6; + + +if (v5 == true) % apply to all + dsdb = getdsdb; + ds = down(dsdb); + while(~isempty(ds)) + ds.binDlgInfo = binDlgInfo; + ds = right(ds); + end + dfupdateallplots(true, false, true); +elseif ~isempty(dataset) + ds = handle(dataset); + ds.binDlgInfo = binDlgInfo; + clearplot(ds); + updateplot(ds); +end + +% Update the main fig axis limits to fit the new histograms +dfupdatexlim; +dfupdateylim; + +if (v6 == true) % set default + dfgetset('binDlgInfo', binDlgInfo); +end + diff --git a/boosting/weightedstats/private/dfsetconflev.m b/boosting/weightedstats/private/dfsetconflev.m new file mode 100644 index 0000000..d7507af --- /dev/null +++ b/boosting/weightedstats/private/dfsetconflev.m @@ -0,0 +1,66 @@ +function dfsetconflev(dffig,clev) +%DFSETCONFLEV Set confidence level for curve fitting + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:48 $ +% Copyright 2003-2004 The MathWorks, Inc. 
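+%
+% If CLEV is empty the user is prompted for a percentage; the accepted value is
+% stored as a fraction and propagated to every data set and fit before the
+% y-limits and menu check marks are updated.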
+ +% Get new value +oldlev = dfgetset('conflev'); +if isempty(clev) + ctxt = inputdlg({'Confidence level (in percent):'},... + 'Set Confidence Level',1,{num2str(100*oldlev)}); + if isempty(ctxt) + clev = oldlev; + else + ctxt = ctxt{1}; + clev = str2double(ctxt); + if ~isfinite(clev) | ~isreal(clev) | clev<=0 | clev>=100 + errordlg(sprintf(... + ['Bad confidence level "%s".\n' ... + 'Must be a percentage larger than 0 and smaller than 100.\n' ... + 'Keeping old value %g.'],... + ctxt,100*oldlev),... + 'Error','modal'); + clev = oldlev; + else + clev = clev/100; + end + end +end +if oldlev~=clev + dfgetset('conflev',clev); + + % Update any existing data sets and fits + dsdb = getdsdb; + ds = down(dsdb); + while(~isempty(ds)) + ds.confLev = clev; + ds = right(ds); + end + fitdb = getfitdb; + ft = down(fitdb); + while(~isempty(ft)) + ft.confLev = clev; + ft = right(ft); + end + dfupdateylim; + + % Check the appropriate menu item + h = findall(dffig,'Type','uimenu','Tag','conflev'); + set(h,'Checked','off'); + verysmall = sqrt(eps); + if abs(clev-.95)=3 + dfgetset('dtype',dtype); +else + dtype = []; +end + +% Get rid of axis controls +if isequal(dfgetset('showaxlimctrl'),'on') + dftoggleaxlimctrl(dffig); +end + +% Determine and remember if this function type supports bounds +oktypes = {'cdf' 'survivor' 'cumhazard' 'icdf'}; +dobounds = ismember(ftype, oktypes); +dfgetset('dobounds',dobounds); + +% Get the array of data sets +dsdb = getdsdb; +ds = down(dsdb); + +if ~isequal(oldftype,ftype) + % Change the function type for each one + while(~isempty(ds)) + setftype(ds,ftype); + ds = right(ds); + end +elseif isequal(ftype,'probplot') && ~isequal(olddtype,dtype) + while(~isempty(ds)) + clearplot(ds); + ds = right(ds); + end +end + +% Get the array of fits +fitdb = getfitdb; +ft = down(fitdb); +referenceFit = []; + +if ~isequal(oldftype,ftype) + % Change the function type for each one + while(~isempty(ft)) + setftype(ft,ftype); + ft = right(ft); + end +end + +% Determine if a specific set of parameters (reference fit) is required +if isequal(ftype,'probplot') + if ishandle(dtype) + referenceFit = dtype; + else + referenceFit = []; + end +end + +% Update title, labels, appdata, and (for probability plots) axes +ax = get(dffig,'CurrentAxes'); +newplot(ax); +setappdata(ax,'ReferenceDistribution',''); +setappdata(ax,'CdfFunction',''); +setappdata(ax,'InverseCdfFunction',''); +setappdata(ax,'DistributionParameters',''); +setappdata(ax,'LogScale',''); + +% Define the colors to be used here +a = [3 0 2 1 3 3 3 2 2 0 2 3 0 1 2 1 0 1 0 1 1 + 0 0 1 1 0 3 2 2 1 2 0 1 3 2 3 0 1 3 0 2 0 + 0 3 0 1 3 0 1 2 3 1 1 2 0 3 1 2 2 2 0 0 2]'/3; +set(ax,'ColorOrder',a); + +% Turn on grid if requested +if isequal(dfgetset('showgrid'), 'on') + set(ax,'xgrid','on','ygrid','on') +end + +if isequal(ftype,'probplot') + if isempty(referenceFit) + probplot(ax,dtype); + else + probplot(ax,{referenceFit.distspec, referenceFit.params}) + end + title(ax,''); +elseif isequal(ftype,'icdf') + set(get(ax,'XLabel'),'String','Probability'); + set(get(ax,'YLabel'),'String','Quantile'); +else + othertypes = {'pdf' 'cdf' ... + 'survivor' 'cumhazard'}; + otherlabels = {'Density' 'Cumulative probability' ... + 'Survivor function' 'Cumulative hazard'}; + jtype = strmatch(lower(ftype),othertypes,'exact'); + if isempty(jtype) % should never happen + ylab = ftype; + else + ylab = otherlabels{jtype}; + end + set(get(ax,'XLabel'),'String','Data'); + set(get(ax,'YLabel'),'String',ylab); +end +set(ax, 'box','on','Tag','main',... 
+ 'XLimMode','manual','YLimMode','manual','ZLimMode','manual',... + 'CLimMode','manual','AlimMode','manual'); + +% Reset the x limits, update plotted curves, and set y limits +dfupdateallplots(true,false); % update data sets +dfupdatexlim([],false); % get new x limits +dfupdateallplots(false,true); % update fits +dfupdateylim; % now compute y limits + +% Update the legend +dfupdatelegend(dffig); + +% Make sure each data set's plot property is enabled as appropriate +if isequal(oldftype,'probplot') || isequal(ftype,'probplot') + ds = down(dsdb); + while(~isempty(ds)) + com.mathworks.toolbox.stats.DataSetsManager.getDataSetsManager.dataSetChanged(... + java(ds), ds.name, ds.name); + ds = right(ds); + end +end \ No newline at end of file diff --git a/boosting/weightedstats/private/dfsetplottype.m b/boosting/weightedstats/private/dfsetplottype.m new file mode 100644 index 0000000..8f455c8 --- /dev/null +++ b/boosting/weightedstats/private/dfsetplottype.m @@ -0,0 +1,50 @@ +function dfsetplottype(dffig,ftype,dtype) +%DFSETPLOTTYPE Set plot type and distribution for distribution fitting + +% $Revision: 1.1.6.4 $ $Date: 2004/01/24 09:35:51 $ +% Copyright 2003-2004 The MathWorks, Inc. + +hFunctionList = findall(dffig, 'Tag', 'displaylist'); +hDistributionList = findall(dffig, 'Tag', 'typelist'); + +% Determine plot type +if nargin<2 || isempty(ftype) + choice = get(hFunctionList,'Value'); + ftypes = getappdata(hFunctionList,'codenames'); + ftype = ftypes{choice}; +end +isprobplot = isequal(ftype, 'probplot'); + +% Make sure distribution list is accurate +dfupdateppdists(dffig); + +% Get distribution type for probability plot +if isprobplot && (nargin<3 || isempty(dtype)) + choice = get(hDistributionList,'Value'); + dtypes = getappdata(hDistributionList,'okcodenames'); + ntypes = length(dtypes); + + % Choose either a distribution name or a fit + if choice<=ntypes + dtype = dtypes{choice}; + else + flist = getappdata(hDistributionList, 'fitnames'); + fname = flist{choice-ntypes}; + dtype = find(getfitdb, 'name', fname); + end +elseif ~isprobplot + dtype = []; +end + +% Enable or disable the distribution field for probability plots +typetext = findall(dffig, 'tag', 'typetext'); +if isprobplot + set(hDistributionList, 'Enable', 'on'); + set(typetext, 'Enable', 'on'); +else + set(hDistributionList, 'Enable', 'off'); + set(typetext, 'Enable', 'off'); +end + +% Update everything to use this type +dfsetfunction(dffig,ftype,dtype); diff --git a/boosting/weightedstats/private/dftips.m b/boosting/weightedstats/private/dftips.m new file mode 100644 index 0000000..d79ce72 --- /dev/null +++ b/boosting/weightedstats/private/dftips.m @@ -0,0 +1,210 @@ +function dftips(varargin) +%DFTIPS Display data and fit tips for Distribution Fitting figure + +%No input arguments are used here. + +% $Revision: 1.1.6.4 $ $Date: 2004/01/24 09:35:52 $ +% Copyright 2001-2004 The MathWorks, Inc. 
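% ------------------------------------------------------------------
% Illustrative sketch (not part of the shipped code): the mapping from
% display-function code names to y-axis labels used in dfsetfunction.m
% above, written as a standalone lookup.  The selected ftype here is a
% made-up example value.
othertypes  = {'pdf' 'cdf' 'survivor' 'cumhazard'};
otherlabels = {'Density' 'Cumulative probability' ...
               'Survivor function' 'Cumulative hazard'};
ftype = 'survivor';                           % hypothetical current selection
jtype = find(strcmp(ftype, othertypes), 1);   % strmatch(...,'exact') in the original
if isempty(jtype)
    ylab = ftype;                             % fall back to the raw code name
else
    ylab = otherlabels{jtype};
end
disp(ylab)                                    % prints 'Survivor function'
% ------------------------------------------------------------------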
+ + +dffig = gcbf; +if ~isequal(get(dffig,'SelectionType'),'normal') + return; +end + +h = hittest; +dftip = findobj(dffig,'tag','dftip'); +dfdot = findobj(dffig,'tag','dfdot'); +ftype = dfgetset('ftype'); + +% Figure out if the cursor is on something we know how to label +msg = ''; +if (~isempty(h)) && ishandle(h) && isequal(get(h,'type'),'line') + ax = get(h,'Parent'); + if isempty(ax) || ~isequal(get(ax,'Type'),'axes') + ax = get(dffig,'CurrentAxes'); + end + pt = get(ax,'CurrentPoint'); + x = pt(1,1); + y = pt(1,2); + htag = get(h,'tag'); + xlim = get(ax,'XLim'); + ylim = get(ax,'YLim'); + x = max(xlim(1), min(xlim(2),x)); + y = max(ylim(1), min(ylim(2),y)); + dx = diff(xlim) * 0.02; + dy = 0; + + % Create a label for the the selected data set + switch(htag) + case {'dfdata' 'dfdbounds'} + x = rounder(x,ax); % make x a rounder number + + ds = get(h,'UserData'); + h = ds.line; + xd = get(h,'XData'); + yd = get(h,'YData'); + if isequal(ftype,'probplot') + % Label the closest point on the probability plot + xyd = abs((xd-x)/diff(xlim)) + abs((yd-y)/diff(ylim)); + [ignore,jlo] = min(xyd); + if ~isempty(jlo) + jlo = jlo(1); + x = xd(jlo); + y = yd(jlo); + cdffunc = getappdata(ax,'CdfFunction'); + plotparams = getappdata(ax,'DistributionParameters'); + yname = feval(cdffunc,y,plotparams{:}); + msg = sprintf('%s\n(%g, %g)',ds.name,x,yname); + end + + elseif isequal(ftype,'pdf') + % Label a bar of the histogram + jlo = max(find(xd<=x)); + if ~isempty(jlo) && jlo0 + if isequal(get(ax,'XScale'),'linear') + pwr = floor(log10(0.005 * xrange)); + mult = 10 ^ -pwr; + x = round(mult * x) / mult; + elseif x>0 + pwr = floor(log10(x)) - 2; + mult = 10 ^ -pwr; + x = round(mult * x) / mult; + end +end diff --git a/boosting/weightedstats/private/dftoggleaxlimctrl.m b/boosting/weightedstats/private/dftoggleaxlimctrl.m new file mode 100644 index 0000000..53288d5 --- /dev/null +++ b/boosting/weightedstats/private/dftoggleaxlimctrl.m @@ -0,0 +1,27 @@ +function dftoggleaxlimctrl(dffig) +%DFTOGGLEAXLIMCTRL Toggle x and y axis limit controls on or off + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:53 $ +% Copyright 2003-2004 The MathWorks, Inc. + +% Get handle to menu item, may be current object or may not +h = gcbo; +if ~isequal(get(h,'Tag'),'showaxlimctrl') + h = findall(dffig,'Tag','showaxlimctrl'); +end + +% Get new state +onoff = on2off(get(h,'Checked')); +dfgetset('showaxlimctrl',onoff); + +% Add or remove controls +dfaxlimctrl(dffig,onoff) + +% Remove effects of controls on layout +if isequal(onoff,'off') + dfadjustlayout(dffig); +end + +% Change menu state +set(h,'Checked',onoff); + diff --git a/boosting/weightedstats/private/dftogglegrid.m b/boosting/weightedstats/private/dftogglegrid.m new file mode 100644 index 0000000..edb8156 --- /dev/null +++ b/boosting/weightedstats/private/dftogglegrid.m @@ -0,0 +1,32 @@ +function togglegrid(dffig) +%TOGGLEGGRID Toggle x and y axes grid on or off + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:54 $ +% Copyright 2003-2004 The MathWorks, Inc. 
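% ------------------------------------------------------------------
% Illustrative sketch (not part of the shipped code) of the rounding
% idea behind the local "rounder" helper in dftips.m above: snap the
% cursor x position to a tidy number whose precision is tied to the
% axis range, so data tips do not show long decimals.  The values of
% x and xrange below are made up for the example.
x      = 3.14159;                        % hypothetical cursor position
xrange = 10;                             % hypothetical diff(xlim)
pwr   = floor(log10(0.005 * xrange));    % order of the smallest digit kept
mult  = 10 ^ -pwr;
xnice = round(mult * x) / mult;          % 3.14 for this example
disp(xnice)
% ------------------------------------------------------------------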
+ +% Get new state -- note uimenu state reflects old state, and +% uitoggletool state reflects new state +h = gcbo; +if isequal(get(h,'Type'),'uimenu') + onoff = on2off(get(h,'Checked')); +else + onoff = get(h,'State'); +end +dfgetset('showgrid',onoff); + +% Change grid +ax = findall(dffig,'Type','axes'); +for j=1:length(ax) + if ~isequal(get(ax(j),'Tag'),'legend') + set(ax(j),'xgrid',onoff,'ygrid',onoff) + end +end + +% Change menu state +h = findall(dffig,'Type','uimenu','Tag','showgrid'); +if ~isempty(h), set(h,'Checked',onoff); end + +% Change button state +h = findall(dffig,'Type','uitoggletool','Tag','showgrid'); +if ~isempty(h), set(h,'State',onoff); end + diff --git a/boosting/weightedstats/private/dftogglelegend.m b/boosting/weightedstats/private/dftogglelegend.m new file mode 100644 index 0000000..57d0e38 --- /dev/null +++ b/boosting/weightedstats/private/dftogglelegend.m @@ -0,0 +1,32 @@ +function togglelegend(dffig) +%TOGGLELEGEND Toggle curve fit plot legend on or off + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:55 $ +% Copyright 2003-2004 The MathWorks, Inc. + +% Get new state -- note uimenu state reflects old state, and +% uitoggletool state reflects new state +h = gcbo; +if isequal(get(h,'Type'),'uimenu') + onoff = on2off(get(h,'Checked')); +else + onoff = get(h,'State'); +end +dfgetset('showlegend',onoff); + +% Change menu state +h = findall(dffig,'Type','uimenu','Tag','showlegend'); +if ~isempty(h), set(h,'Checked',onoff); end + +% Change button state +h = findall(dffig,'Type','uitoggletool','Tag','showlegend'); +if ~isempty(h), set(h,'State',onoff); end + +% Forget previous legend location +if isequal(onoff,'off') + dfgetset('legendpos',[]); + dfgetset('rlegendpos',[]); +end + +% Change legend state +dfupdatelegend(dffig); diff --git a/boosting/weightedstats/private/dftoggletoolbar.m b/boosting/weightedstats/private/dftoggletoolbar.m new file mode 100644 index 0000000..ec71fcf --- /dev/null +++ b/boosting/weightedstats/private/dftoggletoolbar.m @@ -0,0 +1,26 @@ +function toggletoolbar(varargin) +%TOGGLETOOLBAR Toggle distribution fit plot toolbar on or off + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:56 $ +% Copyright 2003-2004 The MathWorks, Inc. + +if (nargin>0 & ishandle(varargin{1}) & ... + isequal(get(varargin{1},'Type'),'figure')) + dffig = varargin{1}; +else + dffig = gcbf; +end + +tbstate = get(dffig,'toolbar'); +h = findall(dffig,'Type','uitoolbar'); +if isequal(tbstate,'none') | isempty(h) + % Create toolbar for the first time + set(dffig,'toolbar','figure'); + dfadjusttoolbar(dffig); +elseif nargin>1 & isequal(varargin{2},'on') + % Hide toolbar + set(h,'Visible','on'); +else + % Show toolbar + set(h,'Visible','off'); +end diff --git a/boosting/weightedstats/private/dftoolgetudd.m b/boosting/weightedstats/private/dftoolgetudd.m new file mode 100644 index 0000000..3f3ff77 --- /dev/null +++ b/boosting/weightedstats/private/dftoolgetudd.m @@ -0,0 +1,18 @@ +function javaudd=dftoolgetudd(uddcmd,varargin); + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:35:57 $ +% Copyright 2003-2004 The MathWorks, Inc. 
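% ------------------------------------------------------------------
% The toggle callbacks above (dftogglegrid, dftogglelegend) rely on a
% small helper, on2off, that is not shown in this diff.  The sketch
% below is an assumption about its behaviour -- flip an 'on' state to
% 'off' and vice versa -- not the shipped implementation.
function out = on2off(in)
%ON2OFF Return 'off' for 'on' and 'on' otherwise (assumed behaviour)
if strcmp(in, 'on')
    out = 'off';
else
    out = 'on';
end
% ------------------------------------------------------------------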
+ +% unwrap any UDD objects +for i=1:length(varargin) + if isa(varargin{i}, 'com.mathworks.jmi.bean.UDDObject') + varargin{i}=handle(varargin{i}); + end +end + +% wrap the return UDD object +if nargin == 1 + javaudd=java(eval(uddcmd)); +else + javaudd=java(feval(uddcmd,varargin{:})); +end diff --git a/boosting/weightedstats/private/dftoolinittemplate.m b/boosting/weightedstats/private/dftoolinittemplate.m new file mode 100644 index 0000000..38ef474 --- /dev/null +++ b/boosting/weightedstats/private/dftoolinittemplate.m @@ -0,0 +1,185 @@ +function s = dfittooldists +%DFITTOOLDISTS Initialize dfittool with custom distributions. +% +% S=DFITTOOLDISTS is called during the initialization of DFITTOOL to get +% any custom distributions you may want to define. This function should +% appear somewhere on your MATLAB path. You can edit it to define +% distributions that you want to be available for fitting in DFITTOOL. +% S is a structure or an array of structures with fields as defined below. +% +% You can load this after initialization by using the menu item +% File -> Import Custom Distributions. In that case you are not +% restricted to use the name DFITTOOLDISTS. +% +% See also DFITTOOL. + +% Copyright 2001-2004 The MathWorks, Inc. +% $Revision: 1.1.6.5 $ $Date: 2004/01/24 09:35:58 $ + +% Create a structure to receive distribution information +s = struct; + +% --------------------------------------------------------- +% ---- Remove the following return statement to define the +% ---- Laplace distributon +% --------------------------------------------------------- +return + +% Laplace (double exponential) distribution definition +j = 1; % custom distribution #1 +s(j).name = 'Laplace'; % name for display +s(j).pnames = {'mu' 'sigma'}; % names of parameters +s(j).pdescription = {'location' 'scale'}; % descriptions of parameters +s(j).cdffunc = @laplcdf; % function to compute cdf +s(j).pdffunc = @laplpdf; % function to compute density +s(j).invfunc = @laplinv; % function to compute inverse cdf +s(j).fitfunc = @laplfit; % function to do fit +s(j).statfunc = @laplstat; % function to compute mean and var +s(j).islocscale = true; % location/scale distribution? 
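% ------------------------------------------------------------------
% Quick standalone check (not part of the template) of the Laplace
% formulas registered above.  It repeats the math of the local
% laplcdf/laplinv functions defined further down in this file, inline,
% so it can be run on its own; mu, sigma and x are made-up values.
mu = 2; sigma = 1.5; x = 3.7;
z = (x - mu) ./ sigma;
if z <= 0, F = exp(z)/2; else F = 1 - exp(-z)/2; end              % Laplace cdf
if F <= 0.5, zi = log(2*F); else zi = -log(1 - 2*(F - 0.5)); end  % inverse cdf
xback = mu + sigma*zi;
fprintf('cdf(%.4g) = %.4f, inverse maps back to %.4g\n', x, F, xback);
% ------------------------------------------------------------------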
+ + +% ---------------------------------------------------------------- +% ---- To enter your own distribution below, remove the following +% ---- return statement and modify the other statements as necessary +% ---------------------------------------------------------------- +return + +% Increment index +j = j+1; + +% Enter a text string to display as distribution name +s(j).name = 'Enter Name Here'; + +% Enter the names of the parameters +s(j).pnames = {'p1', 'p2'}; + +% (optional) Enter a short description of each parameter (default empty) +%s(j).pdescription = {'location' 'scale'}; + +% (optional) Enter a vector indicating whether each parameter must have +% its value specified (true) or if it can be estimated (false, default) +%s(j).prequired = [false false]; + +% Enter function handles to compute the cdf, pdf, and inverse cdf +s(j).cdffunc = @yourcdf; +s(j).pdffunc = @yourpdf; +s(j).invfunc = @yourinv; + +% (optional) Enter a function handle to compute the mean and variance +%s(j).statfunc = @yourstat; + +% (optional) Enter a code name to use internally (default is lower case +% version of the distribution name) +%s(j).code = 'entercode'; + +% (optional) Is this a continuouse distribution (true, default) or is it +% on the integers only (false) +%s(j).iscontinuous = true; + +% (optional) Is this distribution a location/scale family (default false) +%s(j).islocscale = false; + +% (optional) Define a function that can fit this distribution (default none) +%s(j).fitfunc = @yourfit; + +% (optional) Define a function that can compute the negative log +% likelihood (default none) +%s(j).likefunc = @yourlike; + +% (optional) Define functions that can compute the cdf and inverse cdf +% on the log scale, for example @normcdf and @norminv can do this for +% the lognormal distribution (default none) +%s(j).logcdffunc = @yourlogcdf; +%s(j).loginvfunc = @yourloginv; + +% (optional) Do the cdf and inverse cdf functions return confidence bounds +% as additional outputs? (default false) +%s(j).hasconfbounds = false; + +% (optional) Does the fit function support censoring? (default false) +%s(j).censoring = false; + +% (optional) Does the fit function return the fitted parameters as a +% single vector (true, default) or as separate scalars (false) +%s(j).paramvec = true; + +% (optional) Enter a two-element vector defining the range over which +% this distribution gives positive probability, such as: +% [-Inf Inf] The entire real line (default) +% [0 Inf] Positive values only +% [-1 1] Values between -1 and 1 only +%s(j).support = [-Inf Inf]; + +% (optional) Enter a two-element vector specifying whether a data value +% can be exactly at the boundary (true) or must be strictly within the +% boundary (false, default for both end points) +%s(j).closedbound = [false false]; + +% (optional) Should the probability plot be on the log scale? 
(default false) +%s(j).uselogpp = false; + + +% --------------------------------------------------- +% ---- Define additonal distributions similarly below +% --------------------------------------------------- + + + +% ----------------------------------------------------------------------- +% ---- Include any local functions required for your custom distributions +% ----------------------------------------------------------------------- + +function p=laplfit(x,alpha) +%LAPLFIT Fit the Laplace distribution + +% Fitting for this distribution is simple: +% mu = median(x); +% sigma = mean(abs(x-mu)); +% p = [mu sigma]; + +% For illustration, though, the following lines use a nonlinear +% fitting function that could also be used to fit other distributions + +% Starting values are needed: +mu = mean(x); +sigma = std(x); + +% Fit the Laplace distribution +p = mle(x,'pdf',@laplpdf,'cdf',@laplcdf,'start',[mu, sigma]); + + +function f=laplpdf(x,mu,sigma) +%LAPLDF Laplace distribution probability density function +if nargin<2, mu=0; end +if nargin<3, sigma=1; end +z = (x-mu)./sigma; +f = exp(-abs(z))/(2*sigma); + + +function f=laplcdf(x,mu,sigma) +%LAPLCDF Laplace distribution cumulative distribution function +if nargin<2, mu=0; end +if nargin<3, sigma=1; end +f = zeros(size(x)); +z = (x-mu)./sigma; +t = (z<=0); +f(t) = exp(z(t))/2; +f(~t) = 1 - exp(-z(~t))/2; + + +function z=laplinv(p,mu,sigma) +%LAPLINV Laplace distribution inverse cumulative distribution function +if nargin<2, mu=0; end +if nargin<3, sigma=1; end +z = zeros(size(p)); +t = (p<=.5); +z(t) = log(2*p(t)); +z(~t) = -log(1-2*(p(~t)-.5)); +z = mu + sigma*z; + + +function [m,v]=laplstat(mu,sigma) +%LAPLSTAT Laplace distribution statistics +m = mu; +v = 2 * sigma.^2; + diff --git a/boosting/weightedstats/private/dfupdateallplots.m b/boosting/weightedstats/private/dfupdateallplots.m new file mode 100644 index 0000000..9b034f0 --- /dev/null +++ b/boosting/weightedstats/private/dfupdateallplots.m @@ -0,0 +1,66 @@ +function dfupdateallplots(dods,dofit,force) +%DFUPDATEALLPLOTS Call update methods for all data sets and fits + +% $Revision: 1.1.6.5 $ $Date: 2004/01/24 09:35:59 $ +% Copyright 2003-2004 The MathWorks, Inc. 
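% ------------------------------------------------------------------
% As the comments in laplfit above note, the Laplace MLE has a closed
% form.  This standalone sketch (not part of the shipped code) compares
% the closed form with the generic mle-based route that laplfit uses,
% on simulated data; it assumes the Statistics Toolbox mle function is
% available.
u = rand(1000,1) - 0.5;                        % Uniform(-0.5, 0.5)
x = 2 - 1.5 * sign(u) .* log(1 - 2*abs(u));    % Laplace(mu=2, sigma=1.5) sample
muhat    = median(x);                          % closed-form MLE of mu
sigmahat = mean(abs(x - muhat));               % closed-form MLE of sigma
lappdf = @(x,mu,sigma) exp(-abs((x-mu)./sigma)) ./ (2*sigma);
p = mle(x, 'pdf', lappdf, 'start', [muhat, sigmahat]);   % generic route
fprintf('closed form: mu=%.3f sigma=%.3f;  mle: mu=%.3f sigma=%.3f\n', ...
        muhat, sigmahat, p(1), p(2));
% ------------------------------------------------------------------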
+ +le = lasterr; +msg = ''; +if nargin<3 + force = false; +end + +% Supply defaults if missing or if called as a listener (args not logical) +if nargin<1 || ~islogical(dods) + dods = true; +end +if nargin<2 || ~islogical(dofit) + dofit = true; +end + +% Get the array of data sets and update each one +if dods + dsdb = getdsdb; + ds = down(dsdb); + while(~isempty(ds)) + try + if (force) + clearplot(ds); + end + updateplot(ds); + catch + msg = appendmsg(msg,ds.name,lasterr); + end + ds = right(ds); + end +end + +% Get the array of fits and update each one +if dofit + fitdb = getfitdb; + ft = down(fitdb); + while(~isempty(ft)) + try + updateplot(ft); + catch + msg = appendmsg(msg,ft.name,lasterr); + end + ft = right(ft); + end +end + +lasterr(le); +if ~isempty(msg) + errordlg(msg,'Error Updating Plot','modal'); +end + + +%-------------------------------------------- +function msg = appendmsg(msg,objname,newmsg) +%APPENDMSG Append a new section to an existing set of error messages + +if isempty(msg) + msg = sprintf('Error plotting %s:\n%s',objname,newmsg); +elseif ~isempty(newmsg) + msg = sprintf('%s\n\nError plotting %s:\n%s',msg,objname,newmsg); +end diff --git a/boosting/weightedstats/private/dfupdatebinwidthpreview.m b/boosting/weightedstats/private/dfupdatebinwidthpreview.m new file mode 100644 index 0000000..ea8a036 --- /dev/null +++ b/boosting/weightedstats/private/dfupdatebinwidthpreview.m @@ -0,0 +1,15 @@ +function [err, imsource] = dfupdatebinwidthpreview(dataset, width, height, v0, v1, v2, v3, v4) +% DFUPDATEBINWIDTHPREVIEW Helper function for dfittool + +% Copyright 2003-2004 The MathWorks, Inc. +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:36:00 $ + +ds = handle(dataset); +rules.rule = v0; +rules.nbins = str2num(v1); +rules.width = str2num(v2); +rules.placementRule = v3; +rules.anchor = str2num(v4); + +[err, imsource] = dfpreview('', '', '', width, height, ds, rules); + diff --git a/boosting/weightedstats/private/dfupdatelegend.m b/boosting/weightedstats/private/dfupdatelegend.m new file mode 100644 index 0000000..b15dff1 --- /dev/null +++ b/boosting/weightedstats/private/dfupdatelegend.m @@ -0,0 +1,220 @@ +function dfupdatelegend(dffig,reset) +%DFUPLDATELEGEND Update legend in dfittool window + +% $Revision: 1.1.6.5 $ $Date: 2004/01/24 09:36:01 $ +% Copyright 2001-2004 The MathWorks, Inc. + +if nargin<2, reset=false; end + +% If figure not passed in, find figure that contains this thing +while ~isequal(get(dffig,'parent'),0), + dffig = get(dffig,'parent'); +end + +% Remember info about old legend, if any +ax = get(dffig,'CurrentAxes'); +if isempty(get(ax,'Children')) + legh = []; +else + legh = legend('-find',ax); +end + +% Try to put the legend where the curves are not likely to be. +% Position "0" is supposed to do this, but it is deprecated +% and can be slow. Instead use a heurisitic. +ftype = dfgetset('ftype'); +if ischar(ftype) && ~isempty(ftype) && ismember(ftype, {'pdf' 'survivor'}) + % The survivor function is decreasing. Most pdf functions drop more + % quickly to the right. "top right" is probably good for these functions. + legendpos = 'TR'; +else + % The remaining ones are increasing, so "top left" is probably good. 
+ legendpos = 'TL'; +end +legend(ax, 'off'); + +% Maybe no legend has been requested +if isequal(dfgetset('showlegend'),'off') + return +end + +% Get data line handles and labels +hh = flipud(findobj(ax,'Type','line')); +hData = findobj(hh,'flat','Tag','dfdata'); +n = length(hData); + +textData = cell(n,1); +for j=1:length(hData) + nm = ''; + ds = get(hData(j),'UserData'); + if ~isempty(ds) && ishandle(ds) && ~isempty(findprop(ds,'name')) + nm = ds.name; + end + if isempty(nm) + hData(j) = NaN; + else + textData{j} = nm; + end +end +t = ~isnan(hData); +textData = textData(t); +hData = hData(t); +sortData = 1000*(1:length(hData)); +if isempty(sortData) + maxnum = 0; +else + maxnum = max(sortData) + 1000; +end + +% Indent bounds if there are two or more data set lines +if n>1 + pre = ' '; +else + pre = ''; +end + +% Deal with confidence bounds, if any, around empirical cdf +n = length(hData); +textDataBounds = cell(n,1); +hDataBounds = repmat(NaN,n,1); +sortDataBounds = zeros(n,1); +for j=1:n + ds = get(hData(j),'UserData'); + if ds.showbounds + hbounds = ds.boundline; + if ~isempty(hbounds) && ishandle(hbounds) ... + && ~isempty(get(hbounds,'YData')) + textDataBounds{j} = [pre 'confidence bounds']; + hDataBounds(j) = hbounds; + sortDataBounds(j) = sortData(j) + .5; + end + end +end +if any(isnan(hDataBounds)) + t = isnan(hDataBounds); + textDataBounds(t) = []; + hDataBounds(t) = []; + sortDataBounds(t) = []; +end + +% Indent fits if there are two or more data lines +if (length(hData)>1) + pre = ' '; +else + pre = ''; +end + +% Get fit line handles and labels +hFit = findobj(hh,'flat','Tag','distfit'); +sortFit = NaN*hFit; +n = length(hFit); +hFitConf = NaN*zeros(n,1); +textFit = cell(n,1); +nms = cell(n,1); +for j=1:length(hFit) + try + fit = get(hFit(j),'UserData'); + nm = fit.name; + catch + nm = ''; + end + if isempty(nm) + hFit(j) = NaN; + else + nms{j} = nm; + textFit{j} = [pre nm]; + + % Find the dataset for this fit + ds = fit.dshandle; + sortFitj = maxnum + j; + for k=1:length(hData) + if isequal(ds.name,textData{k}) + sortFitj = sortData(k) + j; + break; + end + end + sortFit(j) = sortFitj; + + % Look for bounds + b = get(fit,'boundline'); + if ~isempty(b) + hFitConf(j) = b(1); + end + end +end +t = ~isnan(hFit); +nms = nms(t); +textFit = textFit(t); +hFit = hFit(t); +sortFit = sortFit(t); +hFitConf = hFitConf(t); + + +% Indent bounds if there are two or more fits +if (length(hFit)>1) + pre = [pre ' ']; +end + +% Get confidence bound line handles and labels +n = length(hFitConf); +textFitBounds = cell(n,1); +sortFitBounds = zeros(size(hFitConf)); +for j=1:length(hFitConf) + if ~isnan(hFitConf(j)) && ishandle(hFitConf(j)) ... 
+ && ~isempty(get(hFitConf(j),'XData')) + textFitBounds{j} = sprintf('%sconfidence bounds (%s)',pre,nms{j}); + sortFitBounds(j) = sortFit(j) + 0.5; + else + hFitConf(j) = NaN; + end +end +t = ~isnan(hFitConf); +textFitBounds = textFitBounds(t); +hFitConf = hFitConf(t); +sortFitBounds = sortFitBounds(t); + +% Combine everything together for the legend +h = [hData(:); hDataBounds(:); hFit(:); hFitConf(:)]; +c = [textData; textDataBounds; textFit; textFitBounds]; +s = [sortData(:); sortDataBounds(:); sortFit(:); sortFitBounds(:)]; + +% Sort so related things are together +[s,j] = sort(s); +c = c(j); +h = h(j); + +% Create the legend +if (length(h)>0) + ws = warning; + lw = lastwarn; + warning('off'); + oldu = get(ax,'Units'); + set(ax,'Units','normalized'); + try + havecoords = isnumeric(legendpos) && length(legendpos)>1; + if havecoords + temppos = 0; + else + temppos = legendpos; + end + [legh,objh] = legend(ax,h,c,temppos); + if havecoords + set(legh,'Units','points'); + newpos = get(legh,'Position'); + newpos(1:2) = legendpos(1:2); + set(legh,'Position',newpos); + end + catch + objh = []; + end +% set(ax,'Units',oldu); + warning(ws); + lastwarn(lw); + + % Avoid treating ds/fit names as TeX strings + objh = findobj(objh,'flat','Type','text'); + set(objh,'Interpreter','none','Hittest','off'); +end + +% Set a resize function that will handle legend and layout +set(dffig,'resizefcn','dfittool(''adjustlayout'');'); diff --git a/boosting/weightedstats/private/dfupdateppdists.m b/boosting/weightedstats/private/dfupdateppdists.m new file mode 100644 index 0000000..4a9e8f4 --- /dev/null +++ b/boosting/weightedstats/private/dfupdateppdists.m @@ -0,0 +1,131 @@ +function dfupdateppdists(dffig) +%DFUPDATEPPDISTS Update distribution list for probability plots + +% $Revision: 1.1.6.5 $ $Date: 2004/01/24 09:36:02 $ +% Copyright 2003-2004 The MathWorks, Inc. 
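% ------------------------------------------------------------------
% Illustrative sketch (not part of the shipped code) of the legend
% ordering trick in dfupdatelegend.m above: data sets get sort keys
% 1000, 2000, ..., each fit gets its data set's key plus a small
% offset, and confidence bounds get a further +0.5, so a single sort
% keeps related entries adjacent.  Labels and keys below are made up.
labels = {'data A', 'data B', 'fit 1 (A)', 'fit 2 (B)', 'bounds (fit 1)'};
keys   = [ 1000      2000      1001         2002         1001.5 ];
[ignore, order] = sort(keys);
disp(labels(order))   % data A, fit 1 (A), bounds (fit 1), data B, fit 2 (B)
% ------------------------------------------------------------------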
+ +if nargin<1 || isempty(dffig) + dffig = dfgetset('dffig'); +end + +% Get handle to control containing the distribution list +hsel = getappdata(dffig,'selectioncontrols'); + +% Determine plot type, don't continue unless it's a probability plot +h = hsel(3); % handle of display type control +choice = get(hsel(3),'Value'); +ftypes = getappdata(hsel(3),'codenames'); +ftype = ftypes{choice}; +if ~isequal(ftype, 'probplot'); + return +end + +% Look at all plotted data sets and check for negative data +dsdb = getdsdb; +dslist = find(dsdb); +dslist(dslist==dsdb) = []; +nds = length(dslist); +xmin = 1; +for j=1:nds + ds = dslist(j); + if ds.plot + xlimits = xlim(ds); + if (xlimits(1) < xmin) + xmin = xlimits(1); + if (xmin<0) + break + end + end + end +end + +% Get the current selection, and try to re-select it later +h = hsel(5); % handle of distribution control +dlist = get(h, 'String'); +choice = get(h, 'Value'); +if choice<=length(dlist) + cursel = dlist{choice}; +else + cursel = []; +end + +% Get the distribution entries only +dlist = getappdata(h, 'allfullnames'); +codenames = getappdata(h, 'allcodenames'); +ndists = length(dlist); + +% With 0 or negative data showing, omit some distributions +if (xmin<=0) + ok = true(ndists,1); + allspec = getappdata(h,'alldistspec'); + + % ------- Could vectorize the following + for j=1:ndists + spec = allspec(j); + ok(j) = checkdist(spec,xmin); + end + % ------- + + dlist = dlist(ok); + codenames = codenames(ok); + ndists = length(dlist); +end +setappdata(h,'okcodenames',codenames); + +% Create a list of fit names +fitdb = getfitdb; +flist = find(fitdb); +flist(flist==fitdb) = []; +nfits = length(flist); + +% Weed out fits that cannot be used for probability plotting +for j=nfits:-1:1 + fj = flist(j); + if ~fj.isgood || isequal(fj.fittype,'smooth') + flist(j) = []; + else + distspec = fj.distspec; + if isempty(distspec) || ~distspec.iscontinuous ... + || ~checkdist(distspec,xmin) + flist(j) = []; + end + end +end +nfits = length(flist); + +% Create combined list +newdlist = cell(ndists + nfits, 1); +newdlist(1:ndists) = dlist(:); +savelist = cell(nfits,1); +for j=1:nfits + fj = flist(j); + newdlist{ndists+j} = sprintf('Estimated %s (%s)',fj.distspec.name,fj.name); + savelist{j} = fj.name; +end +setappdata(h, 'fitnames', savelist); + +% Re-select the previous selection +set(h, 'String', newdlist); +choice = 1; +if ~isempty(cursel) + choices = strmatch(cursel,newdlist,'exact'); + if numel(choices) == 1 + choice = choices; + end +end +set(h, 'Value', choice); + + +% ----------------------------- +function ok = checkdist(spec,xmin) +%Check distribution against minimum data value + +lobnd = spec.support(1); +strict = ~spec.closedbound(1); +if strict && lobnd>=xmin + ok = false; +elseif ~strict && lobnd>xmin + ok = false; +else + ok = true; +end diff --git a/boosting/weightedstats/private/dfupdatexlim.m b/boosting/weightedstats/private/dfupdatexlim.m new file mode 100644 index 0000000..3c00daf --- /dev/null +++ b/boosting/weightedstats/private/dfupdatexlim.m @@ -0,0 +1,91 @@ +function dfupdatexlim(newminmax,updateplots) +%DFUPDATEXLIM Update the stored x axis min/max values + +% $Revision: 1.1.6.5 $ $Date: 2004/01/24 09:36:03 $ +% Copyright 2003-2004 The MathWorks, Inc. 
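% ------------------------------------------------------------------
% Standalone sketch (not part of the shipped code) of the support check
% done by the local checkdist helper above: a distribution is dropped
% from the probability-plot list when the smallest plotted value lies
% outside (or on a strict boundary of) its support.  The spec and xmin
% below are made-up example values.
spec.support     = [0 Inf];        % a positive-only distribution
spec.closedbound = [false false];  % data must be strictly greater than 0
xmin   = 0;                        % smallest value in the plotted data
lobnd  = spec.support(1);
strict = ~spec.closedbound(1);
ok = ~((strict && lobnd >= xmin) || (~strict && lobnd > xmin));
disp(ok)                           % false: a zero in the data rules it out
% ------------------------------------------------------------------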
+ +minmax = []; % to become new x limits +oldminmax = dfgetset('xminmax'); % previous limits +ftype = dfgetset('ftype'); +if nargin==0 + newminmax = []; +end + +if isempty(newminmax) && isequal(ftype, 'icdf') + % Default limits span most of the probability range + minmax = [.01 .99]; +elseif isempty(newminmax) + % Update limits from datasets with a plotting flag on + dsdb = getdsdb; + ds = down(dsdb); + while(~isempty(ds)) + if ds.plot == 1 + minmax = combineminmax(minmax,ds.xlim); + end + ds = right(ds); + end + + % Update from fits with a plotting flag on + fitdb = getfitdb; + ft = down(fitdb); + while(~isempty(ft)) + if ft.plot == 1 + minmax = combineminmax(minmax,xlim(ft)); + end + ft = right(ft); + end +else + minmax = newminmax; +end + +% Now update plot +dffig = dfgetset('dffig'); +if ~isempty(minmax) && isequal(zoom(dffig,'getmode'),'off') + ax = get(dffig,'CurrentAxes'); + islinscale = isequal(get(ax,'XScale'),'linear'); + if ~islinscale && any(minmax<=0) + warning('stats:dfupdatexlim:NegativeDataIgnored',... + 'Negative data ignored.'); + minmax = [1e-6 1] * max(abs(minmax)); + end + if isempty(newminmax) && ~isequal(ftype, 'icdf') + % Adjust axis limits to include a margin around plotted points + if islinscale + dx = diff(minmax) * 0.01 * [-1 1]; + if all(dx==0), dx = [-1 1]; end + else + dlogx = .01 * diff(log(minmax)); + if dlogx==0, dlogx = 1; end + dx = [minmax(1) * exp(-dlogx), minmax(2) * exp(dlogx)] - minmax; + end + elseif minmax(1)==minmax(2) + if islinscale + dx = [-1 1]; + else + dx = [minmax(1)/2, 2*minmax(1)]; + end + else + % Don't adjust the limits that were passed in or computed + dx = 0; + end + oldxlim = get(ax,'XLim'); + newxlim = minmax + dx; + if ~isequal(oldxlim,newxlim) + set(ax,'XLim',newxlim); + if nargin<2 || updateplots + dfupdateallplots(false,true); + end + end +end +dfgetset('xminmax',minmax); + +% ------------ Helper to combine old and new minmax values +function bothmm = combineminmax(oldmm,newmm) + +if isempty(oldmm) + bothmm = newmm; +elseif isempty(newmm) + bothmm = oldmm; +else + bothmm = [min(oldmm(1),newmm(1)) max(oldmm(2),newmm(2))]; +end diff --git a/boosting/weightedstats/private/dfupdateylim.m b/boosting/weightedstats/private/dfupdateylim.m new file mode 100644 index 0000000..fe75688 --- /dev/null +++ b/boosting/weightedstats/private/dfupdateylim.m @@ -0,0 +1,89 @@ +function dfupdateylim +%DFUPDATEYLIM Update the y axis min/max values + +% $Revision: 1.1.6.4 $ $Date: 2004/01/24 09:36:04 $ +% Copyright 2003-2004 The MathWorks, Inc. 
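% ------------------------------------------------------------------
% Sketch (not part of the shipped code) of the axis-margin rule used in
% dfupdatexlim.m above: pad the data range by about 1% on each side,
% additively on a linear scale and multiplicatively (in log space) on a
% log scale.  The minmax values are made up for the example.
minmax = [2 50];
dxlin  = diff(minmax) * 0.01 * [-1 1];                            % linear pad
dlogx  = 0.01 * diff(log(minmax));
dxlog  = [minmax(1)*exp(-dlogx), minmax(2)*exp(dlogx)] - minmax;  % log pad
disp(minmax + dxlin)    % [1.52 50.48]
disp(minmax + dxlog)    % approximately [1.94 51.64]
% ------------------------------------------------------------------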
+ +dminmax = []; % to indicate y data limits + +% Check y limits of all fits +fminmax = []; +fitdb = getfitdb; +ft = down(fitdb); +while(~isempty(ft)) + if ft.plot==1 && ~isempty(ft.linehandle) && ishandle(ft.linehandle) + fminmax = combineminmax(fminmax,ft.ylim); + else + ds = ft.dshandle; + if ds.plot==1 && ~isempty(ds.line) && ishandle(ds.line) + fminmax = combineminmax(fminmax,ds.ylim); + end + end + ft = right(ft); +end + +% Check any datasets with a plotting flag on +dsdb = getdsdb; +ds = down(dsdb); +while(~isempty(ds)) + if ds.plot == 1 + dminmax = combineminmax(dminmax,ds.ylim); + end + ds = right(ds); +end + +% Adjust data min/max to take fits into account, but don't allow +% fit extrapolations to overwhelm data values +if isempty(dminmax) + if isempty(fminmax), return; end + dminmax = fminmax; +elseif ~isempty(fminmax) + dy = diff(dminmax); + dminmax(1) = max(dminmax(1)-dy/2, min(dminmax(1),fminmax(1))); + dminmax(2) = min(dminmax(2)+dy/2, max(dminmax(2),fminmax(2))); +end + +dffig = dfgetset('dffig'); +ax = get(dffig,'CurrentAxes'); +if isequal(get(ax,'YScale'),'linear') + dy = diff(dminmax) * 0.05 * [-1 1]; + if all(dy==0) + dy = [-1 1]; + end +elseif dminmax(2)>dminmax(1) + dlogy = .01 * diff(log(dminmax)); + if (dlogy==0), dlogy = 1; end + dy = [dminmax(1) * exp(-dlogy), dminmax(2) * exp(dlogy)] - dminmax; +else + dy = 0; +end + +ftype = dfgetset('ftype'); +if isempty(ftype) % may happen during initialization + ftype = 'pdf'; +end +switch(ftype) + case {'cdf' 'survivor'} + % Bounded functions, no need to extend in either direction + dy(:) = 0; + + case {'pdf' 'cumhazard'} + % Positive functions, no need to extend below zero + dy = max(0,dy); +end + +if isequal(zoom(dffig,'getmode'),'off') + set(ax,'YLim',dminmax+dy); +end + + +% ------------ Helper to combine old and new minmax values +function bothmm = combineminmax(oldmm,newmm) + +if isempty(oldmm) + bothmm = newmm; +elseif isempty(newmm) + bothmm = oldmm; +else + bothmm = [min(oldmm(1),newmm(1)) max(oldmm(2),newmm(2))]; +end diff --git a/boosting/weightedstats/private/dfviewdata.m b/boosting/weightedstats/private/dfviewdata.m new file mode 100644 index 0000000..5204ec8 --- /dev/null +++ b/boosting/weightedstats/private/dfviewdata.m @@ -0,0 +1,14 @@ +function [D, C, F] = dfviewdata(dataset) +% DFVIEWDATA Helper function for the Curve Fitting toolbox viewdata panel +% +% [X, Y, W] = DFVIEWDATA(DATASET) +% returns the x, y and w values for the given dataset +% (in a manner that the Java GUI can use) + +% Copyright 2003-2004 The MathWorks, Inc. +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:36:05 $ + +ds = handle(dataset); +D = ds.y; +C = ds.censored; +F = ds.frequency; diff --git a/boosting/weightedstats/private/dfviewdatapreview.m b/boosting/weightedstats/private/dfviewdatapreview.m new file mode 100644 index 0000000..45c4d56 --- /dev/null +++ b/boosting/weightedstats/private/dfviewdatapreview.m @@ -0,0 +1,21 @@ +function [imsource, D, C, F, yexp, censexp, freqexp] = dfviewdatapreview(dataset, width, height) +% DFVIEWDATA Helper function for the dfittool viewdata panel +% +% [X, Y, W] = DFVIEWDATA(DATASET) +% returns the x, y and w values for the given dataset +% (in a manner that the Java GUI can use) + +% Copyright 2003-2004 The MathWorks, Inc. 
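% ------------------------------------------------------------------
% Sketch (not part of the shipped code) of the y-limit compromise in
% dfupdateylim.m above: a fit may extend the data's y range, but by no
% more than half the data range in either direction, so a wild
% extrapolation cannot dwarf the data.  The ranges below are made up.
dminmax = [0 1];        % y range of the plotted data
fminmax = [-5 30];      % y range of a badly extrapolating fit
dy = diff(dminmax);
lo = max(dminmax(1) - dy/2, min(dminmax(1), fminmax(1)));
hi = min(dminmax(2) + dy/2, max(dminmax(2), fminmax(2)));
disp([lo hi])           % [-0.5 1.5], not [-5 30]
% ------------------------------------------------------------------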
+% $Revision: 1.1.6.5 $ $Date: 2004/01/24 09:36:06 $ + +ds = handle(dataset); + +[err, imsource] = dfpreview('', '', '', width, height, ds); + +[D, C, F] = dfviewdata(dataset); +C = double(C); %C might be logical - convert to double for GUI compatibility + +yexp = ds.yexp; +censexp = ds.censexp; +freqexp = ds.freqexp; + diff --git a/boosting/weightedstats/private/dfviewexcludepreview.m b/boosting/weightedstats/private/dfviewexcludepreview.m new file mode 100644 index 0000000..c0c95b9 --- /dev/null +++ b/boosting/weightedstats/private/dfviewexcludepreview.m @@ -0,0 +1,143 @@ +function [imsource, D, C, F] = dfviewexcludepreview(outlier, width, height, dsname) +% For use by DFITTOOL + +% $Revision: 1.1.6.5 $ +% Copyright 2003-2004 The MathWorks, Inc. + +tempfigure=figure('units','pixels','position',[0 0 width height], ... + 'handlevisibility','off', ... + 'integerhandle','off', ... + 'visible','off', ... + 'paperpositionmode', 'auto', ... + 'color','w'); + +NONE='(none)'; + +% We're excluding based on data in one dataset +ds = find(getdsdb,'name',dsname); +if isempty(ds) + return +end + +% Get data w/o NaNs but with no exclusion rule applied +[ydata, cens, freq] = getincludeddata(ds,[]); +ydata = real(ydata); +if isempty(cens) + cens = zeros(size(ydata)); +end +if isempty(freq) + freq = ones(size(ydata)); +end + +% Sort y and carry along the rest +[ydata,i] = sort(ydata); +cens = cens(i); +freq = freq(i); + +% Create x and y vectors to plot +n = sum(freq); +x = zeros(n,1); +y = zeros(n,1); +g = zeros(n,1); +j = 1; +x(1:freq(1)) = ydata(1); +y(1:freq(1)) = (1:freq(1))'; +g(1:freq(1)) = cens(1); +i = freq(1)+1; +for k=2:length(ydata) + for j=1:freq(k) + x(i) = ydata(k); + g(i) = cens(k); + if (i>1) && (x(i)==x(i-1)) + y(i) = y(i-1) + 1; + else + y(i) = 1; + end + i = i+1; + end +end + +o = handle(outlier); +ylo = o.YLow; +if isempty(ylo) + ylo = -Inf; +else + ylo = str2double(ylo); +end + +yhi = o.YHigh; +if isempty(yhi) + yhi = Inf; +else + yhi = str2double(yhi); +end + +ylotest = o.YLowLessEqual; +yhitest = o.YLowLessEqual; + +xlim = [min(x) max(x)]; +xlim = xlim + .05 * [-1 1] * diff(xlim); +ylim = [min(y) max(y)]; +ylim = ylim + .05 * [-1 1] * diff(ylim); +if ylim(1) == ylim(2) + ylim = [0 2]; +end +ax=axes('position',[.05 .05 .9 .9], ... + 'parent',tempfigure, ... + 'xtick',[],'ytick',[], ... + 'box','on', ... + 'visible','off', 'XLim',xlim,'YLim',ylim); + +if ylotest==0 + inbounds = x>=ylo; +else + inbounds = x>ylo; +end +if yhitest==0 + inbounds = inbounds & x<=yhi; +else + inbounds = inbounds & x= 0, as x approaches infinity, y approaches 1 and both +% derivatives approach 0. For small x and a, y is approximately x^a, so +% dgammainc(0,0) returns y == 1, dy == -Inf, and d2y == Inf. +% +% [..] = GAMMAINC(X,A,TAIL) specifies the tail of the incomplete gamma +% function when X is non-negative. Choices are 'lower' (the default) and +% 'upper'. The upper incomplete gamma function is defined as +% 1 - gammainc(x,a). +% +% Warning: When x is negative, results can be inaccurate for abs(x) > a+1. +% +% See also GAMMA, GAMMALN, GAMMAINC, PSI. + +% Copyright 1993-2004 The MathWorks, Inc. +% $Revision: 1.1.6.3 $ $Date: 2004/01/24 09:36:09 $ + +if nargin < 3 + lower = true; +else + switch tail + case 'lower', lower = true; + case 'upper', lower = false; + otherwise, error('stats:dgammainc:InvalidTailArg', ... + 'TAIL must be ''lower'' or ''upper''.'); + end +end + +% x and a must be compatible for addition. +try + y = x + a; + y(:) = NaN; +catch + error('stats:dgammainc:InputSizeMismatch', ... 
+ 'X and A must be the same size, or scalars.') +end +dy = y; +if nargout > 2 + d2y = y; +end + +if any(a(:) < 0) + error('stats:dgammainc:NegativeArg', 'A must be non-negative.') +end + +% If a is a vector, make sure x is too. +ascalar = isscalar(a); +if ~ascalar && isscalar(x) + x = repmat(x,size(a)); +end + +% Upper limit for series and continued fraction. +amax = 2^20; + +% Approximation for a > amax. Accurate to about 5.e-5. +k = find(a > amax); +if ~isempty(k) + if ascalar + x = max(amax-1/3 + sqrt(amax/a).*(x-(a-1/3)),0); + a = amax; + else + x(k) = max(amax-1/3 + sqrt(amax./a(k)).*(x(k)-(a(k)-1/3)),0); + a(k) = amax; + end +end + +% +% Series expansion for lower incomplete gamma when x < a+1 +% +k = find(x < a+1 & x ~= 0); +if ~isempty(k) + xk = x(k); + if ascalar, ak = a; else ak = a(k); end + aplusn = ak; + del = 1; + ddel = 0; + d2del = 0; + sum = del; + dsum = ddel; + d2sum = d2del; + while norm(del,'inf') >= 100*eps(norm(sum,'inf')) + aplusn = aplusn + 1; + del = del .* xk ./ aplusn; + ddel = (ddel .* xk - del) ./ aplusn; + d2del = (d2del .* xk - 2 .* ddel) ./ aplusn; + sum = sum + del; + dsum = dsum + ddel; + d2sum = d2sum + d2del; + end + fac = exp(-xk + ak.*log(xk) - gammaln(ak+1)); + yk = fac.*sum; + % For very small a, the series may overshoot very slightly. + yk(xk > 0 & yk > 1) = 1; + if lower, y(k) = yk; else y(k) = 1 - yk; end + + dlogfac = (log(xk) - psi(ak+1)); + dfac = fac .* dlogfac; + dyk = dfac.*sum + fac.*dsum; + if lower, dy(k) = dyk; else dy(k) = -dyk; end + + if nargout > 2 + d2fac = dfac.*dlogfac - fac.*psi(1,ak+1); + d2yk = d2fac.*sum + 2.*dfac.*dsum + fac.*d2sum; + if lower, d2y(k) = d2yk; else d2y(k) = -d2yk; end + end +end + +% +% Continued fraction for upper incomplete gamma when x >= a+1 +% +k = find(x >= a+1); % & x ~= 0 +if ~isempty(k) + xk = x(k); + if ascalar, ak = a; else ak = a(k); end + n = 0; + a0 = 0; + a1 = ak; + b0 = 1; + b1 = xk; + da0 = 0; db0 = 0; da1 = 1; db1 = 0; + d2a0 = 0; d2b0 = 0; d2a1 = 0; d2b1 = 0; + g = ak ./ xk; + dg = 1 ./ xk; + d2g = 0; + d2gold = 1; % force one iteration, any nonzero value will do + % Testing d2g is the more stringent than testing g or dg. d2g may + % be zero, so use a strict inequality + while norm(d2g-d2gold,'inf') > 100*eps(norm(d2g,'inf')) + rescale = 1 ./ b1; % keep terms from overflowing + n = n + 1; + nminusa = n - ak; + d2a0 = (d2a1 + d2a0 .* nminusa - 2 .* da0) .* rescale; + d2b0 = (d2b1 + d2b0 .* nminusa - 2 .* db0) .* rescale; + da0 = (da1 + da0 .* nminusa - a0) .* rescale; + db0 = (db1 + db0 .* nminusa - b0) .* rescale; + a0 = (a1 + a0 .* nminusa) .* rescale; + b0 = 1 + (b0 .* nminusa) .* rescale; % (b1 + b0 .* nminusa) .* rescale + nrescale = n .* rescale; + d2a1 = d2a0 .* xk + d2a1 .* nrescale; + d2b1 = d2b0 .* xk + d2b1 .* nrescale; + da1 = da0 .* xk + da1 .* nrescale; + db1 = db0 .* xk + db1 .* nrescale; + a1 = a0 .* xk + a1 .* nrescale; + b1 = b0 .* xk + n; % b0 .* xk + b1 .* nrescale + d2gold = d2g; + g = a1 ./ b1; + dg = (da1 - g.*db1) ./ b1; + d2g = (d2a1 - dg.*db1 - g.*d2b1 - dg.*db1) ./ b1; + end + fac = exp(-xk + ak.*log(xk) - gammaln(ak+1)); + yk = fac.*g; + if lower, y(k) = 1 - yk; else y(k) = yk; end + + dlogfac = (log(xk) - psi(ak+1)); + dfac = fac .* dlogfac; + dyk = dfac.*g + fac.*dg; + if lower, dy(k) = -dyk; else dy(k) = dyk; end + + if nargout > 2 + d2fac = dfac.*dlogfac - fac.*psi(1,ak+1); + d2yk = d2fac.*g + 2.*dfac.*dg + fac.*d2g; + if lower, d2y(k) = -d2yk; else d2y(k) = d2yk; end + end +end + +% Handle x == 0 separately to get it exactly correct. 
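% ------------------------------------------------------------------
% Aside (not part of dgammainc itself): a quick numerical sanity check
% of the series/continued-fraction code above.  y should agree with
% MATLAB's gammainc, and dy, the derivative with respect to a, should
% agree with a central finite difference.  dgammainc is the private
% function defined in this file, so the snippet assumes it is on the
% path; x, a and h are made-up example values.
x = 2.5; a = 1.7; h = 1e-6;
[y, dy] = dgammainc(x, a);
fprintf('y  = %.10f  (gammainc gives %.10f)\n', y, gammainc(x, a));
fd = (gammainc(x, a + h) - gammainc(x, a - h)) / (2*h);
fprintf('dy = %.8f  (finite difference %.8f)\n', dy, fd);
% ------------------------------------------------------------------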
+kx0 = find(x == 0); +if ~isempty(kx0) + if lower, y(kx0) = 0; else y(kx0) = 1; end + dy(kx0) = 0; + if nargout > 2 + d2y(kx0) = 0; + end +end + +% a == 0, x ~= 0 is already handled by the power series or continued +% fraction, now fill in dgammainc(0,0). While we're at it, make +% gammainc(x,0) or 1-gammainc(x,0) exact for any x, not just x == 0. +ka0 = find(a == 0); +if ~isempty(ka0) + if ascalar + if lower + y(:) = 1; + dy(kx0) = -Inf; + if nargout > 2, d2y(kx0) = Inf; end + else + y(:) = 0; + dy(kx0) = Inf; + if nargout > 2, d2y(kx0) = -Inf; end + end + else + ka0x0 = find(a == 0 & x == 0); + if lower + y(ka0) = 1; + dy(ka0x0) = -Inf; + if nargout > 2, d2y(ka0x0) = Inf; end + else + y(ka0) = 0; + dy(ka0x0) = Inf; + if nargout > 2, d2y(ka0x0) = -Inf; end + end + end +end diff --git a/boosting/weightedstats/private/export2wsdlg.m b/boosting/weightedstats/private/export2wsdlg.m new file mode 100644 index 0000000..5296112 --- /dev/null +++ b/boosting/weightedstats/private/export2wsdlg.m @@ -0,0 +1,448 @@ +function hDialog=export2wsdlg(checkboxLabels, defaultVariableNames, itemsToExport, varargin) +%EXPORT2WSDLG Exports variables to the workspace. +% EXPORT2WSDLG(CHECKBOXLABELS, DEFAULTVARIABLENAMES, ITEMSTOEXPORT) creates +% a dialog with a series of checkboxes and edit fields. CHECKBOXLABELS is a +% cell array of labels for the checkboxes. DEFAULTVARIABLENAMES is a cell +% array of strings that serve as a basis for variable names that appear in +% the edit fields. ITEMSTOEXPORT is a cell array of the values to be stored +% in the variables. If there is only one item to export, EXPORT2WSDLG creates +% a text control instead of a checkbox. +% +% EXPORT2WSDLG(CHECKBOXLABELS, DEFAULTVARIABLENAMES, ITEMSTOEXPORT, TITLE) +% creates the dialog with TITLE as its title. +% +% EXPORT2WSDLG(CHECKBOXLABELS, DEFAULTVARIABLENAMES, ITEMSTOEXPORT, TITLE, +% SELECTED) +% creates the dialog allowing the user to control which checkboxes are +% checked. SELECTED is a logical array, whose length is the same as +% CHECKBOXLABELS. True indicates that the checkbox should initially be +% checked, false unchecked. +% +% EXPORT2WSDLG(CHECKBOXLABELS, DEFAULTVARIABLENAMES, ITEMSTOEXPORT, TITLE, +% SELECTED, HELPFUNCTION) +% creates the dialog with a help button. HELPFUNCTION is a callback that +% displays help. +% +% EXPORT2WSDLG(CHECKBOXLABELS, DEFAULTVARIABLENAMES, ITEMSTOEXPORT, TITLE, +% SELECTED, HELPFUNCTION, FUNCTIONLIST) +% creates a dialog that enables the user to pass in FUNCTIONLIST, a cell +% array of functions and optional arguments that calcluate, then return +% the value to export. FUNCTIONLIST should be the same length as CHECKBOXLABELS. +% +% User can edit the text fields to modify the default variable names. If the +% same name appears in multiple edit fields, EXPORT2WSDLG creates a structure +% using that name. It then uses the DEFAULTVARIABLENAMES as fieldnames for +% that structure. +% +% The lengths of CHECKBOXLABELS, DEFAULTVARIABLENAMES, ITEMSTOEXPORT and +% SELECTED must all be equal. +% +% The strings in DEFAULTVARIABLENAMES must be unique. + +% Copyright 2003-2004 The MathWorks, Inc. +% $Revision: 1.1.4.1 $ $Date: 2004/04/01 16:23:58 $ + +if (~iscell(checkboxLabels) || ~iscell(defaultVariableNames) || ... + ~iscell(itemsToExport)) + error('stats:export2wsdlg:CellArrayRequired',... + ['CHECKBOXLABELS, DEFAULTVARIABLENAMES and '... + 'ITEMSTOEXPORT must all be cell arrays']); +end + +checkBoxCount = length(checkboxLabels); + +if checkBoxCount ~= length(defaultVariableNames) || ... 
+ checkBoxCount ~= length(itemsToExport) + error('stats:export2wsdlg:InputSizeMismatch',... + ['CHECKBOXLABELS, DEFAULTVARIABLENAMES and '... + 'ITEMSTOEXPORT must all be the same length.']); +end + +if length(unique(defaultVariableNames)) ~= checkBoxCount + error('stats:export2wsdlg:BadDefaultNames',... + 'Default variable names must be unique'); +end + +if (nargin > 3) + title = varargin{1}; +else + title = 'Export To Workspace'; +end + +if (nargin > 4) + selected = varargin{2}; + if ~islogical(selected) + error('stats:export2wsdlg:BadSelected',... + 'SELECTED must be a logical array'); + elseif length(selected) ~= checkBoxCount + error('stats:export2wsdlg:InputSizeMismatch',... + 'SELECTED must be the same length as ITEMSTOEXPORT'); + end +else + selected = true(1, checkBoxCount); +end + +if (nargin > 6) % export functions + functionlist = varargin{4} + if length(functionlist) ~= checkBoxCount + error('stats:export2wsdlg:InputSizeMismatch',... + 'FUNCTIONLIST must be the same length as ITEMSTOEXPORT'); + end +else + functionlist = cell(1, checkBoxCount); +end + +hDialog = dialog('Visible', 'off', 'Name', title, 'WindowStyle', 'normal'); +variableNames = createVarNames(defaultVariableNames); + +if (nargin > 5) % help button wanted + helpButton = uicontrol(hDialog,'String', 'Help',... + 'Callback', {@HelpCallback, varargin{3}}); +else + helpButton = []; +end + +cancelButton = uicontrol(hDialog,'String', 'Cancel',... + 'Callback', {@CancelCallback, hDialog}); +okButton = uicontrol(hDialog,'String', 'OK', 'Fontweight', 'bold'); + +[checkBoxes, editFields] = layoutDialog(hDialog, okButton, cancelButton, helpButton, ... + checkboxLabels, variableNames, ... + selected, checkBoxCount); + +set(okButton, 'Callback', {@OKCallback, hDialog, checkBoxes, editFields, ... + itemsToExport, defaultVariableNames, functionlist}); +set(hDialog, 'KeyPressFcn', {@KeyPressCallback, hDialog, checkBoxes, editFields, ... + itemsToExport, defaultVariableNames, functionlist}); + +if (length(checkBoxes) > 1) + for i = 1:length(checkBoxes) + set(checkBoxes{i}, 'Callback', {@CBCallback, checkBoxes, ... 
+ editFields, i}); + end +end + +set(hDialog, 'HandleVisibility', 'callback', 'WindowStyle', 'modal', 'Resize', 'on'); +set(hDialog, 'Visible', 'on'); + +%---------------------------------------------------------------------------- +function modifiedNames = createVarNames(defVariableNames) + % Preallocating for speed + modifiedNames = cell(1, length(defVariableNames)); + for i = 1:length(defVariableNames) + modifiedNames{i} = computename(defVariableNames{i}); + end + +%---------------------------------------------------------------------------- +function name = computename(nameprefix) + +if (evalin('base',['exist(''', nameprefix,''', ''var'');']) == 0) + name = nameprefix; + return +end + +% get all names that start with prefix in workspace +workvars = evalin('base', ['char(who(''',nameprefix,'*''))']); +% trim off prefix name +workvars = workvars(:,length(nameprefix)+1:end); + +if ~isempty(workvars) + % remove all names with suffixes that are "non-numeric" + lessthanzero = workvars < '0'; + morethannine = workvars > '9'; + notblank = (workvars ~= ' '); + notnumrows = any((notblank & (lessthanzero | morethannine)),2); + workvars(notnumrows,:) = []; +end + +% find the "next one" +if isempty(workvars) + name = [nameprefix, '1']; +else + nextone = max(str2num(workvars)) + 1; + if isempty(nextone) + name = [nameprefix, '1']; + else + name = [nameprefix, num2str(nextone)]; + end +end + +%---------------------------------------------------------------------------- +function OKCallback(obj, eventdata, dialog, cb, e, itm, defaultVariableNames, fcs) + + varnames = []; + items = []; + funcs = []; + fieldnames = []; + + if (length(e) == 1) + varnames{1} = get(e{1}, 'String'); + items{1} = itm{1}; + funcs{1} = fcs{1}; + fieldnames{1} = defaultVariableNames{1}; + else + % we only care about items that are checked + for i = 1:length(e) + if get(cb{i}, 'Value') == 1 + varnames{end + 1} = get(e{i}, 'String'); + items{end + 1} = itm{i}; + funcs{end + 1} = fcs{i}; + fieldnames{end + 1} = defaultVariableNames{i}; + end + end + + if (length(varnames) == 0) + errordlg('You must check a box to export variables', ... + 'Nothing selected','modal'); + return; + end + end + + %check for invalid and empty variable names + badnames = []; + emptynames = 0; + emptystrmsg = ''; + badnamemsg = ''; + for i = 1:length(varnames) + if strcmp('', varnames{i}) + emptynames = 1; + emptystrmsg = sprintf('%s\n', ... + 'An empty string is not a valid choice for a variable name.'); + elseif ~isvarname(varnames{i}) + badnames{end + 1} = varnames{i}; + end + end + badnames = unique(badnames); + + + if (length(badnames) > 0) + if (length(badnames) == 1) + badnamemsg = ['"' badnames{1} '"' ... + ' is not a valid MATLAB variable name.']; + elseif (length(badnames) == 2) + badnamemsg = ['"' badnames{1} '" and "' badnames{2} ... + '" are not valid MATLAB variable names.']; + else + badnamemsg = [sprintf('"%s", ', badnames{1:end-2}),... + '"' badnames{end-1} ... + '" and "' badnames{end} ... 
+ '" are not valid MATLAB variable names.', ]; + end + end + + if (emptynames + length(badnames) > 0) + dialogname = 'Invalid variable names'; + if (emptynames + length(badnames) == 1) + dialogname = 'Invalid variable name'; + end + errordlg([emptystrmsg badnamemsg], dialogname,'modal'); + return; + end + + %check for names already in the workspace + dupnames = []; + for i = 1:length(varnames) + if evalin('base',['exist(''',varnames{i},''', ''var'');']) + dupnames{end + 1} = varnames{i}; + end + end + dupnames = unique(dupnames); + + if (length(dupnames) > 0) + dialogname = 'Duplicate variable names'; + if (length(dupnames) == 1) + queststr = ['"' dupnames{1} '"'... + ' already exists. Do you want to overwrite it?']; + dialogname = 'Duplicate variable name'; + elseif (length(dupnames) == 2) + queststr = ['"' dupnames{1} '" and "' dupnames{2} ... + '" already exist. Do you want to overwrite them?']; + else + queststr = [sprintf('"%s" , ', dupnames{1:end-2}), ... + '"' dupnames{end-1} '" and "' dupnames{end} ... + '" already exist. Do you want to overwrite them?']; + end + buttonName = questdlg(queststr, dialogname, 'Yes', 'No', 'Yes'); + if ~strcmp(buttonName, 'Yes') + return; + end + end + + + %Check for variable names repeated in the dialog edit fields + [uniqueArray ignore uniqueIndex] = unique(varnames); + for i = 1:length(uniqueArray) + found = find(uniqueIndex == i); + if (length(found) == 1) %variable name is not repeated + index = found(1); + if (isempty(funcs{index})) + itemtoexport = items{index}; + else + fun = funcs{index} + itemtoexport = feval(fun{1}, fun{2:end}) + end + assignin('base', varnames{found(1)}, itemtoexport); + else %repeated variable names + tempstruct = struct; + for j = 1:length(found) + index = found(j); + if (isempty(funcs{index})) + itemtoexport = items{index}; + else + fun = funcs{index}; + itemtoexport = feval(fun{1}, fun{2:end}) + end + tempstruct.(fieldnames{index}) = itemtoexport; + end + assignin('base', varnames{found(1)}, tempstruct); + end + end + + disp('Variables have been created in the current workspace.') + + delete(dialog); + + +%---------------------------------------------------------------------------- +function CBCallback(obj, eventdata, cb, e, num) + if (get(cb{num}, 'Value') == 0) + set(e{num}, 'Enable', 'off'); + set(e{num}, 'Backgroundcolor', [0.831373 0.815686 0.784314]); + else + set(e{num}, 'Enable', 'on'); + set(e{num}, 'Backgroundcolor', 'white'); + end + +%---------------------------------------------------------------------------- +function CancelCallback(obj, eventdata, dialog) + delete(dialog); + +%---------------------------------------------------------------------------- +function KeyPressCallback(obj, eventdata, dialog, cb, e, itm, defaultVariableNames, fcs) + asciiVal = get(dialog, 'CurrentCharacter'); + if ~isempty(asciiVal) + if (asciiVal==32 || asciiVal==13) % space bar or return is the "same" as OK + OKCallback(obj, eventdata, dialog, cb, e, itm, defaultVariableNames, fcs); + elseif (asciiVal == 27) % escape is the "same" as Cancel + delete(dialog); + end + end + +%---------------------------------------------------------------------------- +function HelpCallback(obj, eventdata, helpfun) + feval(helpfun{1}, helpfun{2:end}); + +%---------------------------------------------------------------------------- +function [cb, e] = layoutDialog(hDlg, okBut, cancelBut, helpBut, checkboxLabels, ... 
+ variableNames, selected, itemCnt) + + EXTENT_WIDTH_INDEX = 3; % width is the third argument of extent + + POS_X_INDEX = 1; + POS_Y_INDEX = 2; + POS_WIDTH_INDEX = 3; + POS_HEIGHT_INDEX = 4; + + CONTROL_SPACING = 5; + EDIT_WIDTH = 100; + CHECK_BOX_WIDTH = 20; + DEFAULT_INDENT = 20; + + okPos = get(okBut, 'Position'); + cancelPos = get(cancelBut, 'Position'); + if isempty(helpBut) + helpPos = [0 0 0 0]; + helpWidth = 0; + else + helpPos = get(helpBut, 'Position'); + helpWidth = helpPos(POS_WIDTH_INDEX) + CONTROL_SPACING; + end + longestCBExtent = 0; + checkboxWidth = okPos(POS_WIDTH_INDEX) + cancelPos(POS_WIDTH_INDEX) + ... + helpPos(POS_WIDTH_INDEX); + ypos = okPos(POS_HEIGHT_INDEX) + okPos(POS_Y_INDEX)+ 2*CONTROL_SPACING; + cb = cell(itemCnt, 1); + e = cell(itemCnt, 1); + for i = itemCnt:-1:1 + cb{i} = uicontrol(hDlg, 'Style', 'checkbox', 'String', ... + checkboxLabels{i}); + check_pos = get(cb{i}, 'Position'); + check_pos(POS_Y_INDEX) = ypos; + extent = get(cb{i}, 'Extent'); + width = extent(EXTENT_WIDTH_INDEX); + check_pos(POS_WIDTH_INDEX) = width + CHECK_BOX_WIDTH; + set(cb{i}, 'Position', check_pos); + e{i} = uicontrol(hDlg, 'Style', 'edit', 'String', variableNames{i}, ... + 'BackgroundColor', 'white', ... + 'HorizontalAlignment', 'left'); + edit_pos = get(e{i}, 'Position'); + edit_pos(POS_Y_INDEX) = ypos; + edit_pos(POS_WIDTH_INDEX) = EDIT_WIDTH; + % cursor doesn't seem to appear in default edit height + edit_pos(POS_HEIGHT_INDEX) = edit_pos(POS_HEIGHT_INDEX) + 1; + set(e{i}, 'Position', edit_pos); + ypos = ypos + CONTROL_SPACING + edit_pos(POS_HEIGHT_INDEX); + if width > longestCBExtent + longestCBExtent = width; + end + if selected(i) + set(cb{i}, 'Value', 1) + else + set(e{i}, 'Enable', 'off'); + set(e{i}, 'Backgroundcolor', [0.831373 0.815686 0.784314]); + end + end + + % if there is only one item, make it a text control instead of a checkbox + if (itemCnt == 1) + set(cb{1}, 'Style', 'text'); + end + + % Position edit boxes + edit_x_pos = check_pos(POS_X_INDEX) + longestCBExtent + CONTROL_SPACING ... + + CHECK_BOX_WIDTH; + for i = 1:itemCnt + edit_pos = get(e{i}, 'Position'); + edit_pos(POS_X_INDEX) = edit_x_pos; + set(e{i}, 'Position', edit_pos); + end + h_pos = get(hDlg, 'Position'); + + h_pos(POS_WIDTH_INDEX) = max(edit_x_pos + edit_pos(POS_WIDTH_INDEX) + ... + CHECK_BOX_WIDTH, okPos(POS_WIDTH_INDEX) + ... + cancelPos(POS_WIDTH_INDEX) + helpWidth + ... + CONTROL_SPACING + (2 * DEFAULT_INDENT)); + h_pos(POS_HEIGHT_INDEX) = ypos; + set(hDlg, 'Position', h_pos); + + % Make sure it is on-screen + oldu = get(0,'Units'); + set(0,'Units','pixels'); + screenSize = get(0,'ScreenSize'); + set(0,'Units',oldu); + outerPos = get(hDlg,'OuterPosition'); + if outerPos(1)+outerPos(3) > screenSize(3) + outerPos(1) = screenSize(3) - outerPos(3); + end + if outerPos(2)+outerPos(4) > screenSize(4) + outerPos(2) = screenSize(4) - outerPos(4); + end + set(hDlg, 'OuterPosition', outerPos); + + x_ok = (h_pos(POS_WIDTH_INDEX))/2 - (okPos(POS_WIDTH_INDEX) + ... + helpWidth + CONTROL_SPACING + cancelPos(POS_WIDTH_INDEX))/2; + okPos(POS_X_INDEX) = x_ok; + set(okBut, 'Position', okPos); + cancelPos(POS_X_INDEX) = okPos(POS_X_INDEX) + okPos(POS_WIDTH_INDEX) + ... + CONTROL_SPACING; + set(cancelBut, 'Position', cancelPos); + if ~isempty(helpBut) + helpPos(POS_X_INDEX) = cancelPos(POS_X_INDEX) + cancelPos(POS_WIDTH_INDEX) + ... 
+ CONTROL_SPACING; + set(helpBut, 'Position', helpPos); + end + + % Reorder the children so that tabbing makes sense + children = get(hDlg, 'children'); + children = flipud(children); + set(hDlg, 'children', children); \ No newline at end of file diff --git a/boosting/weightedstats/private/getclassindex.m b/boosting/weightedstats/private/getclassindex.m new file mode 100644 index 0000000..7514f84 --- /dev/null +++ b/boosting/weightedstats/private/getclassindex.m @@ -0,0 +1,30 @@ +function idx = getclassindex(cnames,g) +%GETCLASSINDEX Find indices for class names in another list of names +% IDX = GETCLASSINDEX(CNAMES,G) takes a list CNAMES of class names +% (such as the grouping variable values in the treefit or classify +% function) and another list G of group names (as might be supplied +% in the "prior" argument to those functions), and finds the indices +% of the CNAMES names in the G list. CNAMES should be a cell array +% of strings. G can be numbers, a string array, or a cell array of +% strings + +% Copyright 1993-2002 The MathWorks, Inc. +% $Revision: 1.1 $ $Date: 2002/03/22 22:03:02 $ + +% Convert to common string form, whether input is char, cell, or numeric +if isnumeric(g) + g = cellstr(strjust(num2str(g(:)), 'left')); +elseif ~iscell(g) + g = cellstr(g); +end + +nclasses = length(cnames); +idx = zeros(1,nclasses); + +% Look up each class in the grouping variable. +for i = 1:nclasses + j = strmatch(cnames{i}, g, 'exact'); + if ~isempty(j) + idx(i) = j(1); + end +end diff --git a/boosting/weightedstats/private/getdsdb.m b/boosting/weightedstats/private/getdsdb.m new file mode 100644 index 0000000..b7d0785 --- /dev/null +++ b/boosting/weightedstats/private/getdsdb.m @@ -0,0 +1,14 @@ +function thedsdb=getdsdb(varargin) + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:36:11 $ +% Copyright 2003-2004 The MathWorks, Inc. + +thedsdb = dfgetset('thedsdb'); + +% Create a singleton class instance +if isempty(thedsdb) + thedsdb = stats.dsdb; + dfgetset('thedsdb',thedsdb); +end + + diff --git a/boosting/weightedstats/private/getfitdb.m b/boosting/weightedstats/private/getfitdb.m new file mode 100644 index 0000000..2ca3900 --- /dev/null +++ b/boosting/weightedstats/private/getfitdb.m @@ -0,0 +1,15 @@ +function fitdb=getfitdb(varargin) +% GETFITDB A helper function for DFITTOOL + +% $Revision: 1.1.6.3 $ $Date: 2004/01/24 09:36:12 $ +% Copyright 2003-2004 The MathWorks, Inc. + +thefitdb = dfgetset('thefitdb'); + +% Create a singleton class instance +if isempty(thefitdb) + thefitdb = stats.fitdb; +end + +dfgetset('thefitdb',thefitdb); +fitdb=thefitdb; diff --git a/boosting/weightedstats/private/getoutlierdb.m b/boosting/weightedstats/private/getoutlierdb.m new file mode 100644 index 0000000..75087fe --- /dev/null +++ b/boosting/weightedstats/private/getoutlierdb.m @@ -0,0 +1,12 @@ +function theoutlierdb=getoutlierdb(varargin) + +% $Revision: 1.1.6.2 $ +% Copyright 2003-2004 The MathWorks, Inc. 
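+
+% Example of the lazily-created singleton pattern used here (an illustrative
+% sketch, not part of the original file):
+%   db  = getoutlierdb;   % first call constructs stats.outlierdb and caches it
+%   db2 = getoutlierdb;   % later calls return the same cached instance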
+ +theoutlierdb = dfgetset('theoutlierdb'); + +% Create a singleton class instance +if isempty(theoutlierdb) + theoutlierdb = stats.outlierdb; + dfgetset('theoutlierdb',theoutlierdb); +end diff --git a/boosting/weightedstats/private/idummy.m b/boosting/weightedstats/private/idummy.m new file mode 100644 index 0000000..65aa224 --- /dev/null +++ b/boosting/weightedstats/private/idummy.m @@ -0,0 +1,34 @@ +function d = idummy(x, method) +%DUMMY Creates a matrix of dummy variables for a discrete variable +% D=IDUMMY(X,METHOD) creates an array D of dummy variables for the +% grouping variable I (integers 1,...,g), using the method specified: +% +% method = 1: 0/-1/1 coding, full rank +% method = 2: 0/1 coding, full rank +% method = 3: 0/1 coding, overdetermined + +% Copyright 1993-2002 The MathWorks, Inc. +% $Revision: 1.4 $ $Date: 2002/02/04 19:25:43 $ + +if (nargin < 2) + method = 1; +end + +n = length(x); +g = max(x); +ncols = g - (method ~= 3); +d = repmat(0, n, ncols); + +if (g > 1) + % Fill in -1 for the first level + if (method == 1) + i = find(x == 1); + d(find(x == 1),:) = -1; + end + + % Fill in 1 in the appropriate column for other levels + m3 = (method == 3); + for j=(2-m3):g + d(find(x == j),j-1+m3) = 1; + end +end diff --git a/boosting/weightedstats/private/im2mis.m b/boosting/weightedstats/private/im2mis.m new file mode 100644 index 0000000..8b3090b --- /dev/null +++ b/boosting/weightedstats/private/im2mis.m @@ -0,0 +1,204 @@ +function mis = im2mis(varargin) +%IM2MIS Convert image to Java MemoryImageSource. +% +% MIS = IM2MIS(I) converts the intensity image I to a Java +% MemoryImageSource. +% +% MIS = IM2MIS(X,MAP) converts the indexed image X with colormap MAP to a +% Java MemoryImageSource. +% +% MIS = IM2MIS(RGB) converts the RGB image RGB to a Java +% MemoryImageSource. + +% Copyright 2001-2004 The MathWorks, Inc. +% $Revision: 1.1.6.3 $ $Date: 2004/01/24 09:36:15 $ + +% This is really equivalent to images\images\im2mis.m Rev 1.3. 
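+
+% Example usage (an illustrative sketch, not part of the original file):
+%   I    = uint8(255*rand(64));        % grayscale intensity image
+%   mis  = im2mis(I);                  % java.awt.image.MemoryImageSource
+%   map  = gray(16);
+%   X    = uint8(15*rand(32));         % uint8 indexed image, 0-based indices
+%   mis2 = im2mis(X, map);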
+ +% Input-output specs +% ------------------ +% I: 2-D, real, full matrix +% uint8, uint16, or double +% logical ok but ignored +% +% RGB: 3-D, real, full matrix +% size(RGB,3)==3 +% uint8, uint16, or double +% logical ok but ignored +% +% X: 2-D, real, full matrix +% uint8 or double +% if isa(X,'uint8'): X <= size(MAP,1)-1 +% if isa(X,'double'): 1 <= X <= size(MAP,1) +% logical ok but ignored +% +% MAP: 2-D, real, full matrix +% size(MAP,1) <= 256 +% size(MAP,2) == 3 +% double +% logical ok but ignored +% +% MIS: Java MemoryImageSource + +[img,map,method,msg] = ParseInputs(varargin{:}); +if ~isempty(msg) + if isstruct(msg) + error(msg) + else + error('stats:im2mis:BadImage',msg); + end +end + +% Assign function according to method +switch method + case 'intensity' + mis = im2mis_intensity(img); + case 'rgb' + mis = im2mis_rgb(img); + case 'indexed' + mis = im2mis_indexed(img,map); +end + + +%---------------------------------------------------- +function mis = im2mis_intensity(I) + +mis = im2mis_indexed(I,gray(256)); + + +%---------------------------------------------------- +function mis = im2mis_rgb(RGB) + +mis = im2mis_packed(RGB(:,:,1),RGB(:,:,2),RGB(:,:,3)); + + +%---------------------------------------------------- +function mis = im2mis_packed(red,green,blue) + +[mrows,ncols,p] = size(red); +alpha = 255*ones(mrows,ncols); +packed = bitshift(uint32(alpha),24); +packed = bitor(packed,bitshift(uint32(red),16)); +packed = bitor(packed,bitshift(uint32(green),8)); +packed = bitor(packed,uint32(blue)); +pixels = packed'; +mis = java.awt.image.MemoryImageSource(ncols,mrows,pixels(:),0,ncols); + + +%---------------------------------------------------- +function mis = im2mis_indexed(x,map) + +[mrows,ncols] = size(x); +map8 = uint8(round(map*255)); % convert color map to uint8 +% Instantiate a ColorModel with 8 bits of depth +cm = java.awt.image.IndexColorModel(8,size(map8,1),map8(:,1),map8(:,2),map8(:,3)); +xt = x'; +mis = java.awt.image.MemoryImageSource(ncols,mrows,cm,xt(:),0,ncols); + + +%------------------------------- +% Function ParseInputs +% +function [img, map, method, msg] = ParseInputs(varargin) + +% defaults +img = []; +map = []; +method = 'intensity'; +msg = ''; + +msg = nargchk(1,2,nargin,'struct'); +if ~isempty(msg); + return; +end + +img = varargin{1}; + +if ~isnumeric(img) | ~isreal(img) | issparse(img) + msg = 'Image must be real and cannot be sparse.'; + return; +end + + +switch nargin + case 1 + % figure out if intensity or RGB + if ndims(img) == 2 + method = 'intensity'; + elseif ndims(img)==3 & size(img,3)==3 + method = 'rgb'; + else + msg = 'Image must be an intensity, RGB, or indexed image.'; + return; + end + + % Convert to uint8. + if isa(img,'double') + img = uint8(img * 255 + 0.5); + + elseif isa(img,'uint16') + img = uint8(bitshift(img, -8)); + + elseif isa(img, 'uint8') + % Nothing to do. + + else + error('stats:im2mis:BadImage',... 
+ 'Intensity or RGB image must be uint8, uint16, or double.'); + end + + case 2 + + % indexed image + method = 'indexed'; + map = varargin{2}; + + % validate map + if ~isnumeric(map) | ~isreal(map) | issparse(map) | ~isa(map,'double') + msg = 'MAP must be real, double, and cannot be sparse.'; + return; + end + + if size(map,2) ~= 3 + msg = 'MAP must be M-by-3 colormap.'; + return; + end + + ncolors = size(map,1); + if ncolors > 256 + msg = 'MAP has too many colors for 8-bit integer storage.'; + return; + end + + % validate img + if ndims(img) ~= 2 + msg = 'X must have 2 dimensions.'; + return; + end + + if isa(img,'uint8') + if max(img(:)) > ncolors-1 + msg = 'Invalid indexed image: an index falls outside colormap.'; + return; + end + elseif isa(img,'double') + if max(img(:)) > ncolors + msg = 'Invalid indexed image: an index falls outside colormap.'; + return; + end + if min(img(:)) < 1 + msg = 'Invalid indexed image: an index was less than 1.'; + return; + end + + img = uint8(img - 1); + else + msg = 'X must be uint8 or double.'; + return; + end + + otherwise + msg = 'Internal problem: too many input arguments.'; + return; + +end diff --git a/boosting/weightedstats/private/iseuclidean.m b/boosting/weightedstats/private/iseuclidean.m new file mode 100644 index 0000000..f8e8252 --- /dev/null +++ b/boosting/weightedstats/private/iseuclidean.m @@ -0,0 +1,45 @@ +function t = iseuclidean(D) +%ISEUCLIDEAN Is a distance matrix Euclidean? +% T = ISEUCLIDEAN(D) returns a logical indicating whether or not the +% dissimilarity matrix D is a Euclidean distance matrix, i.e., whether +% there exist n points in p-dimensional space (for some p < n) such that +% their Euclidean distances are given by D. D may be specified as either +% a full (square, symmetric) dissimilarity matrix, or as the lower +% triangle (e.g., output by PDIST). +% +% This algorithm is essentially classical multidimensional scaling. +% +% See also CMDSCALE, PDIST, LINKAGE. +% +% References: +% [1] Seber, G.A.F., Multivariate Observations, Wiley, 1984 + +% Copyright 1993-2002 The MathWorks, Inc. +% $Revision: 1.4 $ $Date: 2002/02/04 19:25:44 $ + +[n m] = size(D); +del = 10*eps; + +% lower triangle form for D +if n == 1 + % make sure it's a valid dissimilarity matrix + n = (1+sqrt(1+8*m))/2; + if n == fix(n) & all(D >= 0) + D = squareform(D); + else + warning('Not a valid dissimilarity or distance matrix.') + t = logical(0); + return + end + +% full matrix form, make sure it's valid dissimilarity matrix +elseif n ~= m | any(any(D < 0 | abs(D - D') > del*max(max(D)))) | any(diag(D) > del) + warning('Not a valid dissimilarity or distance matrix.') + t = logical(0); + return +end + +P = eye(n) - repmat(1/n,n,n); +B = P * (-.5 * D .* D) * P; +g = eig((B+B')./2); % guard against spurious complex e-vals from roundoff +t = all(-eps^(3/4)*max(abs(g)) <= g); % all non-negative eigenvals (within roundoff)? diff --git a/boosting/weightedstats/private/isinaxes.m b/boosting/weightedstats/private/isinaxes.m new file mode 100644 index 0000000..661b587 --- /dev/null +++ b/boosting/weightedstats/private/isinaxes.m @@ -0,0 +1,19 @@ +function r = isinaxes(point, axes) +%ISINAXES determines whether or not a point is in an axes. +% ISINAXES returns true if POINT is in AXES and false +% if it is not. POINT is a CurrentPoint. This utility routine +% works only checks X and Y coordinates. + +% Copyright 2002-2004 The MathWorks, Inc. 
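+%
+% Example (an illustrative sketch, not part of the original file):
+%   cp = get(gca, 'CurrentPoint');
+%   if isinaxes(cp, gca)
+%       disp('pointer is inside the axes')
+%   end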
+% $Revision: 1.1.4.2 $ $Date: 2004/04/16 22:22:03 $ + +xr = get(axes,'Xlim'); +yr = get(axes,'Ylim'); +cx = point(1,1); +cy = point(1,2); +if cx >= xr(1) && cx <= xr(2) && cy >= yr(1) && cy <= yr(2) + r = true; +else + r = false; +end; + diff --git a/boosting/weightedstats/private/linkagemex.c b/boosting/weightedstats/private/linkagemex.c new file mode 100644 index 0000000..3ed6b5e --- /dev/null +++ b/boosting/weightedstats/private/linkagemex.c @@ -0,0 +1,566 @@ +#include "mex.h" +#include +#include + +/* linkage.c - Create hierarchical cluster tree + + This is a MEX-file for MATLAB. + Copyright 2003-2004 The MathWorks, Inc. + + $Revision: 1.1.6.2 $ + + This function may give the wrong answer if compiled with MSVC 6.0 or + less in Windows (see line 177) */ + +#define ISNAN(a) (a != a) +/* #define ISNAN(a) (mxIsNaN(a)) */ + +void mexFunction( + int nlhs, /* Number of left hand side (output) arguments */ + mxArray *plhs[], /* Array of left hand side (output) arguments */ + int nrhs, /* Number of right hand side (input) arguments */ + const mxArray *prhs[] /* Array of right hand side (input) arguments */ +) +{ +static double inf; +int n,m,m2,m2m3,m2m1,bn,bc,bp,p1,p2,q,q1,q2,h,i,j,k,l,g; +int nk,nl,ng,nkpnl,sT,N; +int *obp,*scl,*K,*L; +double *y,*yi,*s,*b1,*b2,*T; +double t1,t2,t3,rnk,rnl; +char method[3]; +enum method_types + {single,complete,average,weighted,centroid,median,ward} + method_key; +int uses_scl = false, no_squared_input = true; + +/* check for proper number of arguments */ +if(nrhs!=2) + mexErrMsgTxt("Two inputs required."); +if(nlhs>1) + mexErrMsgTxt("Too many output arguments."); + +/* check input type */ +if (!mxIsDouble(prhs[0])) + mexErrMsgTxt("Function linkagemex is only defined for values of class 'double'."); +if (mxIsSparse(prhs[0])) + mexErrMsgTxt("Function linkagemex is not defined for 'sparse' matrices."); + +/* create a pointer to the input vector y */ +yi = mxGetPr(prhs[0]); + +/* get the dimensions of input vector y (pairwise distances) */ +n = mxGetN(prhs[0]); + +/* calculate some constants */ +m2 = (int) (1+sqrt(1+8*n)); /* 2*m */ +m2m3 = m2 - 3; /* 2*m - 3 */ +m2m1 = m2 - 1; /* 2*m - 1 */ +m = m2 >> 1; /* number of observations --> m */ +bn = m-1; /* number of branches --> bn */ +inf = mxGetInf(); /* inf */ + +/* get the method */ +mxGetString(prhs[1],method,3); +if ( strcmp(method,"si") == 0 ) method_key = single; +else if ( strcmp(method,"co") == 0 ) method_key = complete; +else if ( strcmp(method,"av") == 0 ) method_key = average; +else if ( strcmp(method,"we") == 0 ) method_key = weighted; +else { + no_squared_input = false; + if ( strcmp(method,"ce") == 0 ) method_key = centroid; + else if ( strcmp(method,"me") == 0 ) method_key = median; + else if ( strcmp(method,"wa") == 0 ) method_key = ward; + else mexErrMsgTxt("Unknown linkage method."); +} + +/* allocate space for the output matrix */ +plhs[0] = mxCreateDoubleMatrix(bn, 3, mxREAL); + +/* create pointers to the output matrix */ +b1 = mxGetPr(plhs[0]); /*leftmost column */ +b2 = b1 + bn; /*center column */ +s = b2 + bn; /*rightmost column */ + +/* find the best value for N (size of the temporal vector of */ +/* minimuns) depending on the problem size */ +if (m>1023) N = 512; +else if (m>511) N = 256; +else if (m>255) N = 128; +else if (m>127) N = 64; +else if (m>63) N = 32; +else N = 16; +if (method_key == single) N = N >> 2; + +/* set space for the vector of minimuns (and indexes) */ +T = mxMalloc(N * sizeof(double)); +K = mxMalloc(N * sizeof(int)); +L = mxMalloc(N * sizeof(int)); + +/* set space to 
copy the input */ +y = mxMalloc(n * sizeof(double)); +/* copy input and compute Y^2 or 0.5*Y^2 if necessary */ +if (no_squared_input) memcpy(y,yi,n * sizeof(double)); +else if (method_key == ward) + for (i=0; i bc = [ 0:bn-1] +bp is a "branch pointer" --> bp = [ m:m+bc-1 ], it is used to point + branches in the output since the values [0:m-1]+1 are reserved for + leaves. +*/ + + /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* + find the "k","l" indices of the minimum distance "t1" in the remaining + half matrix, the new computed distances to the new cluster will be placed + in the row/col "l", then the leftmost column in the matrix of pairwise + distances will be moved to the row/col "k", so the whole matrix of + distances is smaller at every step */ + + /* OLD METHOD: search for the minimun in the whole "y" at every branch + iteration + t1 = inf; + p1 = ((m2m1 - bc) * bc) >> 1; /*finds where the remaining matrix starts + for (j=bc; j> 1; /* finds where the matrix starts */ + for (j=bc; j0)); h--) { + T[h]=T[h-1]; + K[h]=K[h-1]; + L[h]=L[h-1]; + } /* for (h=N-1 ... */ + T[h] = t2; + K[h] = j; + L[h] = i; + sT++; + } /* if (t2N) sT=N; + } /* if (sT<1) */ + + /* if sT==0 but bcL[h]) { + K[h] = L[h]; + L[h] = k; + } /* if (k> ...*/ + else K[h] = k; + } /* if (bc== ... */ + h++; + } /* if k!= ... */ + } /* for (h=0 ... */ + sT=h; /* the new size of "T" after the shifting */ + + /* Update output for this branch, puts smaller pointers always in the + leftmost column */ + if (obp[k]> 1) + k - bc - 1; + p2 = p1 - k + l; + + if (uses_scl) { + /* Get the cluster cardinalities */ + nk = scl[k]; + nl = scl[l]; + nkpnl = nk + nl; + + /* Updates cluster cardinality "scl" */ + scl[k] = scl[bc]; /* letfmost column cluster cardinality */ + scl[l] = nkpnl; /* new cluster cardinality */ + + } /* if (uses_scl) */ + + /* some other values that we want to compute outside the loops */ + switch (method_key) { + case centroid: + t1 = t1 * (nk * nl) / (nkpnl * nkpnl); + case average: + /* Computes weighting ratios */ + rnk = (double) nk / nkpnl; + rnl = (double) nl / nkpnl; + break; + case median: + t1 = t1/4; + } /* switch (method_key) */ + + switch (method_key) { + case average: + for (q=bn-bc-1; q>q1; q--) { + t2 = y[p1] * rnk + y[p2] * rnl; + if (t2 < t3) t3 = t2 ; + y[p2] = t2; + p1 = p1 + q; + p2 = p2 + q; + } + p1++; + p2 = p2 + q; + for (q=q1-1; q>q2; q--) { + t2 = y[p1] * rnk + y[p2] * rnl; + if (t2 < t3) t3 = t2 ; + y[p2] = t2; + p1++; + p2 = p2 + q; + } + p1++; + p2++; + for (q=q2+1; q>0; q--) { + t2 = y[p1] * rnk + y[p2] * rnl; + if (t2 < t3) t3 = t2 ; + y[p2] = t2; + p1++; + p2++; + } + break; /* case average */ + + case single: + for (q=bn-bc-1; q>q1; q--) { + if (y[p1] < y[p2]) y[p2] = y[p1]; + else if (ISNAN(y[p2])) y[p2] = y[p1]; + if (y[p2] < t3) t3 = y[p2]; + p1 = p1 + q; + p2 = p2 + q; + } + p1++; + p2 = p2 + q; + for (q=q1-1; q>q2; q--) { + if (y[p1] < y[p2]) y[p2] = y[p1]; + else if (ISNAN(y[p2])) y[p2] = y[p1]; + if (y[p2] < t3) t3 = y[p2]; + p1++; + p2 = p2 + q; + } + p1++; + p2++; + for (q=q2+1; q>0; q--) { + if (y[p1] < y[p2]) y[p2] = y[p1]; + else if (ISNAN(y[p2])) y[p2] = y[p1]; + if (y[p2] < t3) t3 = y[p2]; + p1++; + p2++; + } + break; /* case simple */ + + case complete: + for (q=bn-bc-1; q>q1; q--) { + if (y[p1] > y[p2]) y[p2] = y[p1]; + else if (ISNAN(y[p2])) y[p2] = y[p1]; + if (y[p2] < t3) t3 = y[p2]; + p1 = p1 + q; + p2 = p2 + q; + } + p1++; + p2 = p2 + q; + for (q=q1-1; q>q2; q--) { + if (y[p1] > y[p2]) y[p2] = y[p1]; + else if (ISNAN(y[p2])) y[p2] = 
y[p1]; + if (y[p2] < t3) t3 = y[p2]; + p1++; + p2 = p2 + q; + } + p1++; + p2++; + for (q=q2+1; q>0; q--) { + if (y[p1] > y[p2]) y[p2] = y[p1]; + else if (ISNAN(y[p2])) y[p2] = y[p1]; + if (y[p2] < t3) t3 = y[p2]; + p1++; + p2++; + } + break; /* case complete */ + + case weighted: + for (q=bn-bc-1; q>q1; q--) { + t2 = (y[p1] + y[p2])/2; + if (t2q2; q--) { + t2 = (y[p1] + y[p2])/2; + if (t20; q--) { + t2 = (y[p1] + y[p2])/2; + if (t2q1; q--) { + t2 = y[p1] * rnk + y[p2] * rnl - t1; + if (t2q2; q--) { + t2 = y[p1] * rnk + y[p2] * rnl - t1; + if (t20; q--) { + t2 = y[p1] * rnk + y[p2] * rnl - t1; + if (t2q1; q--) { + t2 = (y[p1] + y[p2])/2 - t1; + if (t2q2; q--) { + t2 = (y[p1] + y[p2])/2 - t1; + if (t20; q--) { + t2 = (y[p1] + y[p2])/2 - t1; + if (t2q1; q--) { + ng = scl[g++]; + t2 = (y[p1]*(nk+ng) + y[p2]*(nl+ng) - t1*ng) / (nkpnl+ng); + if (t2q2; q--) { + ng = scl[g++]; + t2 = (y[p1]*(nk+ng) + y[p2]*(nl+ng) - t1*ng) / (nkpnl+ng); + if (t20; q--) { + ng = scl[g++]; + t2 = (y[p1]*(nk+ng) + y[p2]*(nl+ng) - t1*ng) / (nkpnl+ng); + if (t2> 1) + k - 1; + p2 = p1 - k + bc + 1; + + for (q=bn-bc-1; q>q1; q--) { + p1 = p1 + q; + y[p1] = y[p2++]; + } + p1 = p1 + q + 1; + p2++; + for ( ; q>0; q--) { + y[p1++] = y[p2++]; + } + } /*if (k!=bc) */ +} /*for (bc=0,bp=m;bc X(J) => YHAT(I) >= YHAT(J), i.e., the values in YHAT are +% monotonically non-decreasing with respect to X (sometimes referred +% to as "weak monotonicity"). LSQISOTONIC uses the "pool adjacent +% violators" algorithm. +% +% If X(I) == X(J), then YHAT(I) may be <, ==, or > YHAT(J) (sometimes +% referred to as the "primary approach"). If ties do occur in X, a plot +% of YHAT vs. X may appear to be non-monotonic at those points. In fact, +% the above monotonicity constraint is not violated, and a reordering +% within each group of ties, by ascending YHAT, will produce the desired +% appearance in the plot. +% +% YHAT = LSQISOTONIC(X,Y,W) performs weighted isotonic regression using +% the non-negative weights in W. + +% Copyright 2003-2004 The MathWorks, Inc. +% $Revision: 1.1.6.3 $ $Date: 2004/02/01 22:10:40 $ + +% References: +% [1] Kruskal, J.B. (1964) "Nonmetric multidimensional scaling: a +% numerical method", Psychometrika 29:115-129. +% [2] Cox, R.F. and Cox, M.A.A. (1994) Multidimensional Scaling, +% Chapman&Hall. + +n = numel(x); + +% Sort points ascending in x, break ties with y. +[xyord,ord] = sortrows([x(:) y(:)]); iord(ord) = 1:n; + +% Initialize fitted values to the given values. +yhat = xyord(:,2); + +block = 1:n; +if (nargin == 3) && ~isempty(w) + w = w(:); w = w(ord); % reorder w as a column + + % Merge zero-weight points with preceeding pos-weighted point (or + % with the following pos-weighted point if at start). + posWgts = (w > 0); + if any(~posWgts) + idx = cumsum(posWgts); idx(idx == 0) = 1; + w = w(posWgts); + yhat = yhat(posWgts); + block = idx(block); + end + +else + w = ones(size(yhat),class(yhat)); +end + +while true + % If all blocks are monotonic, then we're done. + diffs = diff(yhat); + if all(diffs >= 0), break; end + + % Otherwise, merge blocks of non-increasing fitted values, and set the + % fitted value within each block equal to a constant, the weighted mean + % of values in that block. + idx = cumsum([1; (diffs>0)]); + sumyhat = accumarray(idx,w.*yhat); + w = accumarray(idx,w); + yhat = sumyhat ./ w; + block = idx(block); +end + +% Broadcast merged blocks out to original points, and put back in the +% original order and shape. 
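+% (Worked example of the pooling above, an illustrative sketch: for x = 1:4,
+%  y = [1 3 2 4] and unit weights, the decreasing pair (3,2) is merged into
+%  its weighted mean 2.5, giving yhat = [1 2.5 2.5 4], the closest
+%  non-decreasing fit to y.)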
+yhat = yhat(block); +yhat = reshape(yhat(iord), size(y)); diff --git a/boosting/weightedstats/private/mgrp2idx.m b/boosting/weightedstats/private/mgrp2idx.m new file mode 100644 index 0000000..63e8ded --- /dev/null +++ b/boosting/weightedstats/private/mgrp2idx.m @@ -0,0 +1,64 @@ +function [ogroup,glabel,gname,multigroup] = mgrp2idx(group,rows,sep); +%MGRP2IDX Convert multiple grouping variables to index vector +% [OGROUP,GLABEL,GNAME,MULTIGROUP] = MGRP2IDX(GROUP,ROWS) takes +% the inputs GROUP, ROWS, and SEP. GROUP is a grouping variable (numeric +% vector, string matrix, or cell array of strings) or a cell array +% of grouping variables. ROWS is the number of observations. +% SEP is a separator for the grouping variable values. +% +% The output OGROUP is a vector of group indices. GLABEL is a cell +% array of group labels, each label consisting of the values of the +% various grouping variables separated by the characters in SEP. +% GNAME is a cell array containing one column per grouping variable +% and one row for each distinct combination of grouping variable +% values. MULTIGROUP is 1 if there are multiple grouping variables +% or 0 if there are not. + +% Tom Lane, 12-17-99 +% Copyright 1993-2002 The MathWorks, Inc. +% $Revision: 1.4 $ $Date: 2002/02/04 19:25:44 $ + +multigroup = (iscell(group) & size(group,1)==1); +if (~multigroup) + [ogroup,gname] = grp2idx(group); + glabel = gname; +else + % Group according to each distinct combination of grouping variables + ngrps = size(group,2); + grpmat = zeros(rows,ngrps); + namemat = cell(1,ngrps); + + % Get integer codes and names for each grouping variable + for j=1:ngrps + [g,gn] = grp2idx(group{1,j}); + grpmat(:,j) = g; + namemat{1,j} = gn; + end + + % Find all unique combinations + [urows,ui,uj] = unique(grpmat,'rows'); + + % Create a cell array, one col for each grouping variable value + % and one row for each observation + ogroup = uj; + gname = cell(size(urows)); + for j=1:ngrps + gn = namemat{1,j}; + gname(:,j) = gn(urows(:,j)); + end + + % Create another cell array of multi-line texts to use as labels + glabel = cell(size(gname,1),1); + if (nargin > 2) + nl = sprintf(sep); + else + nl = sprintf('\n'); + end + fmt = sprintf('%%s%s',nl); + lnl = length(fmt)-3; % one less than the length of nl + for j=1:length(glabel) + gn = sprintf(fmt, gname{j,:}); + gn(end-lnl:end) = []; + glabel{j,1} = gn; + end +end diff --git a/boosting/weightedstats/private/mlecustom.m b/boosting/weightedstats/private/mlecustom.m new file mode 100644 index 0000000..667ac04 --- /dev/null +++ b/boosting/weightedstats/private/mlecustom.m @@ -0,0 +1,527 @@ +function [phat, pci] = mlecustom(data,varargin) +%MLE Maximum likelihood estimation for custom univariate distributions. +% +% See help for MLE. + +% Copyright 1993-2004 The MathWorks, Inc. +% $Revision: 1.1.6.7 $ $Date: 2004/04/04 03:42:19 $ + +% Process any optional input arguments. +pnames = {'pdf' 'cdf' 'logpdf' 'logsf' 'nloglf' 'start' 'censoring' 'frequency' ... + 'alpha' 'options' 'lowerbound' 'upperbound' 'optimfun' 'optimoptions'}; +defaults = {[] [] [] [] [] [] zeros(size(data)) ones(size(data)) ... + 0.05 [] [] [] 'fminsearch' []}; +[eid,errmsg,pdf,cdf,logpdf,logsf,nloglf,start,cens,freq,alpha,userOpts,... 
+ lb,ub,optimFun,optimOpts] = statgetargs(pnames, defaults, varargin{:}); +if ~isempty(eid) + error(sprintf('stats:mle:%s',eid),errmsg); +end + +userOpts = statset(statset('mlecustom'), userOpts); +haveGrad = strcmp(userOpts.GradObj, 'on'); +checkFunVals = strcmp(userOpts.FunValCheck, 'on'); +delta = userOpts.DerivStep; + +% Determine how the distribution is specified, and get handles to the +% specified functions. +if ~isempty(nloglf) + if isa(nloglf,'function_handle') + nloglfFun = nloglf; + nloglfAddArgs = {}; + elseif iscell(nloglf) && isa(nloglf{1},'function_handle') + nloglfFun = nloglf{1}; + nloglfAddArgs = nloglf(2:end); + else + error('stats:mle:InvalidNloglf','The ''nloglf'' parameter value must be a function handle or a cell array\ncontaining a function handle.'); + end + funType = 'nloglf'; + % Assume we'll use llf_nloglf. Will switch to using llf_nloglf_diff if + % optimFun is 'fmincon', haveGrad is false, and optimOpts.GradObj is 'on'. + llf = @llf_nloglf; + + funArgs = {nloglfFun nloglfAddArgs{:}}; + +elseif ~isempty(logpdf) + if isa(logpdf,'function_handle') + logpdfFun = logpdf; + logpdfAddArgs = {}; + elseif iscell(logpdf) && isa(logpdf{1},'function_handle') + logpdfFun = logpdf{1}; + logpdfAddArgs = logpdf(2:end); + else + error('stats:mle:InvalidLogpdf','The ''logpdf'' parameter value must be a function handle or a cell array\ncontaining a function handle.'); + end + if ~isempty(logsf) + if isa(logsf,'function_handle') + logsfFun = logsf; + logsfAddArgs = {}; + elseif iscell(logsf) && isa(logsf{1},'function_handle') + logsfFun = logsf{1}; + logsfAddArgs = logsf(2:end); + else + error('stats:mle:InvalidLogsf','The ''logsf'' parameter value must be a function handle or a cell array\ncontaining a function handle.'); + end + funType = 'logpdflogsf'; + elseif isempty(logsf) && sum(cens) == 0 + logsfFun = []; + logsfAddArgs = {}; + funType = 'logpdf'; + else + error('stats:mle:LogSFRequired','You must provide a log SF along with the log PDF if the data include censoring.'); + end + llf = @llf_logpdflogsf; + haveGrad = false; + + fun1Args = {logpdfFun logpdfAddArgs{:}}; + fun2Args = {logsfFun logsfAddArgs{:}}; + +elseif ~isempty(pdf) + if isa(pdf,'function_handle') + pdfFun = pdf; + pdfAddArgs = {}; + elseif iscell(pdf) && isa(pdf{1},'function_handle') + pdfFun = pdf{1}; + pdfAddArgs = pdf(2:end); + else + error('stats:mle:InvalidPdf','The ''pdf'' parameter value must be a function handle or a cell array\ncontaining a function handle.'); + end + if ~isempty(cdf) + if isa(cdf,'function_handle') + cdfFun = cdf; + cdfAddArgs = {}; + elseif iscell(cdf) && isa(cdf{1},'function_handle') + cdfFun = cdf{1}; + cdfAddArgs = cdf(2:end); + else + error('stats:mle:InvalidCdf','The ''cdf'' parameter value must be a function handle or a cell array\ncontaining a function handle.'); + end + funType = 'pdfcdf'; + elseif isempty(cdf) && sum(cens) == 0 + cdfFun = []; + cdfAddArgs = {}; + funType = 'pdf'; + else + error('stats:mle:CDFRequired','You must provide a CDF with the PDF if the data include censoring.'); + end + llf = @llf_pdfcdf; + haveGrad = false; + + fun1Args = {pdfFun pdfAddArgs{:}}; + fun2Args = {cdfFun cdfAddArgs{:}}; + +else + error('stats:mle:DistFunsRequired','You must provide function handles as values for either the ''pdf'' and ''cdf'' parameters, or the ''logpdf''\nand ''logsf'' parameters, or the ''nloglf'' parameter.'); +end + +% Determine the size of the parameter vector from the (required) specified +% initial parameter values. 
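+% (An illustrative sketch of a call that supplies 'start', not from the
+%  original file; parameter names follow the pnames list above:
+%      phat = mlecustom(data, 'pdf',@(x,mu) exppdf(x,mu), ...
+%                       'cdf',@(x,mu) expcdf(x,mu), 'start',1);
+%  here 'start' is scalar, so nparams below is 1.)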
+if ~isempty(start) + nparams = numel(start); +else + error('stats:mle:StartRequired','You must supply the ''start'' parameter value.'); +end + +% Make sure specified parameter bounds have correct sizes. The defaults +% are set here because they depend on the size of the params. +if isempty(lb) + lb = repmat(-Inf, size(start)); +elseif ~isequal(size(start), size(lb)) + error('stats:mle:BndsSizeMismatch','The values of the ''lowerbound'' and ''start'' parameters must be the same size.'); +end +if isempty(ub) + ub = repmat(Inf, size(start)); +elseif ~isequal(size(start), size(ub)) + error('stats:mle:BndsSizeMismatch','The values of the ''upperbound'' and ''start'' parameters must be the same size.'); +end +if ~all((lb <= start) & (start <= ub)) + error('stats:mle:StartOutOfRange','The value of the ''start'' parameter must satisfy the upper and lower parameter bounds.'); +end + +% Now that start is valid, check once for errors thrown by the +% user-supplied functions. +if isequal(funType, 'nloglf') + checkFunErrs('nloglf',nloglfFun,start,data,cens,freq,nloglfAddArgs,haveGrad); + +else + % For the PDF/CDF or logPDF/logSF forms, divide the data up into censored + % and uncensored observations to save work later. + c = (cens == 0); + uncensData = data(c); + uncensFreq = freq(c); + censData = data(~c); + censFreq = freq(~c); + + switch funType + case {'logpdflogsf' 'logpdf'} + checkFunErrs('logpdf',logpdfFun,start,uncensData,[],[],logpdfAddArgs); + checkFunErrs('logsf',logsfFun,start,censData,[],[],logsfAddArgs); + otherwise + checkFunErrs('pdf',pdfFun,start,uncensData,[],[],pdfAddArgs); + checkFunErrs('cdf',cdfFun,start,censData,[],[],cdfAddArgs); + end +end + +% Use fminsearch (the default), with any specified options. +if isequal(optimFun, 'fminsearch') + opts = optimset(userOpts, 'GradObj','off', 'FunValCheck','off'); + if ~isempty(optimOpts), opts = optimset(opts, optimOpts); end + switch funType + case 'nloglf' + % Call llf_nloglf. fminsearch will never ask for a gradient. + [phat,nll,err,output] = ... + fminsearch(llf,start,opts,data,cens,freq,funArgs,checkFunVals,lb,ub); + otherwise + % Call either llf_pdfcdf or llf_logpdflogsf. Neither can return a + % gradient, and fminsearch will never ask for one. + [phat,nll,err,output] = ... + fminsearch(llf,start,opts,uncensData,censData,uncensFreq,censFreq,fun1Args,fun2Args,checkFunVals,lb,ub); + end + +% If requested, use fmincon, with any specified options. +elseif isequal(optimFun, 'fmincon') + opts = optimset(userOpts,'LargeScale','on', 'GradObj','on', 'PrecondBandWidth',Inf, 'FunValCheck','off'); + if ~isempty(optimOpts), opts = optimset(opts, optimOpts); end + + % Use TolBnd to approximate open bounds. + lb = lb + userOpts.TolBnd; + ub = ub - userOpts.TolBnd; + + switch funType + case 'nloglf' + % If nloglf can return a gradient, or if fmincon does not expect + % one, call llf_nloglf. + if haveGrad || strcmp(opts.GradObj, 'off') + [phat,nll,err,output] = ... + fmincon(llf,start,[],[],[],[],lb,ub,[],opts,data,cens,freq,funArgs,checkFunVals); + + % If nloglf cannot return a gradient, and fmincon expects one, call + % llf_nloglf_diff to compute a finite difference approximation. + % This is the default (large scale) situation for nloglf/fmincon. + else + llf = @llf_nloglf_diff; + [phat,nll,err,output] = ... + fmincon(llf,start,[],[],[],[],lb,ub,[],opts,data,cens,freq,funArgs,checkFunVals,delta); + end + + otherwise + if strcmp(opts.GradObj, 'on') + % Call a wrapper that will in turn call either llf_pdfcdf or + % llf_logpdflogsf. 
Neither can return a gradient, so, the wrapper + % will have do it numerically. This is the default (large scale) + % situation for pdf/cdf/fmincon and logpdf/logsf/fmincon. + [phat,nll,err,output] = ... + fmincon(@llf_diff,start,[],[],[],[],lb,ub,[],opts,uncensData,censData,uncensFreq,censFreq,llf,fun1Args,fun2Args,checkFunVals,delta); + else + % Call either llf_pdfcdf or llf_logpdflogsf directly. Neither can + % return a gradient, but because opts.GradObj is 'off', fmincon + % will never ask for one. + [phat,nll,err,output] = ... + fmincon(llf,start,[],[],[],[],lb,ub,[],opts,uncensData,censData,uncensFreq,censFreq,fun1Args,fun2Args,checkFunVals); + end + end + +else + error('stats:mle:IllegalOptimFunValue','The value of the ''optimfun'' parameter must be ''fminsearch'' or ''fmincon''.'); +end + +if (err == 0) + % the optimizer may print its own output text; in any case give something + % more statistical here, controllable via warning IDs. + if output.funcCount >= opts.MaxFunEvals + wmsg = 'Maximum likelihood estimation did not converge. Function evaluation limit exceeded.'; + else + wmsg = 'Maximum likelihood estimation did not converge. Iteration limit exceeded.'; + end + warning('stats:mle:IterOrEvalLimit',wmsg); +elseif err < 0 + error('stats:mle:MLEsNotFound','Unable to reach a maximum likelihood solution.'); +end + +% Compute the hessian with respect to the parameters, and invert it to get +% an estimate of the asymptotic covariance matrix of the param estimates. +if nargout > 1 + % The default FD step size is different for mlecov than for mlecustom, + % because the former is computing a hessian, not a gradient. We won't + % tell mlecov what to use. + userOpts.DerivStep = []; + switch funType + case 'nloglf' + acov = mlecov(phat, data, 'nloglf',nloglf, 'cens',cens, 'freq',freq, 'options',userOpts); + case {'logpdflogsf' 'logpdf'} + acov = mlecov(phat, data, 'logpdf',logpdf, 'logsf',logsf, 'cens',cens, 'freq',freq, 'options',userOpts); + case {'pdfcdf' 'pdf'} + acov = mlecov(phat, data, 'pdf',pdf, 'cdf',cdf, 'cens',cens, 'freq',freq, 'options',userOpts); + end + + % Compute CIs using a normal approximation for phat. + probs = [alpha/2; 1-alpha/2]; + se = sqrt(diag(acov))'; + pci = norminv(repmat(probs,1,numel(phat)), [phat; phat], [se; se]); +end + + +%========================================================================== + +function [nll,ngrad] = llf_nloglf(params, data, cens, freq, nloglfArgs, checkVals, lb, ub) +% Given a function handle to a negative log LF, evaluate the negative +% log-likelihood for PARAMS given DATA. + +nloglfFun = nloglfArgs{1}; +nloglfAddArgs = nloglfArgs(2:end); + +% Bounds checking needed when function called by fminsearch. When called +% by fmincon, the last two args will not be there. +if nargin > 6 + if any(params<=lb | ub<=params) + nll = Inf; + return + end +end + +if nargout == 1 + nll = feval(nloglfFun, params, data, cens, freq, nloglfAddArgs{:}); +else + [nll,ngrad] = feval(nloglfFun, params, data, cens, freq, nloglfAddArgs{:}); +end + +% Make sure returned llf values are valid. 
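+% (The check runs here rather than in the optimizer because 'FunValCheck'
+%  was forced 'off' in the optimset calls above; checkVals carries the
+%  user's original FunValCheck setting.)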
+if checkVals + if ~isfinite(nll) + error('stats:mle:NonfiniteNloglfVal','The NLOGLF function returned a NaN or infinite log-likelihood value.'); + end + if nargout == 2 + if any(~isfinite(ngrad)) + error('stats:mle:NonfiniteNloglfGrad','The NLOGLF function returned NaN or infinite gradient values.'); + end + end +end + + +%========================================================================== + +function [nll,ngrad] = llf_nloglf_diff(params, data, cens, freq, nloglfArgs, checkVals, delta) +% Given a function handle to a negative log LF, evaluate the negative +% log-likelihood for PARAMS given DATA, and approximate its gradient using +% central differences. + +nloglfFun = nloglfArgs{1}; +nloglfAddArgs = nloglfArgs(2:end); + +% No need to bounds check, this function only ever called by fmincon. + +% Evaluate the log-likelihood itself, using the specified nlogLF. +nll = feval(nloglfFun, params, data, cens, freq, nloglfAddArgs{:}); + +% Approximate the gradient with central differences. +if nargout > 1 + deltaparams = delta*max(abs(params), 1); % limit smallest absolute step + nparams = length(params); + + e = zeros(1,nparams); + ngrad = zeros(size(params)); + for j=1:nparams + e(j) = deltaparams(j); + ngrad(j) = feval(nloglfFun, params+e, data, cens, freq, nloglfAddArgs{:}) ... + - feval(nloglfFun, params-e, data, cens, freq, nloglfAddArgs{:}); + e(j) = 0; + end + + % Normalize by increment to get derivative estimates. + ngrad = ngrad ./ (2 * deltaparams); +end + +% Make sure returned llf values are valid. +if checkVals + if ~isfinite(nll) + error('stats:mle:NonfiniteNloglfVal','The NLOGLF function returned a NaN or infinite log-likelihood value.'); + end + if nargout == 2 + if any(~isfinite(ngrad)) + error('stats:mle:NonfiniteNloglfVal','The NLOGLF function returned a NaN or infinite log-likelihood value.'); + end + end +end + + +%========================================================================== + +function nll = llf_logpdflogsf(params, uncensData, censData, uncensFreq, censFreq, logpdfArgs, logsfArgs, checkVals, lb, ub) +% Given function handles to a log PDF and a log SF, evaluate the negative +% log-likelihood for PARAMS given DATA. + +logpdfFun = logpdfArgs{1}; +logpdfAddArgs = logpdfArgs(2:end); +logsfFun = logsfArgs{1}; +logsfAddArgs = logsfArgs(2:end); + +% Bounds checking needed when function called by fminsearch. When called +% by fmincon, the last two args will not be there. +if nargin > 8 + if any(params<=lb | ub<=params) + nll = Inf; + return + end +end + +% Log-likelihood = logPDF(uncensored values) + logSF(censored values) +% +% First, evaluate the specified logPDF of the uncensored data. +paramsCell = num2cell(params); +logpdfVals = feval(logpdfFun, uncensData, paramsCell{:}, logpdfAddArgs{:}); + +% Make sure returned logpdf values are valid. +if checkVals + if any(~isfinite(logpdfVals)) + error('stats:mle:NonfiniteLogpdfVal','The LOGPDF function returned NaN or infinite values.'); + end +end + +% Compute negative log-likelihood from uncensored values, using +% frequencies. +nll = -sum(uncensFreq.*logpdfVals); + +% If there is censoring, evaluate the specified logSF of the censored data. +if ~isempty(censData) + logsfVals = feval(logsfFun, censData, paramsCell{:}, logsfAddArgs{:}); + + % Make sure returned logsf values are valid. 
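+% (A survivor function takes values in (0,1], so a valid log SF is never
+%  positive; that is what the checks below enforce.)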
+ if checkVals + if any(~isfinite(logsfVals)) + error('stats:mle:NonfiniteLogsfVal','The LOGSF function returned NaN or infinite values.'); + elseif any(logsfVals > 0) + error('stats:mle:PositiveLogsfVal','The LOGSF function returned positive values.'); + end + end + + % Update negative log-likelihood with censored values, using + % frequencies. + nll = nll - sum(censFreq.*logsfVals); +end + + +%========================================================================== + +function nll = llf_pdfcdf(params, uncensData, censData, uncensFreq, censFreq, pdfArgs, cdfArgs, checkVals, lb, ub) +% Given function handles to a PDF and a CDF, evaluate the negative +% log-likelihood for PARAMS given DATA. + +pdfFun = pdfArgs{1}; +pdfAddArgs = pdfArgs(2:end); +cdfFun = cdfArgs{1}; +cdfAddArgs = cdfArgs(2:end); + +% Bounds checking needed when function called by fminsearch. When called +% by fmincon, the last two args will not be there. +if nargin > 8 + if any(params<=lb | ub<=params) + nll = Inf; + return + end +end + +% Log-likelihood = log(PDF(uncensored values) + log(1-CDF(censored values)) +% +% First, evaluate the specified PDF of the uncensored data. +paramsCell = num2cell(params); +pdfVals = feval(pdfFun, uncensData, paramsCell{:}, pdfAddArgs{:}); + +% Make sure returned pdf values are valid. +if checkVals + if any(~isfinite(pdfVals)) + error('stats:mle:NonfinitePdfVal','The PDF function returned NaN or infinite values.'); + elseif any(pdfVals <= 0) + error('stats:mle:NonpositivePdfVal','The PDF function returned negative or zero values.'); + end +end + +% Compute negative log-likelihood from uncensored values, using +% frequencies. +nll = -sum(uncensFreq.*log(pdfVals)); + +% If there is censoring, evaluate the specified CDF of the censored data. +if ~isempty(censData) + cdfVals = feval(cdfFun, censData, paramsCell{:}, cdfAddArgs{:}); + + % Make sure returned cdf values are valid. + if checkVals + if any(~isfinite(cdfVals)) + error('stats:mle:NonfiniteCdfVal','The CDF function returned NaN or infinite values.'); + elseif any(cdfVals < 0) + error('stats:mle:NegativeCdfVal','The CDF function returned negative values.'); + elseif any(cdfVals >= 1) + error('stats:mle:GTOneCdfVal','The CDF function returned values greater than or equal to 1.'); + end + end + + % Update negative log-likelihood with censored values, using + % frequencies. + nll = nll - sum(censFreq.*log(1-cdfVals)); +end + + +%========================================================================== + +function [nll,ngrad] = llf_diff(params, uncensData, censData, uncensFreq, censFreq, llf, fun1Args, fun2Args, checkVals, delta) +% A wrapper around llf_logpdflogsf or llf_pdfcdf to evaluate the (negative) +% log-likelihood for PARAMS given DATA, and approximate its gradient using +% central differences. LLF is a function handle to either llf_logpdflogsf +% or llf_pdfcdf, that in turn call either a PDF and CDF, or a logPDF and +% logSF, respectively. + +% No need to bounds check, this function only ever called by fmincon. + +% Evaluate the log-likelihood itself, using either the PDF and CDF, or the +% logPDF and logSF. +nll = feval(llf, params, uncensData, censData, uncensFreq, censFreq, fun1Args, fun2Args, checkVals); + +% Approximate the gradient with central differences. 
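+% (That is, dNLL/dp(j) ~= [NLL(p + h(j)*e(j)) - NLL(p - h(j)*e(j))] / (2*h(j)),
+%  with step h(j) = delta*max(abs(p(j)),1) as computed below.)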
+if nargout >= 2 + deltaparams = delta*max(abs(params), 1); % limit smallest absolute step + nparams = length(params); + + e = zeros(1,nparams); + ngrad = zeros(size(params)); + for j=1:nparams + e(j) = deltaparams(j); + ngrad(j) = feval(llf, params+e, uncensData, censData, uncensFreq, censFreq, fun1Args, fun2Args, checkVals) ... + - feval(llf, params-e, uncensData, censData, uncensFreq, censFreq, fun1Args, fun2Args, checkVals); + e(j) = 0; + end + + % Normalize by increment to get derivative estimates. + ngrad = ngrad ./ (2 * deltaparams); +end + +% No need to check values, that's done in llf in the above feval calls. + + +%========================================================================== + +function checkFunErrs(type,fun,params,data,cens,freq,addArgs,haveGrad) +%CHECKFUNERRS Check for errors in evaluation of user-supplied function + +if isempty(fun), return; end + +try + switch type + case 'nloglf' + if haveGrad + [nll,ngrad] = feval(fun, params, data, cens, freq, addArgs{:}); + else + nll = feval(fun, params, data, cens, freq, addArgs{:}); + end + otherwise + paramsCell = num2cell(params); + vals = feval(fun, data, paramsCell{:}, addArgs{:}); + end +catch + switch type + case 'nloglf', errID = 'stats:mle:NloglfError'; + case 'logpdf', errID = 'stats:mle:LogpdfError'; + case 'logsf', errID = 'stats:mle:LogsfError'; + case 'pdf', errID = 'stats:mle:PdfError'; + case 'cdf', errID = 'stats:mle:CdfError'; + end + error(errID, ['The following error occurred while trying to evaluate\nthe ', ... + 'user-supplied %s function ''%s'':\n\n%s'], type,func2str(fun),lasterr); +end diff --git a/boosting/weightedstats/private/on2off.m b/boosting/weightedstats/private/on2off.m new file mode 100644 index 0000000..bdb2676 --- /dev/null +++ b/boosting/weightedstats/private/on2off.m @@ -0,0 +1,11 @@ +function a = on2off(b) +%ON2OFF Simple helper returns 'on' given 'off' and vice versa + +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:36:22 $ +% Copyright 2003-2004 The MathWorks, Inc. + +if isequal(b,'on') + a = 'off'; +else + a = 'on'; +end diff --git a/boosting/weightedstats/private/pdistmex.c b/boosting/weightedstats/private/pdistmex.c new file mode 100644 index 0000000..4fd4c48 --- /dev/null +++ b/boosting/weightedstats/private/pdistmex.c @@ -0,0 +1,812 @@ +/* + * pdistmex.c + * + * Calculates pairwise distances between observations. + * Helper function to pdist.m + * + * This is a MEX-file for MATLAB. + * Copyright 1993-2004 The MathWorks, Inc. 
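+ *
+ *  Illustrative call from MATLAB (a sketch based on the gateway code below,
+ *  not taken from the original sources):
+ *      d = pdistmex(X', 'euc');   % X is numPoints-by-numCoords, hence X'
+ *  The result is a 1-by-numPoints*(numPoints-1)/2 row vector of distances.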
+ */ + +/* $Revision: 1.1.6.3 $ $Date: 2004/01/24 09:36:23 $ */ + +#include "mex.h" +#include +#include + +/* Euclidean distance */ +void eucdist(double *x, int m, int n, double *d) +{ + /* + d = sqrt(sum((XI-XJ).^2,2)); % Euclidean + */ + int i,j,k; + double theSum,Y; + double *XI, *XJ, *XI0; + + XI = x; + for (i=0; itheMax) { + theMax = Y; + } + } + *(d++) = theMax; + } + } + } + mxFree(theNaN); +} + +/************************************************************/ +/* Euclidean distance */ +void eucdistS(float *x, int m, int n, float *d) +{ + /* + d = sqrt(sum((XI-XJ).^2,2)); % Euclidean + */ + int i,j,k; + float theSum,Y; + float *XI, *XJ, *XI0; + + XI = x; + for (i=0; itheMax) { + theMax = Y; + } + } + *(d++) = theMax; + } + } + } + mxFree(theNaN); +} + +/* the gateway function */ +void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) +{ + + int status,numCoords,numPoints; + char metric[4]; + double *argD; + + /* check for proper number of arguments */ + /* NOTE: You do not need an else statement when using mexErrMsgTxt + within an if statement, because it will never get to the else + statement if mexErrMsgTxt is executed. (mexErrMsgTxt breaks you out of + the MEX-file) */ + if (nrhs<2) { + mexErrMsgIdAndTxt("stats:pdistmex:TooFewInputs", + "Two input arguments required."); + } else if(nlhs>1) { + mexErrMsgIdAndTxt("stats:pdistmex:TooManyOutputs", + "Too many output arguments."); + } + + /* get the metric */ + status = mxGetString(prhs[1],metric,4); + + /* Check the type of the input array */ + /* Currently only works with double or single(float) */ + if (mxIsDouble(prhs[0])) { + double *x,*d,*arg,scalarArg; + /* create a pointer to the input matrix y */ + x = mxGetPr(prhs[0]); + + /* get the dimensions of the matrix input y */ + numCoords = mxGetM(prhs[0]); + numPoints = mxGetN(prhs[0]); + /* get extra arg */ + if (nrhs>2 && !mxIsEmpty(prhs[2])) { + if (mxGetNumberOfElements(prhs[2]) == 1) { /*scalar case */ + scalarArg = mxGetScalar(prhs[2]); + } else if (mxIsDouble(prhs[2])) { + arg = mxGetPr(prhs[2]); + } else { + mexErrMsgIdAndTxt("stats:pdistmex:MixedInputTypes", + "Additional input arguments must be the same class as X."); + } + } + /* set the output pointer to the output matrix */ + plhs[0] = mxCreateDoubleMatrix(1,(numPoints * (numPoints-1))/2, mxREAL); + + /* create a pointer to a copy of the output matrix */ + d = mxGetPr(plhs[0]); + + /* call the appropriate distance subroutine */ + if (strcmp(metric,"euc") == 0) + eucdist(x,numPoints,numCoords,d); + else if(strcmp(metric,"seu") == 0) + seudist(x,numPoints,numCoords,arg,d); + else if(strcmp(metric,"cit") == 0) + citdist(x,numPoints,numCoords,d); + else if(strcmp(metric,"min") == 0) + mindist(x,numPoints,numCoords,scalarArg,d); + else if(strcmp(metric,"cos") == 0) + coscordist(x,numPoints,numCoords,d); + else if(strcmp(metric,"cor") == 0) + coscordist(x,numPoints,numCoords,d); + else if(strcmp(metric,"ham") == 0) + hamdist(x,numPoints,numCoords,d); + else if(strcmp(metric,"jac") == 0) + jacdist(x,numPoints,numCoords,d); + else if(strcmp(metric,"che") == 0) + chedist(x,numPoints,numCoords,d); + else if(strcmp(metric,"mah") == 0) + mahdist(x,numPoints,numCoords,arg,d); + + /* deal with non double types */ + } else if (mxIsSingle(prhs[0])) { + float *x,*d,*arg,scalarArg; + + /* create a pointer to the input matrix y */ + x = (float*)mxGetData(prhs[0]); + + /* get the dimensions of the matrix input y */ + numCoords = mxGetM(prhs[0]); + numPoints = mxGetN(prhs[0]); + + /* get extra arg */ + if (nrhs>2 && 
!mxIsEmpty(prhs[2])) { + if (mxGetNumberOfElements(prhs[2]) == 1) { /*scalar case */ + scalarArg = (float)mxGetScalar(prhs[2]); + } else if (mxIsSingle(prhs[2])) { + arg = mxGetData(prhs[2]); + } else { + mexErrMsgIdAndTxt("stats:pdistmex:MixedInputTypes", + "Additional input arguments must be the same class as X."); + } + } + + /* set the output pointer to the output matrix */ + plhs[0] = mxCreateNumericMatrix(1,(numPoints * (numPoints-1))/2, mxSINGLE_CLASS, mxREAL); + + /* create a pointer to a copy of the output matrix */ + d = (float*)mxGetData(plhs[0]); + + /* call the appropriate distance subroutine */ + if (strcmp(metric,"euc") == 0) + eucdistS(x,numPoints,numCoords,d); + else if(strcmp(metric,"seu") == 0) + seudistS(x,numPoints,numCoords,arg,d); + else if(strcmp(metric,"cit") == 0) + citdistS(x,numPoints,numCoords,d); + else if(strcmp(metric,"min") == 0) + mindistS(x,numPoints,numCoords,scalarArg,d); + else if(strcmp(metric,"cos") == 0) + coscordistS(x,numPoints,numCoords,d); + else if(strcmp(metric,"cor") == 0) + coscordistS(x,numPoints,numCoords,d); + else if(strcmp(metric,"ham") == 0) + hamdistS(x,numPoints,numCoords,d); + else if(strcmp(metric,"jac") == 0) + jacdistS(x,numPoints,numCoords,d); + else if(strcmp(metric,"che") == 0) + chedistS(x,numPoints,numCoords,d); + else if(strcmp(metric,"mah") == 0) + mahdistS(x,numPoints,numCoords,arg,d); + + } else { + mexErrMsgIdAndTxt("stats:pdistmex:BadInputType", + "PDISTMEX only supports real DOUBLE and SINGLE data."); + } + +} +/* +% ---------------------------------------------- +function d = distcalc(XI,XJ,s,arg) +%DISTCALC Perform distance calculation for PDIST. +switch s +case 'euc' d = sqrt(sum((XI-XJ).^2,2)); % Euclidean +case 'seu' d = sqrt(((XI-XJ).^2) * arg); % Standardized Euclidean +case 'cit' d = sum(abs((XI-XJ)),2); % City Block +case 'mah' Y = XI - XJ; + d = sqrt(sum((Y*arg).*Y,2)); % Mahalanobis +case 'min' d = sum(abs((XI-XJ)).^arg,2).^(1/arg); % Minkowski +case 'che' d = max(abs(XI-XJ),[],2); % Chebychev +case 'cos' d = 1 - sum(XI.*XJ,2); % Cosine +case 'cor' d = 1 - sum(XI.*XJ,2); % Correlation +case 'ham' d = sum(XI ~= XJ,2) / size(XI,2); % Hamming +case 'jac' nz = XI ~= 0 | XJ ~= 0; + ne = XI ~= XJ; + d = sum(ne&nz,2) ./ sum(nz,2); % Jaccard +end +*/ diff --git a/boosting/weightedstats/private/pdistmex.mexglx b/boosting/weightedstats/private/pdistmex.mexglx new file mode 100644 index 0000000..1b46198 Binary files /dev/null and b/boosting/weightedstats/private/pdistmex.mexglx differ diff --git a/boosting/weightedstats/private/pdistmex.mexglx.csf b/boosting/weightedstats/private/pdistmex.mexglx.csf new file mode 100644 index 0000000..f14b2fd Binary files /dev/null and b/boosting/weightedstats/private/pdistmex.mexglx.csf differ diff --git a/boosting/weightedstats/private/placetitlebar.m b/boosting/weightedstats/private/placetitlebar.m new file mode 100644 index 0000000..718ebe7 --- /dev/null +++ b/boosting/weightedstats/private/placetitlebar.m @@ -0,0 +1,21 @@ +function placetitlebar(fig) +%PLACETITLEBAR ensures that a figure's titlebar is on screen. +% +% Copyright 1993-2004 The MathWorks, Inc. 
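+%
+% Example (an illustrative sketch, not part of the original file):
+%   fig = figure('Position', [100 2000 400 300]);  % may fall above the screen
+%   placetitlebar(fig)                             % shifts it back into view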
+% $Revision: 1.1.4.1 $ $Date: 2004/01/24 09:36:24 $ + +oldRootUnits = get(0, 'Units'); +oldFigUnits = get(fig, 'Units'); + +set(0, 'Units', 'pixels'); +set(fig, 'Units', 'pixels'); + +screenpos = get(0, 'Screensize'); +outerpos = get(fig, 'Outerposition'); +if outerpos(2) + outerpos(4) > screenpos(4) + outerpos(2) = screenpos(4) - outerpos(4); + set(fig, 'Outerposition', outerpos); +end +%restore units +set(0, 'Units', oldRootUnits); +set(fig, 'Units', oldFigUnits); \ No newline at end of file diff --git a/boosting/weightedstats/private/randsample.m b/boosting/weightedstats/private/randsample.m new file mode 100644 index 0000000..2b84373 --- /dev/null +++ b/boosting/weightedstats/private/randsample.m @@ -0,0 +1,45 @@ +function y = randsample(n, k) +%RANDSAMPLE Random sampling, without replacement +% Y = RANDSAMPLE(N,K) returns K values sampled at random, without +% replacement, from the integers 1:N. + +% Copyright 1993-2002 The MathWorks, Inc. +% $Revision: 1.1 $ $Date: 2002/03/13 23:15:54 $ + +% RANDSAMPLE does not (yet) implement weighted sampling. + +if nargin < 2 + error('Requires two input arguments.'); +end + +% If the sample is a sizeable fraction of the population, just +% randomize the whole population (which involves a full sort +% of n random values), and take the first k. +if 4*k > n + rp = randperm(n); + y = rp(1:k); + +% If the sample is a small fraction of the population, a full +% sort is wasteful. Repeatedly sample with replacement until +% there are k unique values. +else + x = zeros(1,n); % flags + sumx = 0; + while sumx < k + x(ceil(n * rand(1,k-sumx))) = 1; % sample w/replacement + sumx = sum(x); % count how many unique elements so far + end + y = find(x > 0); + y = y(randperm(k)); +end + +% a scalar loop version +% +% x = 1:n; +% n = n:(-1):(n-k+1); +% y = zeros(1,k); +% j = ceil(n .* rand(1,k)); +% for i = 1:k +% y(i) = x(j(i)); +% x(j(i)) = x(n(i)); +% end diff --git a/boosting/weightedstats/private/statchol.m b/boosting/weightedstats/private/statchol.m new file mode 100644 index 0000000..774de7b --- /dev/null +++ b/boosting/weightedstats/private/statchol.m @@ -0,0 +1,27 @@ +function [T,p] = statchol(sigma) +%STATCHOL Do Cholesky-like decomposition, allowing zero eigenvalues +% SIGMA must be symmetric. In general T is not square or triangular. +% P is the number of negative eigenvalues, and T is empty if P>0. + +% $Revision: 1.1 $ $Date: 2001/08/01 20:57:49 $ + +[T p] = chol(sigma); + +if p > 0 + % Can get factors of the form sigma==T'*T using the eigenvalue + % decomposition of a symmetric matrix, so long as the matrix + % is positive semi-definite. + [U,D] = eig((sigma+sigma')/2); + D = diag(D); + + tol = max(D) * length(D) * eps; + t = (abs(D) > tol); + D = D(t); + p = sum(D<0); + + if (p==0) + T = diag(sqrt(D)) * U(:,t)'; + else + T = []; + end +end diff --git a/boosting/weightedstats/private/statctexact.m b/boosting/weightedstats/private/statctexact.m new file mode 100644 index 0000000..b9cff6a --- /dev/null +++ b/boosting/weightedstats/private/statctexact.m @@ -0,0 +1,288 @@ +function pval=statctexact(x,wts,tstar,dispopt) +%STATCTEXACT Compute exact p-value for contingency table +% P=STATCTEXACT(X,WTS,T,DISPOPT) uses a network algorithm to compute +% the exact p-value P for a 2-by-K contingency table X. The test +% statistic T is the weighted sum of the elements in the first row. +% Set DISPOPT=true to display debugging output. +% +% Private function used by the RANKSUM function. + +% Copyright 2003-2004 The MathWorks, Inc. 
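+%
+% (An illustrative call matching the interface above, not from the original
+%  file; the function is normally reached through RANKSUM:
+%      p = statctexact([3 1 4; 2 5 0]);   % 2-by-K table, default weights)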
+% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:36:26 $ + + +[r,c] = size(x); +if (r~=2) + error('Internal error, table must have two rows.'); +end + +if (nargin<2), wts = []; end +if (nargin<3), tstar = []; end +if (nargin<4), dispopt = false; end + +% Get the test statistic and weights if not already done +if isempty(wts) || isempty(tstar) + [tstar, wts, expected] = teststat(x, wts, dispopt); +else + expected = []; % will not be used +end + +% Start of network algorithm for exact p-value computation + +% Make nodes and arcs +[nodes,arcs] = makenodes(x,wts,expected); + +% Backward induction +nodes = backward(nodes,arcs); + +% Forward pass +pvals = getsigprob(nodes, arcs, tstar, dispopt); + +TP = nodes{4,1}; +pvals = pvals / TP; +pval = pvals(2) + min(pvals(1), pvals(3)); +p2 = min(1, 2 * pval); + +if dispopt + disp( 'Exact results:'); + disp(sprintf(' test statistic = %g', tstar)); + disp(sprintf(' Prob[T < %g] = %g', tstar, pvals(1))); + disp(sprintf(' Prob[T <= %g] = %g', tstar, pvals(1)+pvals(2))); + disp(sprintf(' Prob[T = %g] = %g', tstar, pvals(2))); + disp(sprintf(' Prob[T >= %g] = %g', tstar, pvals(2)+pvals(3))); + disp(sprintf(' Prob[T > %g] = %g', tstar, pvals(3))); + disp(sprintf(' 2-sided p = %g', p2)); +end + +% ---------------------------------------------------------- +function [tstar, wts, expected] = teststat(x, wts, dispopt); +% Compute the test statistic for this observed table + +[r,c] = size(x); +rowsum = sum(x,2); +colsum = sum(x,1); +if (length(wts) == 0) + obs = x; + expected = repmat(rowsum,1,c) .* repmat(colsum,r,1) ./ sum(rowsum); + tstar = sum(sum((obs-expected).^2 ./ expected)); +else + expected = []; + tstar = sum(wts .* x(1,:)); +end +wts = wts(:)'; + +% --------------------------------------------------- +function [nodes,arcs] = makenodes(x,wts,expected) +%MAKENODES Make structures describing nodes and arcs +% [nodes,arcs] = makenodes(x,wts,expected) + +[r,c] = size(x); +rowsum = sum(x,2); +colsum = sum(x,1); +oldnodes = zeros(1,2); % nodes added during the last pass +oldlo = 0; % min possible sum so far +oldhi = 0; % max possible sum so far +oldnn = 1; % node numbers (row numbers) from last pass +xsum = rowsum(1); % sum of entries in first row +nodecount = 1; + +nodes = cell(4,c+1); % to hold nodes +% row 1: n-by-2 array, n = # of nodes, row = [j,mj] +% row 2: n-vector of longest path to end from here +% row 3: n-vector of shortest path to end from here +% row 4: n-vector of total probability to end from here + +arcs = cell(3,c); % to hold node connections (arcs) in the network +% row 1: n-by-2 array, n = # of connections, row = pair connected +% row 2: n-vector of arc lengths +% row 3: n-vector of arc probabilities + +nodes{1,1} = zeros(1,2); +nodes{2,c+1} = 0; +nodes{3,c+1} = 0; +nodes{4,c+1} = 1; +for j=1:c % loop over nodes + % Figure out which nodes are possible at the next step + nj = colsum(j); + lo = max(oldlo, xsum-sum(colsum(j+1:end))); + hi = min(xsum, oldhi+nj); + newnodes = zeros(hi-lo+1,2); + newnodes(:,1) = j; + newnodes(:,2) = (lo:hi)'; + newnn = 1:size(newnodes,1); + nodecount = nodecount + size(newnodes,1); + nodes{1,j+1} = newnodes; + + % Figure out which arcs are possible to the next step + [a0,a1] = meshgrid(oldnn,newnn); + a0 = a0(:); + a1 = a1(:); + oldsum = oldnodes(a0,2); + newsum = newnodes(a1,2); + xj = newsum - oldsum; + ok = (xj >= 0) & (xj <= nj); + arcs{1,j} = [a0(ok) a1(ok)]; % arc connections + xj = xj(ok); + if (length(wts) > 0) % arch lengths + arcs{2,j} = wts(j)*xj; + else + arcs{2,j} = 2 * (xj - expected(1,j)).^2 ./ 
expected(1,j); + end + pj = exp(gammaln(nj+1) - gammaln(xj+1) - gammaln(nj-xj+1)); + arcs{3,j} = pj; % arc probabilities + + % Update data structures + oldlo = lo; + oldhi = hi; + oldnodes = newnodes; + oldnn = newnn; +end + +% ----------------------------------------------------------- +function nodes = backward(nodes,arcs) +%BACKWARD Do backward induction, add information to NODES array +% nodes = backward(nodes,arcs) + +% initialize for final node +c = size(nodes,2) - 1; +startSP = zeros(1); +startLP = startSP; +startTP = ones(1); +startnode = nodes{1,c+1}; +for j=c:-1:1 + % destination nodes are previous start nodes + endSP = startSP; + endLP = startLP; + endTP = startTP; + endnode = startnode; + + % get new start nodes and information about them + a = arcs{1,j}; + startnode = nodes{1,j}; + startmax = max(a(:,1)); + startSP = zeros(startmax,1); + startLP = startSP; + startTP = startSP; + arclen = arcs{2,j}; + arcprob = arcs{3,j}; + for nodenum=1:startmax + % for each start node, compute SP, LP, TP + k1 = find(a(:,1) == nodenum); + k2 = a(k1,2); + startLP(nodenum) = max(arclen(k1) + endLP(k2)); + startSP(nodenum) = min(arclen(k1) + endSP(k2)); + startTP(nodenum) = sum(arcprob(k1) .* endTP(k2)); + end + + % store information about nodes at this level + nodes{2,j} = startLP; + nodes{3,j} = startSP; + nodes{4,j} = startTP; +end + +% ---------------------------------------------------- +function pvals = getsigprob(nodes, arcs, tstar, dispopt) +%GETSIGPROB Get p-values by scanning the network + +NROWS = 50; + +pvals = zeros(3,1); % [ProbT] +stack = zeros(NROWS, 4); +stack(:,1) = Inf; +stack(1,1) = 1; % level of current node +stack(1,2) = 1; % number at this level of current node +stack(1,3) = 0; % length so far to this node +stack(1,4) = 1; % probability so far of reaching this node +N = size(stack,1); + +i1 = 0; i2 = 0; i3 = 0; + +while(1) + % Get next node to process, visiting lowest levels first + minlevel = min(stack((stack(1:N)>0))); + if (isinf(minlevel)), break; end + sp = find(stack(1:N)==minlevel); + sp = sp(1); + + L = stack(sp,1); + J = stack(sp,2); + pastL = stack(sp,3); + pastP = stack(sp,4); + stack(sp,1) = Inf; + + % Get info for arcs at level L and their target nodes + nj = nodes{1,L+1}; + LP = nodes{2,L+1}; + SP = nodes{3,L+1}; + TP = nodes{4,L+1}; + aj = arcs{1,L}; + arclen = arcs{2,L}; + arcprob = arcs{3,L}; + + % Look only at arcs from node J + seps = sqrt(eps); + arows = find(aj(:,1)==J)'; + for k=arows + tonode = aj(k,2); + thisL = arclen(k); + thisP = pastP * arcprob(k); + len = pastL + thisL; + + % See if no paths from here are signicant + if (len + LP(tonode) < tstar - seps) + pvals(1) = pvals(1) + thisP * TP(tonode); + continue; + + % See if all paths from here are significant + elseif (len + SP(tonode) > tstar + seps) + pvals(3) = pvals(3) + thisP * TP(tonode); + continue; + + % See if there is no range, then we match exactly + elseif (SP(tonode) == LP(tonode)) + pvals(2) = pvals(2) + thisP * TP(tonode); + continue; + + % See if we can merge this with another already stored + else + % Find a stored node that matches this one + r = find(stack(:,1) == L+1); + if (any(r)) + r = r(stack(r,2) == tonode); + if (any(r)) + r = r(abs(stack(r,3) - len) < seps); + end + end + + if (any(r)) + % If one is found, merge this one with it + sp = r(1); + stack(sp,4) = stack(sp,4) + thisP; + i1 = i1+1; + else + % Otherwise add a new node, extending array if necessary + z = find(isinf(stack(:,1))); + if (isempty(z)) + i2 = i2+1; + block = zeros(NROWS,4); + block(:,1) = Inf; + stack = 
[stack; block]; + sp = N+1; + N = N+NROWS; + else + i3 = i3+1; + sp = z(1); + end + stack(sp,1) = L+1; + stack(sp,2) = tonode; + stack(sp,3) = len; + stack(sp,4) = thisP; + end + end + end +end + +if dispopt + disp('merged, extended, inserted = '); + disp([i1 i2 i3]); +end diff --git a/boosting/weightedstats/private/statgetargs.m b/boosting/weightedstats/private/statgetargs.m new file mode 100644 index 0000000..92de25a --- /dev/null +++ b/boosting/weightedstats/private/statgetargs.m @@ -0,0 +1,77 @@ +function [emsg,varargout]=statgetargs(pnames,dflts,varargin) +%STATGETARGS Process parameter name/value pairs for statistics functions +% [EMSG,A,B,...]=STATGETARGS(PNAMES,DFLTS,'NAME1',VAL1,'NAME2',VAL2,...) +% accepts a cell array PNAMES of valid parameter names, a cell array +% DFLTS of default values for the parameters named in PNAMES, and +% additional parameter name/value pairs. Returns parameter values A,B,... +% in the same order as the names in PNAMES. Outputs corresponding to +% entries in PNAMES that are not specified in the name/value pairs are +% set to the corresponding value from DFLTS. If nargout is equal to +% length(PNAMES)+1, then unrecognized name/value pairs are an error. If +% nargout is equal to length(PNAMES)+2, then all unrecognized name/value +% pairs are returned in a single cell array following any other outputs. +% +% EMSG is empty if the arguments are valid, or the text of an error message +% if an error occurs. STATGETARGS does not actually throw any errors, but +% rather returns an error message so that the caller may throw the error. +% Outputs will be partially processed after an error occurs. +% +% This utility is used by some Statistics Toolbox functions to process +% name/value pair arguments. +% +% Example: +% pnames = {'color' 'linestyle', 'linewidth'} +% dflts = { 'r' '_' '1'} +% varargin = {{'linew' 2 'nonesuch' [1 2 3] 'linestyle' ':'} +% [emsg,c,ls,lw] = statgetargs(pnames,dflts,varargin{:}) % error +% [emsg,c,ls,lw,ur] = statgetargs(pnames,dflts,varargin{:}) % ok + +% Copyright 1993-2002 The MathWorks, Inc. 
+% $Revision: 1.4 $ $Date: 2002/02/04 19:25:45 $ + +% We always create (nparams+1) outputs: +% one for emsg +% nparams varargs for values corresponding to names in pnames +% If they ask for one more (nargout == nparams+2), it's for unrecognized +% names/values + +% Initialize some variables +emsg = ''; +nparams = length(pnames); +varargout = dflts; +unrecog = {}; +nargs = length(varargin); + +% Must have name/value pairs +if mod(nargs,2)~=0 + emsg = sprintf('Wrong number of arguments.'); +else + % Process name/value pairs + for j=1:2:nargs + pname = varargin{j}; + if ~ischar(pname) + emsg = sprintf('Parameter name must be text.'); + break; + end + i = strmatch(lower(pname),pnames); + if isempty(i) + % if they've asked to get back unrecognized names/values, add this + % one to the list + if nargout > nparams+1 + unrecog((end+1):(end+2)) = {varargin{j} varargin{j+1}}; + + % otherwise, it's an error + else + emsg = sprintf('Invalid parameter name: %s.',pname); + break; + end + elseif length(i)>1 + emsg = sprintf('Ambiguous parameter name: %s.',pname); + break; + else + varargout{i} = varargin{j+1}; + end + end +end + +varargout{nparams+1} = unrecog; diff --git a/boosting/weightedstats/private/statgetargsuser.m b/boosting/weightedstats/private/statgetargsuser.m new file mode 100644 index 0000000..4bfc0ca --- /dev/null +++ b/boosting/weightedstats/private/statgetargsuser.m @@ -0,0 +1,96 @@ +function [emsg,varargout]=statgetargsuser(pnames,dflts,delim,varargin) +%STATGETARGS Process parameter name/value pairs for statistics functions +% [EMSG,A,B,... USER]= +% STATGETARGS(PNAMES,DFLTS,DELIM,'NAME1',VAL1,'NAME2',VAL2,...) +% accepts a cell array PNAMES of valid parameter names, a cell array +% DFLTS of default values for the parameters named in PNAMES, the +% name of a "user parameter delimiter", and additional parameter name/value +% pairs. Returns parameter values A,B,... in the same order as the names +% in PNAMES, plus a single cell array USER of all input args that follow +% the name specified by DELIM. Outputs corresponding to entries in PNAMES +% that are not specified in the name/value pairs are set to the +% corresponding value from DFLTS, and USER if set to [] if the name in +% DELIM does not appear in the name/value pairs. If nargout is equal to +% length(PNAMES)+2, then unrecognized name/value pairs are an error. If +% nargout is equal to length(PNAMES)+3, then all unrecognized name/value +% pairs are returned in a single cell array following any other outputs. +% +% EMSG is empty if the arguments are valid, or the text of an error message +% if an error occurs. STATGETARGS does not actually throw any errors, but +% rather returns an error message so that the caller may throw the error. +% Outputs will be partially processed after an error occurs. +% +% This utility is used by some Statistics Toolbox functions to process +% name/value pair arguments. +% +% Example: +% pnames = {'color' 'linestyle', 'linewidth'} +% dflts = { 'r' '_' '1'} +% delim = 'userargs' +% varargin = {'linew' 2 'nonesuch' [1 2 3] 'linestyle' ':' ... +% 'userargs' 'pretty much' 'anything here' [1 2 3] {1 2 3}} +% [emsg,c,ls,lw,ua] = statgetargs(pnames,dflts,varargin{:}) % error +% [emsg,c,ls,lw,ua,ur] = statgetargs(pnames,dflts,varargin{:}) % ok +% % c is 'r', ls is ':', and lw is 2 +% % ua is {'pretty much' 'anything here' [1 2 3] {1 2 3}} +% % ur is {'nonesuch' [1 2 3]} + +% Copyright 1993-2002 The MathWorks, Inc. 
+% $Revision: 1.2 $ $Date: 2002/02/04 19:25:45 $ + +% We always create (nparams+2) outputs: +% one for emsg +% nparams varargs for values corresponding to names in pnames +% one vararg for all user values +% If they ask for one more (nargout == nparams+3), it's for unrecognized +% names/values + +% Initialize some variables +emsg = ''; +nparams = length(pnames); +varargout = {dflts{:} {}}; % user args default to empty +unrecog = {}; +nargs = length(varargin); + +pnames = {pnames{:} delim}; % (nparams+1)th element is the user args delimiter + +% Process name/value pairs +for j=1:2:nargs + pname = varargin{j}; + if ~ischar(pname) + emsg = sprintf('Parameter name must be text.'); + break; + % Must have one or more args after a name + elseif j == nargs + emsg = sprintf('Wrong number of arguments.'); + break; + end + i = strmatch(lower(pname),pnames); + if isempty(i) + % if they've asked to get back unrecognized names/values, add this + % one to the list + if nargout > nparams+2 + unrecog((end+1):(end+2)) = {varargin{j} varargin{j+1}}; + + % otherwise, it's an error + else + emsg = sprintf('Invalid parameter name: %s.',pname); + break; + end + elseif length(i)>1 + emsg = sprintf('Ambiguous parameter name: %s.',pname); + break; + + % matched delimiter, everything remaining is user args, return them + % all untouched as a single cell array + elseif i == nparams+1 + varargout{i} = varargin((j+1):end); + break; + + % a regular old name/value pair + else + varargout{i} = varargin{j+1}; + end +end + +varargout{nparams+2} = unrecog; diff --git a/boosting/weightedstats/private/statgetcolor.m b/boosting/weightedstats/private/statgetcolor.m new file mode 100644 index 0000000..cb582d5 --- /dev/null +++ b/boosting/weightedstats/private/statgetcolor.m @@ -0,0 +1,106 @@ +function varargout = statgetcolor(ax, linetype, objh) +%STATGETCOLOR Get a color, marker, and linestyle suitable for a new line +% +% [C,M,L,W] = STATGETCOLOR(AX,LINETYPE,OBJH) gets a color, marker, +% linestyle, and width for drawing a new line of type LINETYPE (either +% 'data' or 'fit') in the axes AX. OBJH is the handle for the +% containing dataset or fit object. + +% Copyright 2003-2004 The MathWorks, Inc. +% $Revision: 1.1.6.3 $ $Date: 2004/01/24 09:36:28 $ + +allcolors = get(ax, 'ColorOrder'); +lineproperties = {'Color' 'LineStyle' 'LineWidth' 'Marker'}; +allmarkers = {'o' '+' '*' 'x' 's' 'd' '.'}; + +% For bad call, these will be returned +c = [0 0 0]; +m = 'none'; +l = '-'; +w = 1; + +% Get values already stored, if any +if ~isempty(objh) & ~isempty(objh.ColorMarkerLine) ... 
+ & nargout<=length(objh.ColorMarkerLine) + [varargout{1:nargout}] = deal(objh.ColorMarkerLine{1:nargout}); + if nargout>4 && isempty(varargout{5}) + % Supply suitable residual values if they are missing + varargout(5:nargout) = varargout(1:nargout-4); + if nargout>=6, varargout{6} = '.'; end + if nargout>=8, varargout{8} = 1; end + end + return +end + +switch linetype + case 'data' + h = findobj(ax, 'Type','line', 'Tag','dfdata'); + dsdb = dfswitchyard('getdsdb'); + unplottedds = [find(dsdb,'plot',1); find(dsdb,'plot',0)]; + unplottedds(unplottedds==objh) = []; + + % Start data colors from the end, to reduce collisions with fit colors + ncolors = size(allcolors,1) + 1; + + % Find an unused color/marker combination + for j=1:length(allmarkers) + m = allmarkers{j}; + h1 = findobj(h,'flat','Marker',m); + a=1; + for k=1:size(allcolors,1) + c = allcolors(ncolors-k,:); + a = findobj(h1,'flat','Color',c); + if isempty(a) + for j=1:length(unplottedds) + cml = get(unplottedds(j),'ColorMarkerLine'); + if iscell(cml) & ~isempty(cml) & ... + isequal(cml{1},c) & isequal(cml{2},m) + a = j; + break + end + end + if isempty(a) + varargout = {c m l w}; + return + end + end + end + end + varargout = {c m l w}; + + case 'fit' + w = 2; + h = findobj(ax, 'Type','line', 'Tag','distfit'); + unplottedfit = [find(getfitdb,'plot',0); find(getfitdb,'plot',1)]; + + % Find an unused color/linestyle combination, prefer linestyle = '-' + allstyles = {'-' '--' '-.'}; + a = 1; + iter = 0; + for j=1:length(allstyles) + l = allstyles{j}; + h1 = findobj(h,'flat','LineStyle',l); + for k=1:size(allcolors,1) + iter = iter+1; + c = allcolors(k,:); + m = allmarkers{1+mod(iter,length(allmarkers))}; + a = findobj(h1,'flat','Color',c); + if isempty(a) + for j=1:length(unplottedfit) + cml = get(unplottedfit(j),'ColorMarkerLine'); + if iscell(cml) & ~isempty(cml) & ... + isequal(cml{1},c) & isequal(cml{3},l) + a = j; + break + end + end + if isempty(a) + varargout = {c m l w}; + return + end + end + end + end + + varargout = {c m l w}; +end diff --git a/boosting/weightedstats/private/statglmeval.m b/boosting/weightedstats/private/statglmeval.m new file mode 100644 index 0000000..d6a5bf6 --- /dev/null +++ b/boosting/weightedstats/private/statglmeval.m @@ -0,0 +1,153 @@ +function retval=statglmeval(action,fn,varargin) +%STATGLMEVAL Evaluate or test link function in proper environment +% STATGLMEVAL('eval',FN,ARGS,...) evaluates the function FN in an +% environment in which certain functions such as LOGIT and +% D_LOGIT are defined. This allows the function FN to be either +% a user-defined function or one of the pre-defined functions +% provided in the GLMFIT function, without contaminating the name +% space with those pre-defined functions. +% +% STATGLMEVAL('testlink',FN) test for the existence of FN as a +% function handle, inline function, or text string containing the +% name of a function in an M-file. + +% Author: Tom Lane, 3-6-2000 +% Copyright 1993-2002 The MathWorks, Inc. +% $Revision: 1.3 $ $Date: 2002/02/04 19:25:46 $ + +switch(action) + case 'eval' + retval = feval(fn,varargin{:}); + + case 'testlink' + c = class(fn); + fnclass = class(@logit); + if (isequal(c,fnclass) | isequal(c,'inline') | ... + (isequal(c,'char') & ~isempty(which(fn)))) + retval = 1; + else + retval = 0; + end +end + + +%%%%%%%%%%%%%%%%%%%%%%%%%% +% The following functions are related to the link function that +% links the distribution parameter mu with the linear combination +% eta of predictor variables. 
For example, the logit link defines +% +% eta = log(mu/(1-mu)) + +%%%% functions for identity link +function a=identity(b) +a=b; + +function a=d_identity(b) +a = ones(size(b)); + +function b=i_identity(a) +b=a; + +%%%% functions for logit link +function a=logit(p) +a = log(p ./ (1-p)); + +function a=d_logit(p) +a = 1 ./ max(eps, (p .* (1-p))); + +function p=i_logit(a) +p = 1 ./ (1 + exp(-a)); + + +%%%% functions for probit link +function a=probit(p) +a = norminv(p); + +function a=d_probit(p) +a = 1 ./ max(eps, normpdf(norminv(p))); + +function p=i_probit(a) +p = normcdf(a); + + +%%%% functions for complementary log log link +function a=comploglog(p) +a = log(-log(1-max(eps,p))); + +function a=d_comploglog(p) +a = 1 ./ -(max(eps,1-p) .* log(1-max(eps,p))); + +function p=i_comploglog(a) +p = 1 - exp(-exp(a)); + + +%%%% functions for log log link +function a=logloglink(p) +a = log(-log(max(eps,p))); + +function a=d_logloglink(p) +a = 1 ./ (max(eps, p) .* log(max(eps,p))); + +function p=i_logloglink(a) +p = exp(-exp(a)); + + +%%%% functions for log link +function a=d_log(b); +a = 1 ./ max(eps,b); + +function b=i_log(a); +b = exp(a); + +%%%% functions for reciprocal link +function a=reciprocal(b) +a = 1 ./ max(eps, b); + +function a=d_reciprocal(b); +a = -1 ./ max(eps,b).^2; + +function b=i_reciprocal(a); +b = 1 ./ max(eps, a); + + +%%%% functions for power link +function a=power(b,p) +if (p==0) + a = log(max(eps,b)); +else + a = max(eps,b) .^ p; +end + +function a=d_power(b,p); +if (p==0) + a = 1 ./ max(eps,b); +else + a = p * max(eps,b).^(p-1); +end + +function b=i_power(a,p); +if (p==0) + b = exp(a); +else + b = max(eps,a) .^ (1/p); +end + + +%%%%%%%%%%%%%%%%%%%%%%%%%%% +% The following functions define the variance + +function a=normalvariance(b) +a = ones(size(b)); + +function a=poissonvariance(b) +a = b; + +function a=binomialvariance(p,N) +a = p .* (1-p) ./ N; + +function a=gammavariance(b) +a = b.^2; + +function a=inversegaussianvariance(b) +a = b.^3; + \ No newline at end of file diff --git a/boosting/weightedstats/private/statinsertnan.m b/boosting/weightedstats/private/statinsertnan.m new file mode 100644 index 0000000..27ca070 --- /dev/null +++ b/boosting/weightedstats/private/statinsertnan.m @@ -0,0 +1,23 @@ +function [varargout]=statinsertnan(wasnan,varargin) +%STATINSERTNAN Insert NaN values into inputs where they were removed + +% Copyright 1993-2002 The MathWorks, Inc. +% $Revision: 1.4 $ $Date: 2002/02/04 19:25:46 $ + +nanvec = zeros(size(wasnan))*NaN; +ok = ~wasnan; + +% Find NaN, check length, and store outputs temporarily +for j=1:nargin-1 + y = varargin{j}; + if (size(y,1)==1), y = y'; end + + [n p] = size(y); + if (p==1) + x = nanvec; + else + x = repmat(nanvec,1,p); + end + x(ok,:) = y; + varargout{j} = x; +end diff --git a/boosting/weightedstats/private/statremovenan.m b/boosting/weightedstats/private/statremovenan.m new file mode 100644 index 0000000..d3249f9 --- /dev/null +++ b/boosting/weightedstats/private/statremovenan.m @@ -0,0 +1,37 @@ +function [badin,wasnan,varargout]=statremovenan(varargin) +%STATREMOVENAN Remove NaN values from inputs + +% Copyright 1993-2002 The MathWorks, Inc. 
+% $Revision: 1.3 $ $Date: 2002/02/04 19:25:47 $ + +badin = 0; +wasnan = 0; +n = -1; + +% Find NaN, check length, and store outputs temporarily +for j=1:nargin + y = varargin{j}; + if (size(y,1)==1), y = y'; end + + ny = size(y,1); + if (n==-1) + n = ny; + elseif (n~=ny & ny~=0) + if (badin==0), badin = j; end + end + + varargout{j} = y; + + if (badin==0 & ny>0), wasnan = wasnan | any(isnan(y),2); end +end + +if (badin>0), return; end + +% Fix outputs +if (any(wasnan)) + t = ~wasnan; + for j=1:nargin + y = varargout{j}; + if (length(y)>0), varargout{j} = y(t,:); end + end +end diff --git a/boosting/weightedstats/private/statrobustfit.m b/boosting/weightedstats/private/statrobustfit.m new file mode 100644 index 0000000..60e3089 --- /dev/null +++ b/boosting/weightedstats/private/statrobustfit.m @@ -0,0 +1,164 @@ +function [b,stats] = statrobustfit(X,y,wfun,tune,wasnan,addconst) +%STATROBUSTFIT Calculation function for ROBUSTFIT + +% Tom Lane 2-11-2000 +% Copyright 1993-2002 The MathWorks, Inc. +% $Revision: 1.4 $ $Date: 2002/02/04 19:25:48 $ + +% Must check for valid function in this scope +c = class(wfun); +fnclass = class(@bisquare); +if (~isequal(c,fnclass) & ~isequal(c,'inline') ... + & (~isequal(c,'char') | isempty(which(wfun)))) + error('Third argument (weight function) is not valid.'); +end + +[n,p] = size(X); +if (addconst) + X = [ones(n,1) X]; + p = p+1; +end +if (n<=p), error('Not enough points to perform robust estimation.'); end + +% Find the least squares solution. +[Q, R]=qr(X,0); +b = R\(Q'*y); +b0 = zeros(size(b)); + +% Adjust residuals using leverage, as advised by DuMouchel & O'Brien +E = X/R; +h = min(.9999, sum((E.*E)')'); +adjfactor = 1 ./ sqrt(1-h); + +dfe = n-p; +ols_s = norm(y-X*b) / sqrt(dfe); + +% Perform iteratively reweighted least squares to get coefficient estimates +D = 1e-6; +iter = 0; +iterlim = 50; +while((iter==0) | any(abs(b-b0) > D*max(abs(b),abs(b0)))) + iter = iter+1; + if (iter>iterlim), warning('Iteration limit reached.'); break; end + + % Compute residuals from previous fit, then compute scale estimate + r = y - X*b; + radj = r .* adjfactor; + s = madsigma(radj,p); + if (s==0), s=1; end + + % Compute new weights from these residuals, then re-fit + w = feval(wfun, radj/(s*tune)); + b0 = b; + b = wfit(y,X,w); +end + +if (nargout>1) + % Compute robust mse according to DuMouchel & O'Brien (1989) + r = y - X*b; + radj = r .* adjfactor; + mad_s = madsigma(radj,p); + robust_s = robustsigma(wfun, radj, p, mad_s, tune, h); + + % Shrink robust value toward ols value if appropriate + sigma = max(robust_s, sqrt((ols_s^2 * p^2 + robust_s^2 * n) / (p^2 + n))); + + % Get coefficient standard errors and related quantities + RI = R\eye(p); + C = (RI * RI') * sigma^2; + se = sqrt(max(eps,diag(C))); + C = C ./ (se * se'); + + % Make outputs conform with inputs + [r,w,h,adjfactor] = statinsertnan(wasnan,r,w,h,adjfactor); + + % Save everything + stats.ols_s = ols_s; + stats.robust_s = robust_s; + stats.mad_s = mad_s; + stats.s = sigma; + stats.resid = r; + stats.rstud = r .* adjfactor / sigma; + stats.se = se; + stats.coeffcorr = C; + stats.t = b ./ se; + stats.p = 2 * tcdf(-abs(stats.t), dfe); + stats.w = w; + stats.R = R; + stats.dfe = dfe; + stats.h = h; +end + +% ----------------------------- +function [b,R,xw] = wfit(y,x,w) +%WFIT weighted least squares fit +sw = sqrt(w); +[r c] = size(x); +yw = y .* sw; +xw = x .* sw(:,ones(1,c)); +[Q,R]=qr(xw,0); +b = R\(Q'*yw); + +% ----------------------------- +function s = madsigma(r,p); +%MADSIGMA Compute sigma estimate using 
MAD of residuals +m = median(r); +rs = sort(abs(r-m)); +if (abs(m) > rs(end)) + % Unexpectedly all residuals are very small + rs = sort(abs(r)); +end +s = median(rs(p:end)) / 0.6745; +if (s==0), s = .5*mean(rs); end + +% ----------------------------- +function s = robustsigma(wfun,r,p,s,t,h) +%ROBUSTSIGMA Compute robust sigma estimate of DuMouchel & O'Brien +% This function uses a formula from DuMouchel & O'Brien. It is +% based on ideas in Huber, pp. 172-175 and 195-198. + +st = s*t; +n = length(r); +u = r ./ st; +phi = u .* feval(wfun,u); +delta = 0.01; +u1 = u + delta; +phi1 = u1 .* feval(wfun,u1); +dphi = (phi1 - phi) ./ delta; +m1 = mean(dphi); +m2 = mean((dphi-m1).^2); +K = 1 + (p/n) * (m2 / m1.^2); + +s = (K/m1) * sqrt(sum(phi.^2 .* st^2 .* (1-h)) ./ (n-p)); + + +% --------- weight functions + +function w = andrews(r) +r = max(sqrt(eps), abs(r)); +w = (abs(r)= 1e10); arg2 = (l <= -1e10); +u(arg) = inf*ones(length(arg(arg>0)),1); +l(arg2) = -inf*ones(length(arg2(arg2>0)),1); +if any(u == l) + errmsg=sprintf('%s\n%s',... + 'Equal upper and lower bounds not permitted in this large-scale method.',... + 'Use equality constraints and the medium-scale method instead.'); + error(errmsg) +elseif min(u-l) <= 0 + error('Inconsistent bounds.') +end +if min(min(u-xcurr),min(xcurr-l)) < 0, xcurr = startx(u,l); end + +% get options out +typx = optimget(options,'TypicalX',defaultopt,'fast') ; +% In case the defaults were gathered from calling: optimset('quadprog'): +numberOfVariables = n; +if ischar(typx) + if isequal(lower(typx),'ones(numberofvariables,1)') + typx = ones(numberOfVariables,1); + else + error('Option ''TypicalX'' must be an integer value if not the default.') + end +end + +% Will be user-settable later: +pcmtx = optimget(options,'Preconditioner','hprecon') ; % not a default yet + +mtxmpy = optimget(options,'HessMult',defaultopt,'fast') ; +if isempty(mtxmpy) + mtxmpy = @hmult; % to detect name clash with user hmult.m, need this +end + +active_tol = optimget(options,'ActiveConstrTol',sqrt(eps)); % not a default yet, so use slow optimget +pcflags = optimget(options,'PrecondBandWidth',defaultopt,'fast') ; +tol2 = optimget(options,'TolX',defaultopt,'fast') ; +tol1 = optimget(options,'TolFun',defaultopt,'fast') ; +tol = tol1; +maxiter = optimget(options,'MaxIter',defaultopt,'fast') ; +maxfunevals = optimget(options,'MaxFunEvals',defaultopt,'fast') ; +pcgtol = optimget(options,'TolPCG',defaultopt,'fast') ; % pcgtol = .1; +kmax = optimget(options,'MaxPCGIter', defaultopt,'fast') ; +if ischar(kmax) + if isequal(lower(kmax),'max(1,floor(numberofvariables/2))') + kmax = max(1,floor(numberOfVariables/2)); + else + error('Option ''MaxPCGIter'' must be an integer value if not the default.') + end +end +if ischar(maxfunevals) + if isequal(lower(maxfunevals),'100*numberofvariables') + maxfunevals = 100*numberOfVariables; + else + error('Option ''MaxFunEvals'' must be an integer value if not the default.') + end +end +maxcount = min(maxiter, maxfunevals); % numfunevals = iterations, so just take minimum + +dnewt = []; gopt = []; +ex = 0; posdef = 1; npcg = 0; + +%tol1 = tol; tol2 = sqrt(tol1)/10; +if strcmp(optimget(options,'DerivativeCheck',defaultopt,'fast'),'on') + warnstr = sprintf('%s\n%s\n', ... + 'Trust region algorithm does not currently check user-supplied gradients,', ... 
+ ' ignoring OPTIONS.DerivativeCheck.'); + warning(warnstr); +end + +vpos(1,1) = 1; vpcg(1,1) = 0; nbnds = 1; +pcgit = 0; delta = 10;nrmsx = 1; ratio = 0; degen = inf; +if (all(u == inf) & all(l == -inf)) nbnds = 0; end +DS = speye(n); v = zeros(n,1); dv = ones(n,1); del = 10*eps; +oval = inf; g = zeros(n,1); newgrad = g; Z = []; + +% Make x conform to the user's input x +x(:) = xcurr; +% Evaluate f,g, and H +if ~isempty(Hstr) % use sparse finite differencing + %[val,g] = feval(fname,x); + switch funfcn{1} + case 'fun' + error('should not reach this') + case 'fungrad' + %[val,g(:)] = feval(funfcn{3},x,varargin{:}); + val = initialf; g(:) = initialGRAD; + case 'fun_then_grad' + % val = feval(funfcn{3},x,varargin{:}); + % g(:) = feval(funfcn{4},x,varargin{:}); + val = initialf; g(:) = initialGRAD; + otherwise + if isequal(funfcn{2},'fmincon') + error('Undefined calltype in FMINCON'); + else + error('Undefined calltype in FMINUNC'); + end + end + + % Determine coloring/grouping for sparse finite-differencing + p = colmmd(Hstr)'; p = (n+1)*ones(n,1)-p; group = color(Hstr,p); + % pass in the user shaped x + H = sfd(x,g,Hstr,group,[],funfcn,varargin{:}); + % +else % user-supplied computation of H or dnewt + % [val,g,H] = feval(fname,x); + switch funfcn{1} + case 'fungradhess' + % [val,g(:),H] = feval(funfcn{3},x,varargin{:}); + val = initialf; g(:) = initialGRAD; H = initialHESS; + case 'fun_then_grad_then_hess' + % val = feval(funfcn{3},x,varargin{:}); + % g(:) = feval(funfcn{4},x,varargin{:}); + % H = feval(funfcn{5},x,varargin{:}); + val = initialf; g(:) = initialGRAD; H = initialHESS; + otherwise + if isequal(funfcn{2},'fmincon') + error('Undefined calltype in FMINCON'); + else + error('Undefined calltype in FMINUNC'); + end + end +end + +delbnd = max(100*norm(xcurr),1); +[nn,pp] = size(g); + +% Extract the Newton direction? +if pp == 2, dnewt = g(1:n,2); end +if verb > 1 + disp(header) +end + +% MAIN LOOP: GENERATE FEAS. SEQ. xcurr(it) S.T. f(x(it)) IS DECREASING. +while ~ex + if ~isfinite(val) | any(~isfinite(g)) + errmsg= sprintf('%s%s%s',funfcn{2},' cannot continue: ',... 
+ 'user function is returning Inf or NaN values.'); + error(errmsg) + end + + % Update + [v,dv] = definev(g(:,1),xcurr,l,u); + gopt = v.*g(:,1); gnrm = norm(gopt,inf); + vgnrm(it,1)=gnrm; + r = abs(min(u-xcurr,xcurr-l)); degen = min(r + abs(g(:,1))); + vdeg(it,1) = min(degen,1); bndfeas = min(min(xcurr-l,u-xcurr)); + if ((u == inf*ones(n,1)) & (l == -inf*ones(n,1))) degen = -1; end + + % Display + if verb > 1 + currOutput = sprintf(formatstr,it,val,nrmsx,gnrm,pcgit); + disp(currOutput); + end + + % TEST FOR CONVERGENCE + diff = abs(oval-val); + oval = val; + if (nrmsx < .9*delta)&(ratio > .25)&(diff < tol1*(1+abs(oval))) + ex = 1; + if verb > .5 + disp('Optimization terminated successfully:') + disp(' Relative function value changing by less than OPTIONS.TolFun'); + end + + elseif (it > 1) & (nrmsx < tol2) + ex = 2; + if verb > .5 + disp('Optimization terminated successfully:') + disp(' Norm of the current step is less than OPTIONS.TolX'); + end + + elseif ((gnrm < tol1) & (posdef ==1) ) + ex = 3; + if verb > .5 + disp('Optimization terminated successfully:') + disp(' First-order optimality less than OPTIONS.TolFun, and no negative/zero curvature detected'); + end + end + + % Step computation + if ~ex + + % Determine trust region correction + dd = abs(v); D = sparse(1:n,1:n,full(sqrt(dd))); + sx = zeros(n,1); theta = max(.95,1-gnrm); + oposdef = posdef; + [sx,snod,qp,posdef,pcgit,Z] = trdog(xcurr, g(:,1),H,D,delta,dv,... + mtxmpy,pcmtx,pcflags,pcgtol,kmax,theta,l,u,Z,dnewt,'hessprecon',varargin{:}); + if isempty(posdef), posdef = oposdef; end + nrmsx=norm(snod); npcg=npcg + pcgit; + newx=xcurr + sx; vpcg(it+1,1)=pcgit; + + % Perturb? + [pert,newx] = perturb(newx,l,u); + vpos(it+1,1) = posdef; + + % Make newx conform to user's input x + x(:) = newx; + % Evaluate f, g, and H + if ~isempty(Hstr) % use sparse finite differencing + %[newval,newgrad] = feval(fname,x); + switch funfcn{1} + case 'fun' + error('should not reach this') + case 'fungrad' + [newval,newgrad(:)] = feval(funfcn{3},x,varargin{:}); + case 'fun_then_grad' + newval = feval(funfcn{3},x,varargin{:}); + newgrad(:) = feval(funfcn{4},x,varargin{:}); + otherwise + error('Undefined calltype in FMINUNC'); + end + newH = sfd(x,newgrad,Hstr,group,[],funfcn,varargin{:}); + + else % user-supplied computation of H or dnewt + %[newval,newgrad,newH] = feval(fname,x); + switch funfcn{1} + case 'fungradhess' + [newval,newgrad(:),newH] = feval(funfcn{3},x,varargin{:}); + case 'fun_then_grad_then_hess' + newval = feval(funfcn{3},x,varargin{:}); + newgrad(:) = feval(funfcn{4},x,varargin{:}); + newH = feval(funfcn{5},x,varargin{:}); + otherwise + error('Undefined calltype in FMINUNC'); + end + + end + [nn,pp] = size(newgrad); + aug = .5*snod'*((dv.*abs(newgrad(:,1))).*snod); + ratio = (newval + aug -val)/qp; vratio(it,1) = ratio; + + if (ratio >= .75) & (nrmsx >= .9*delta) + delta = min(delbnd,2*delta); + elseif ratio <= .25 + delta = min(nrmsx/4,delta/4); + end + if newval == inf + delta = min(nrmsx/20,delta/20); + end + + % Update + if newval < val + xold = xcurr; xcurr=newx; val = newval; g= newgrad; H = newH; + Z = []; + + % Extract the Newton direction? 
+ if pp == 2, dnewt = newgrad(1:n,2); end + end + it = it+1; vval(it,1) = val; + end + if it > maxcount, + ex=4; + it = it-1; + if verb > 0 + if it > maxiter + disp('Maximum number of iterations exceeded;') + disp(' increase options.MaxIter') + elseif it > maxfunevals + disp('Maximum number of function evaluations exceeded;') + disp(' increase options.MaxFunEvals') + end + end + end +end % while + +HESSIAN = H; +GRAD = g; +FVAL = val; +LAMBDA = []; +if ex==4 + EXITFLAG = 0; +elseif ex==10 + EXITFLAG = -1; +else + EXITFLAG = 1; +end +OUTPUT.iterations = it; +OUTPUT.funcCount = it; +OUTPUT.cgiterations = npcg; +OUTPUT.firstorderopt = gnrm; +OUTPUT.algorithm = 'large-scale: trust-region reflective Newton'; +x(:) = xcurr; +if computeLambda + g = full(g); + + LAMBDA.lower = zeros(length(l),1); + LAMBDA.upper = zeros(length(u),1); + argl = logical(abs(xcurr-l) < active_tol); + argu = logical(abs(xcurr-u) < active_tol); + + LAMBDA.lower(argl) = (g(argl)); + LAMBDA.upper(argu) = -(g(argu)); + LAMBDA.ineqlin = []; LAMBDA.eqlin = []; LAMBDA.ineqnonlin=[]; LAMBDA.eqnonlin=[]; +else + LAMBDA = []; +end + + +%===== definev.m ================================================= + + +function [v,dv]= definev(g,x,l,u); +%DEFINEV Scaling vector and derivative +% +% [v,dv]= DEFINEV(g,x,l,u) returns v, distances to the +% bounds corresponding to the sign of the gradient g, where +% l is the vector of lower bounds, u is the vector of upper +% bounds. Vector dv is 0-1 sign vector (See ?? for more detail.) +% + +n = length(x); +v = zeros(n,1); +dv=zeros(n,1); +arg1 = (g < 0) & (u < inf ); +arg2 = (g >= 0) & (l > -inf); +arg3 = (g < 0) & (u == inf); +arg4 = (g >= 0) & (l == -inf); +v(arg1) = (x(arg1) - u(arg1)); +dv(arg1) = 1; +v(arg2) = (x(arg2) - l(arg2)); +dv(arg2) = 1; +v(arg3) = -1; +dv(arg3) = 0; +v(arg4) = 1; +dv(arg4) = 0; + + +%===== trdog.m =================================================== + + +function[s,snod,qpval,posdef,pcgit,Z] = trdog(x,g,H,D,delta,dv,... + mtxmpy,pcmtx,pcoptions,tol,kmax,theta,l,u,Z,dnewt,preconflag,varargin); +%TRDOG Reflected (2-D) trust region trial step (box constraints) +% +% [s,snod,qpval,posdef,pcgit,Z] = TRDOG(x,g,H,D,delta,dv,... +% mtxmpy,pcmtx,pcoptions,tol,theta,l,u,Z,dnewt,preconflag); +% +% Determine the trial step `s', an approx. trust region solution. +% `s' is chosen as the best of 3 steps: the scaled gradient +% (truncated to maintain strict feasibility), +% a 2-D trust region solution (truncated to remain strictly feas.), +% and the reflection of the 2-D trust region solution, +% (truncated to remain strictly feasible). +% +% The 2-D subspace (defining the trust region problem) is defined +% by the scaled gradient direction and a CG process (returning +% either an approximate Newton step of a direction of negative curvature. +% Driver functions are: SNLS, SFMINBX +% SNLS actually calls TRDOG with the Jacobian matrix (and a special +% Jacobian-matrix multiply function in MTXMPY). 
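+%
+% Summary of the step computation below (a paraphrase of this code, not a
+% separate specification): with DM = D and DG = diag(abs(g).*dv), a 2-D (or
+% 1-D) subspace Z is built and the reduced quadratic
+%
+%     q(st) = rhs'*st + .5*st'*MM*st,    ||st|| <= delta,
+%
+% is minimized, where MM = Z'*(DM*H*DM + DG)*Z and rhs = Z'*(D*g).  The
+% minimizer is mapped back via s = abs(diag(D)).*(Z*st) and truncated to
+% stay strictly feasible; the reflected step and the scaled gradient step
+% are scored with the same kind of quadratic, and the best of the three
+% candidates is returned.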
+ +% Initialization +n = length(g); +pcgit = 0; +grad = D*g; +DM = D; +DG = sparse(1:n,1:n,full(abs(g).*dv)); +posdef = 1; +pcgit = 0; +tol2 = sqrt(eps); +v1 = dnewt; +qpval1 = inf; +qpval2 = inf; +qpval3 = inf; + +% DETERMINE A 2-DIMENSIONAL SUBSPACE +if isempty(Z) + if isempty(v1) + switch preconflag + case 'hessprecon' + % preconditioner based on H, no matter what it is + [R,permR] = feval(pcmtx,H,pcoptions,DM,DG,varargin{:}); + case 'jacobprecon' + [R,permR] = feval(pcmtx,H,pcoptions,DM,DG,varargin{:}); + otherwise + error('Invalid string used for PRECONFLAG argument to TRDOG'); + end + % We now pass kmax in from calling function + %kmax = max(1,floor(n/2)); + if tol <= 0, + tol = .1; + end + + [v1,posdef,pcgit] = pcgr(DM,DG,grad,kmax,tol,... + mtxmpy,H,R,permR,preconflag,varargin{:}); + end + if norm(v1) > 0 + v1 = v1/norm(v1); + end + Z(:,1) = v1; + if n > 1 + if (posdef < 1) + v2 = D*sign(grad); + if norm(v2) > 0 + v2 = v2/norm(v2); + end + v2 = v2 - v1*(v1'*v2); + nrmv2 = norm(v2); + if nrmv2 > tol2 + v2 = v2/nrmv2; + Z(:,2) = v2; + end + else + if norm(grad) > 0 + v2 = grad/norm(grad); + else + v2 = grad; + end + v2 = v2 - v1*(v1'*v2); + nrmv2 = norm(v2); + if nrmv2 > tol2 + v2 = v2/nrmv2; + Z(:,2) = v2; + end + end + end +end + +% REDUCE TO THE CHOSEN SUBSPACE +W = DM*Z; +switch preconflag +case 'hessprecon' + WW = feval(mtxmpy,H,W,varargin{:}); +case 'jacobprecon' + WW = feval(mtxmpy,H,W,0,varargin{:}); +otherwise + error('Invalid string used for PRECONFLAG argument to TRDOG'); +end + +W = DM*WW; +MM = full(Z'*W + Z'*DG*Z); +rhs=full(Z'*grad); + +% Determine 2-D TR soln +[st,qpval,po,fcnt,lambda] = trust(rhs,MM,delta); +ss = Z*st; +s = abs(diag(D)).*ss; +s = full(s); +ssave = s; +sssave = ss; +stsave = st; + +% Truncate the TR solution? +arg = (abs(s) > 0); +if isnan(s) + error('Trust region step contains NaN''s.') +end +% No truncation if s is zero length +if isempty(find(arg)) + alpha = 1; + mmdis = 1; +else + mdis = inf; + dis = max((u(arg)-x(arg))./s(arg), (l(arg)-x(arg))./s(arg)); + [mmdis,ipt] = min(dis); + mdis = theta*mmdis; + alpha = min(1,mdis); +end +s = alpha*s; +st = alpha*st; +ss = full(alpha*ss); +qpval1 = rhs'*st + (.5*st)'*MM*st; +if n > 1 + % Evaluate along the reflected direction? + qpval3 = inf; + ssssave = mmdis*sssave; + if norm(ssssave) < .9*delta + r = mmdis*ssave; + ns = ssave; + ns(ipt) = -ns(ipt); + nx = x+r; + stsave = mmdis*stsave; + qpval0 = rhs'*stsave + (.5*stsave)'*MM*stsave; + switch preconflag + case 'hessprecon' + ng = feval(mtxmpy,H,r,varargin{:}); + case 'jacobprecon' + ng = feval(mtxmpy,H,r,0,varargin{:}); + otherwise + error('Invalid string used for PRECONFLAG argument to TRDOG'); + end + + ng = ng + g; + ngrad = D*ng; + ngrad = ngrad + DG*ssssave; + + % nss is the reflected direction + nss = sssave; + nss(ipt) = -nss(ipt); + ZZ(:,1) = nss/norm(nss); + W = DM*ZZ; + + switch preconflag + case 'hessprecon' + WW = feval(mtxmpy,H,W,varargin{:}); + case 'jacobprecon' + WW = feval(mtxmpy,H,W,0,varargin{:}); + otherwise + error('Invalid string used for PRECONFLAG argument to TRDOG'); + end + + + W = DM*WW; + MM = full(ZZ'*W + ZZ'*DG*ZZ); + nrhs=full(ZZ'*ngrad); + [nss,tau] = quad1d(nss,ssssave,delta); + nst = tau/norm(nss); + ns = abs(diag(D)).*nss; + ns = full(ns); + + % Truncate the reflected direction? 
+ arg = (abs(ns) > 0); + if isnan(ns) + error('Reflected trust region step contains NaN''s.') + end + % No truncation if s is zero length + if isempty(find(arg)) + alpha = 1; + else + mdis = inf; + dis = max((u(arg)-nx(arg))./ns(arg), (l(arg)-nx(arg))./ns(arg)); + mdis = min(dis); + mdis = theta*mdis; + alpha = min(1,mdis); + end + ns = alpha*ns; + nst = alpha*nst; + nss = full(alpha*nss); + qpval3 = qpval0 + nrhs'*nst + (.5*nst)'*MM*nst; + end + + % Evaluate along gradient direction + ZZ(:,1) = grad/norm(grad); + W = DM*ZZ; + + switch preconflag + case 'hessprecon' + WW = feval(mtxmpy,H,W,varargin{:}); + case 'jacobprecon' + WW = feval(mtxmpy,H,W,0,varargin{:}); + otherwise + error('Invalid string used for PRECONFLAG argument to TRDOG'); + end + + + W = DM*WW; + MM = full(ZZ'*W + ZZ'*DG*ZZ); + rhs=full(ZZ'*grad); + [st,qpval,po,fcnt,lambda] = trust(rhs,MM,delta); + ssg = ZZ*st; + sg = abs(diag(D)).*ssg; + sg = full(sg); + + % Truncate the gradient direction? + arg = (abs(sg) > 0); + if isnan(sg) + % No truncation if s is zero length + error('Gradient step contains NaN''s.') + end + if isempty(find(arg)) + alpha = 1; + else + mdis = inf; + dis = max((u(arg)-x(arg))./sg(arg), (l(arg)-x(arg))./sg(arg)); + mdis = min(dis); + mdis = theta*mdis; + alpha = min(1,mdis); + end + sg = alpha*sg; + st = alpha*st; + ssg = full(alpha*ssg); + qpval2 = rhs'*st + (.5*st)'*MM*st; +end + +% Choose the best of s, sg, ns. +if qpval2 <= min(qpval1,qpval3) + qpval = qpval2; + s = sg; + snod = ssg; +elseif qpval1 <= min(qpval2,qpval3) + qpval = qpval1; + snod = ss; +else + qpval = qpval3; + s = ns + r; + snod = nss + ssssave; +end + +%----------------------------------------------------------- +function[nx,tau] = quad1d(x,ss,delta) +%QUAD1D 1D quadratic zero finder for trust region step +% +% [nx,tau] = quad1d(x,ss,delta) tau is min(1,step-to-zero) +% of a 1-D quadratic ay^2 + b*y + c. +% a = x'*x; b = 2*(ss'*x); c = ss'*ss-delta^2). nx is the +% new x value, nx = tau*x; + +% Algorithm: +% numer = -(b + sign(b)*sqrt(b^2-4*a*c)); +% root1 = numer/(2*a); +% root2 = c/(a*root1); % because root2*root1 = (c/a); + +a = x'*x; +b = 2*(ss'*x); +c = ss'*ss-delta^2; + +numer = -(b + sign(b)*sqrt(b^2-4*a*c)); +warnstate = warning('off'); % Avoid divide by zero warnings +r1 = numer/(2*a); +r2 = c/(a*r1); +warning(warnstate); + +tau = max(r1,r2); +tau = min(1,tau); +if tau <= 0, + error('square root error in trdog/quad1d'); +end +nx = tau*x; + + +%===== pcgr.m ==================================================== + + +function[p,posdef,k] = pcgr(DM,DG,g,kmax,tol,mtxmpy,H,R,pR,callerflag,varargin); +%PCGR Preconditioned conjugate gradients +% +% [p,posdef,k] = PCGR(DM,DG,g,kmax,tol,mtxmpy,H,R,pR) apply +% a preconditioned conjugate gradient procedure to the quadratic +% +% q(p) = .5p'Mp + g'p, where +% +% M = DM*H*DM + DG. kmax is a bound on the number of permitted +% CG-iterations, tol is a stopping tolerance on the residual (default +% is tol = .1), mtxmpy is the function that computes products +% with the Hessian matrix H, +% and R is the cholesky factor of the preconditioner (transpose) of +% M. So, R'R approximates M(pR,pR), where pR is a permutation vector. +% On ouput p is the computed direction, posdef = 1 implies +% only positive curvature (in M) has been detected; posdef = 0 +% implies p is a direction of negative curvature (for M). +% Output parameter k is the number of CG-iterations used (which +% corresponds to the number of multiplications with H). +% + +% Initializations. 
+n = length(DG); +r = -g; +p = zeros(n,1); +val = 0; +m = 0; + +% Precondition . +z = preproj(r,R,pR); +znrm = norm(z); +stoptol = tol*znrm; +inner2 = 0; +inner1 = r'*z; +posdef = 1; + +kmax = max(kmax,1); % kmax must be at least 1 +% PRIMARY LOOP. +for k = 1:kmax + if k==1 + d = z; + else + beta = inner1/inner2; + d = z + beta*d; + end + ww = DM*d; + switch callerflag + case 'hessprecon' + w = feval(mtxmpy,H,ww,varargin{:}); + case 'jacobprecon' + w = feval(mtxmpy,H,ww,0,varargin{:}); + otherwise + error('PCGR does not recognize this calling function.') + end + ww = DM*w +DG*d; + denom = d'*ww; + if denom <= 0 + if norm(d) == 0 + p = d; + else + p = d/norm(d); + end + posdef = 0; + break + else + alpha = inner1/denom; + p = p + alpha*d; + r = r - alpha*ww; + end + z = preproj(r,R,pR); + + % Exit? + if norm(z) <= stoptol + break; + end + inner2 = inner1; + inner1 = r'*z; +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +function w = preproj(r,RPCMTX,ppvec); +%PREPROJ Apply preconditioner +% +% w = preproj(r,RPCMTX,ppvec) Apply a preconditioner to vector r. +% The conceptual preconditioner is H(ppvec,ppvec) = RPCMTX'*RPCMTX + +% Initialization +n = length(r); +one = ones(n,1); +vtol = 100*eps*one; + +if nargin < 3 | isempty(ppvec) + ppvec = (1:n); + if nargin < 2 | isempty(RPCMTX) + RPCMTX = speye(n); + end +end + +% Precondition +wbar = RPCMTX'\r(ppvec); +w(ppvec,1) = RPCMTX\wbar; + + +%===== hmult.m =================================================== + +function W = hmult(Hinfo,Y,varargin); +%HMULT Hessian-matrix product +% +% W = HMULT(Y,Hinfo) An example of a Hessian-matrix product function +% file, e.g. Hinfo is the actual Hessian and so W = Hinfo*Y. +% +% Note: varargin is not used but must be provided in case +% the objective function has additional problem dependent +% parameters (which will be passed to this routine as well). + +W = Hinfo*Y; + + +%===== trust.m =================================================== + +function [s,val,posdef,count,lambda] = trust(g,H,delta) +%TRUST Exact soln of trust region problem +% +% [s,val,posdef,count,lambda] = TRUST(g,H,delta) Solves the trust region +% problem: min{g^Ts + 1/2 s^THs: ||s|| <= delta}. The full +% eigen-decomposition is used; based on the secular equation, +% 1/delta - 1/(||s||) = 0. The solution is s, the value +% of the quadratic at the solution is val; posdef = 1 if H +% is pos. definite; otherwise posdef = 0. The number of +% evaluations of the secular equation is count, lambda +% is the value of the corresponding Lagrange multiplier. +% +% +% TRUST is meant to be applied to very small dimensional problems. 
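+%
+% Usage sketch (illustrative numbers only, not taken from the callers):
+%   g = [1; -2];  H = [4 1; 1 3];  delta = 1;
+%   [s,val,posdef] = trust(g,H,delta);
+% Here H is positive definite and the Newton step -H\g has norm ~0.94, which
+% passes the relaxed radius test below (nrms <= 1.2*delta), so s = -H\g.
+% Otherwise lambda is found from the secular equation and, apart from the
+% hard case, s is essentially -(H + lambda*eye(n))\g.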
+ +% INITIALIZATION +tol = 10^(-12); +tol2 = 10^(-8); +key = 0; +itbnd = 50; +lambda = 0; +n = length(g); +coeff(1:n,1) = zeros(n,1); +H = full(H); +[V,D] = eig(H); +count = 0; +eigval = diag(D); +[mineig,jmin] = min(eigval); +alpha = -V'*g; +sig = sign(alpha(jmin)) + (alpha(jmin)==0); + +% POSITIVE DEFINITE CASE +if mineig > 0 + coeff = alpha ./ eigval; + lambda = 0; + s = V*coeff; + posdef = 1; + nrms = norm(s); + if nrms <= 1.2*delta + key = 1; + else + laminit = 0; + end +else + laminit = -mineig; + posdef = 0; +end + +% INDEFINITE CASE +if key == 0 + if seceqn(laminit,eigval,alpha,delta) > 0 + [b,c,count] = rfzero('seceqn',laminit,itbnd,eigval,alpha,delta,tol); + vval = abs(seceqn(b,eigval,alpha,delta)); + if abs(seceqn(b,eigval,alpha,delta)) <= tol2 + lambda = b; + key = 2; + lam = lambda*ones(n,1); + w = eigval + lam; + arg1 = (w==0) & (alpha == 0); + arg2 = (w==0) & (alpha ~= 0); + coeff(w ~=0) = alpha(w ~=0) ./ w(w~=0); + coeff(arg1) = 0; + coeff(arg2) = Inf; + coeff(isnan(coeff))=0; + s = V*coeff; + nrms = norm(s); + if (nrms > 1.2*delta) | (nrms < .8*delta) + key = 5; + lambda = -mineig; + end + else + lambda = -mineig; + key = 3; + end + else + lambda = -mineig; + key = 4; + end + lam = lambda*ones(n,1); + if (key > 2) + arg = abs(eigval + lam) < 10 * eps * max(abs(eigval),ones(n,1)); + alpha(arg) = 0; + end + w = eigval + lam; + arg1 = (w==0) & (alpha == 0); arg2 = (w==0) & (alpha ~= 0); + coeff(w~=0) = alpha(w~=0) ./ w(w~=0); + coeff(arg1) = zeros(length(arg1(arg1>0)),1); + coeff(arg2) = Inf *ones(length(arg2(arg2>0)),1); + coeff(coeff==NaN)=zeros(length(coeff(coeff==NaN)),1); + coeff(coeff==NaN)=zeros(length(coeff(coeff==NaN)),1); + s = V*coeff; nrms = norm(s); + if (key > 2) & (nrms < .8*delta) + beta = sqrt(delta^2 - nrms^2); + s = s + beta*sig*V(:,jmin); + end + if (key > 2) & (nrms > 1.2*delta) + [b,c,count] = rfzero('seceqn',laminit,itbnd,eigval,alpha,delta,tol); + lambda = b; lam = lambda*(ones(n,1)); + w = eigval + lam; + arg1 = (w==0) & (alpha == 0); arg2 = (w==0) & (alpha ~= 0); + coeff(w~=0) = alpha(w~=0) ./ w(w~=0); + coeff(arg1) = zeros(length(arg1(arg1>0)),1); + coeff(arg2) = Inf *ones(length(arg2(arg2>0)),1); + coeff(coeff==NaN)=zeros(length(coeff(coeff==NaN)),1); + s = V*coeff; nrms = norm(s); + end +end +val = g'*s + (.5*s)'*(H*s); + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +function[value] = seceqn(lambda,eigval,alpha,delta); +%SEC Secular equation +% +% value = SEC(lambda,eigval,alpha,delta) returns the value +% of the secular equation at a set of m points lambda +% +% + + +% +m = length(lambda); n = length(eigval); +unn = ones(n,1); unm = ones(m,1); +M = eigval*unm' + unn*lambda'; MC = M; +MM = alpha*unm'; +M(M~=0) = MM(M~=0) ./ M(M~=0); +M(MC==0) = Inf*ones(size(MC(MC==0))); +M = M.*M; +value = sqrt(unm ./ (M'*unn)); +value(value==NaN) = zeros(length(value(value==NaN)),1); +value = (1/delta)*unm - value; + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +function [b,c,itfun] = rfzero(FunFcn,x,itbnd,eigval,alpha,delta,tol,trace) +%RFZERO Find zero to the right +% +% [b,c,itfun] = rfzero(FunFcn,x,itbnd,eigval,alpha,delta,tol,trace) +% Zero of a function of one variable to the RIGHT of the +% starting point x. A small modification of the M-file fzero, +% described below, to ensure a zero to the Right of x is +% searched for. +% +% RFZERO is a slightly modified version of function FZERO + + + + +% FZERO(F,X) finds a zero of f(x). 
F is a string containing the +% name of a real-valued function of a single real variable. X is +% a starting guess. The value returned is near a point where F +% changes sign. For example, FZERO('sin',3) is pi. Note the +% quotes around sin. Ordinarily, functions are defined in M-files. +% +% An optional third argument sets the relative tolerance for the +% convergence test. The presence of an nonzero optional fourth +% argument triggers a printing trace of the steps. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% C.B. Moler 1-19-86 +% Revised CBM 3-25-87, LS 12-01-88. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% This algorithm was originated by T. Dekker. An Algol 60 version, +% with some improvements, is given by Richard Brent in "Algorithms for +% Minimization Without Derivatives", Prentice-Hall, 1973. A Fortran +% version is in Forsythe, Malcolm and Moler, "Computer Methods +% for Mathematical Computations", Prentice-Hall, 1976. +% +% Initialization +if nargin < 7, trace = 0; tol = eps; end +if nargin == 7, trace = 0; end +if trace, clc, end +itfun = 0; +% +%if x ~= 0, dx = x/20; +%if x ~= 0, dx = abs(x)/20; +if x~= 0, dx = abs(x)/2; + % + %else, dx = 1/20; +else, dx = 1/2; +end +% +%a = x - dx; fa = feval(FunFcn,a,eigval,alpha,delta); +a = x; c = a; fa = feval(FunFcn,a,eigval,alpha,delta); +itfun = itfun+1; +% +if trace, home, init = [a fa], end +b = x + dx; +b = x + 1; +fb = feval(FunFcn,b,eigval,alpha,delta); +itfun = itfun+1; +if trace, home, init = [b fb], end + +% Find change of sign. + +while (fa > 0) == (fb > 0) + dx = 2*dx; + % + % a = x - dx; fa = feval(FunFcn,a); + % if trace, home, sign = [a fa], end + % + if (fa > 0) ~= (fb > 0), break, end + b = x + dx; fb = feval(FunFcn,b,eigval,alpha,delta); + itfun = itfun+1; + if trace, home, sign = [b fb], end + if itfun > itbnd, break; end +end + +fc = fb; +% Main loop, exit from middle of the loop +while fb ~= 0 + % Insure that b is the best result so far, a is the previous + % value of b, and c is on the opposite of the zero from b. 
+ if (fb > 0) == (fc > 0) + c = a; fc = fa; + d = b - a; e = d; + end + if abs(fc) < abs(fb) + a = b; b = c; c = a; + fa = fb; fb = fc; fc = fa; + end + + % Convergence test and possible exit + % + if itfun > itbnd, break; end + m = 0.5*(c - b); + toler = 2.0*tol*max(abs(b),1.0); + if (abs(m) <= toler) + (fb == 0.0), break, end + + % Choose bisection or interpolation + if (abs(e) < toler) + (abs(fa) <= abs(fb)) + % Bisection + d = m; e = m; + else + % Interpolation + s = fb/fa; + if (a == c) + % Linear interpolation + p = 2.0*m*s; + q = 1.0 - s; + else + % Inverse quadratic interpolation + q = fa/fc; + r = fb/fc; + p = s*(2.0*m*q*(q - r) - (b - a)*(r - 1.0)); + q = (q - 1.0)*(r - 1.0)*(s - 1.0); + end; + if p > 0, q = -q; else p = -p; end; + % Is interpolated point acceptable + if (2.0*p < 3.0*m*q - abs(toler*q)) & (p < abs(0.5*e*q)) + e = d; d = p/q; + else + d = m; e = m; + end; + end % Interpolation + + % Next point + a = b; + fa = fb; + if abs(d) > toler, b = b + d; + else if b > c, b = b - toler; + else b = b + toler; + end + end + fb = feval(FunFcn,b,eigval,alpha,delta); + itfun = itfun + 1; + if trace, home, step = [b fb], end +end % Main loop + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%===== perturb.m ================================================= + +function[pert,x,y] = perturb(x,l,u,del,y,sigma) +%PERTURB Perturb point from bounds +% +% [PERT,X] = PERTURB(X,L,U,DEL) perturbs the current point X +% slightly to shake it loose from tight (less than DEL away) +% bounds U and L to be strictly feasible. +% Called by SNLS and SFMINBX. +% +% [PERT,X,Y] = PERTURB(X,L,U,DEL,Y,SIGMA) also perturbs the +% reflected point Y with respect to SIGMA, + +if nargin < 4 + del = 100*eps; +end + +if (min(abs(u-x)) < del) | (min(abs(x-l)) < del) + upperi = (u-x) < del; + loweri = (x-l) < del; + x(upperi) = x(upperi) - del; + x(loweri) = x(loweri) + del; + if nargin > 4 + y(upperi) = y(upperi) - del*sigma(upperi); + y(loweri) = y(loweri) + del*sigma(loweri); + end + pert = 1; +else + pert = 0; +end + + +%===== startx.m ================================================= + +function xstart = startx(u,l); +%STARTX Box-centered point +% +% xstart = STARTX(u,l) returns centered point. + +n = length(u); +onen = ones(n,1); +arg = (u > 1e12); +u(arg) = inf*onen(arg); +xstart = zeros(n,1); +arg1 = (u -inf); +arg3 = (u-inf); arg4 = (u==inf)&(l==-inf); +% +w = max(abs(u),ones(n,1)); +xstart(arg1) = u(arg1) - .5*w(arg1); +% +ww = max(abs(l),ones(n,1)); +xstart(arg2) = l(arg2) + .5*ww(arg2); +% +xstart(arg3)=(u(arg3)+l(arg3))/2; +xstart(arg4)=ones(length(arg4(arg4>0)),1); + + +%===== color.m ================================================== + +function [group] = color(J,p); +%COLOR Column partition for sparse finite differences. +% +% GROUP = COLOR(J,P) returns a partition of the +% column corresponding to a coloring of the column- +% intersection graph. GROUP(I) = J means column I is +% colored J. +% +% All columns belonging to a color can be estimated +% in a single finite difference. 
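+%
+% Tiny illustrative example (hypothetical sparsity pattern):
+%   J = sparse(kron(eye(2), ones(2)));   % two independent 2x2 blocks
+%   group = color(J)                     % expected result: [1;2;1;2]
+% Columns 1 and 3 touch disjoint row sets, so one gradient difference
+% serves both of them; the whole pattern needs 2 differences instead of 4.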
+% + +% +[m,n] = size(J); +if nargin < 2, + p = 1:n; +end +J = J(:,p); +group = zeros(n,1); +ncol = 0; +J = spones(J); +while any(group==0) + % Build group for ncol + ncol = ncol + 1; + rows = zeros(m,1); + index = find(group == 0); + lenindex = length(index); + for i = 1:lenindex + k = index(i); + inner = J(:,k)'*rows; + if inner == 0 + group(k) = ncol; + rows = rows + J(:,k); + end + end +end +group(p)= group; + + +%===== sfd.m ==================================================== + +function[H] = sfd(x,grad,H,group,alpha,funfcn,varargin) +%SFD Sparse Hessian via finite gradient differences +% +% H = sfd(x,grad,H,group,fdata,fun) returns the +% sparse finite difference approximation H of a Hessian matrix +% of function 'fun' at current point x. +% Vector group indicates how to use sparse finite differencing: +% group(i) = j means that column i belongs to group (or color) j. +% Each group (or color) corresponds to a finite gradient difference. +% fdata is a data array (possibly) needed by function 'fun'. +% +% H = sfd(x,grad,H,group,fdata,fun,alpha) overrides the default +% finite differencing stepsize. +% + +xcurr = x(:); % Preserve x so we know what funfcn expects +scalealpha = 0; +[m,n] = size(H); +v = zeros(n,1); +ncol = max(group); epsi = sqrt(eps); +if isempty(alpha) + scalealpha = 1; + alpha = ones(ncol,1)*sqrt(eps); +end +H = spones(H); d = zeros(n,1); +for k = 1:ncol + d = (group == k); + if scalealpha + xnrm = norm(xcurr(d)); + xnrm = max(xnrm,1); + alpha(k) = alpha(k)*xnrm; + end + y = xcurr + alpha(k)*d; + + % Make x conform to user-x + x(:) = y; + %[dummy,v] = feval(fun,y); + switch funfcn{1} + case 'fun' + error('should not reach this') + case 'fungrad' + [dummy,v(:)] = feval(funfcn{3},x,varargin{:}); + %OPTIONS(11)=OPTIONS(11)+1; + case 'fun_then_grad' + %newval = feval(funfcn{3},x,varargin{:}); + v(:) = feval(funfcn{4},x,varargin{:}); + % OPTIONS(11)=OPTIONS(11)+1; + otherwise + error('Undefined calltype in FMINUNC'); + end + + + w = (v-grad)/alpha(k); + cols = find(d); + lpoint = length(cols); + A = sparse(m,n); + A(:,cols) = H(:,cols); + H(:,cols) = H(:,cols) - A(:,cols); + [i,j,val] = find(A); + [p,ind] = sort(i); + val(ind) = w(p); + A = sparse(i,j,full(val),m,n); + H = H + A; +end +H = (H+H')/2; % symmetricize + + +%===== hprecon.m ================================================ + +function[R,pvec] = hprecon(H,upperbandw,DM,DG,varargin); +%HPRECON Sparse Cholesky factor of H-preconditioner +% +% [R,PVEC] = HPRECON(H,UPPERBANDW,DM,DG) computes the +% sparse Cholesky factor (transpose of a (usually) banded +% preconditioner of square matrix M +% M = DM*H*DM + DG +% wehre DM and DG are non-negative sparse diagonal matrices. +% R'*R approximates M(pvec,pvec), i.e. +% R'*R = M(pvec,pvec) +% +% H may not be the true Hessian. If H is the same size as the +% true Hessian, H will be used in computing the preconditioner R. +% Otherwise, compute a diagonal preconditioner for +% M = DM*DM + DG +% +% If 0 < UPPERBANDW < n then the upper bandwidth of +% R is UPPERBANDW. If UPPERBANDW >= n then the structure of R +% corresponds to a sparse Cholesky factorization of H +% using the symmmd ordering (the ordering is returned in PVEC). +% + +% Default preconditioner for SFMINBX and SQPMIN. 
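+%
+% The three cases handled below (paraphrasing the code, not a contract):
+%   UPPERBANDW == 0      - diagonal R, R(i,i) = max(sqrt(||M(:,i)||), 1e-4)
+%   0 < UPPERBANDW < n-1 - Cholesky of the banded part of M, with lambda*eye(n)
+%                          added and the factorization retried until it succeeds
+%   UPPERBANDW >= n-1    - Cholesky of all of M under a SYMMMD ordering, again
+%                          shifted by lambda*eye(n) until positive definite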
+ +if nargin < 1, + error('hprecon requires at least 1 input parameter.'); +end +if nargin <2, + upperbandw = 0; + if nargin < 3 + DM = []; + if nargin < 4 + DG = []; + end, end, end + +[rows,cols] = size(H); +n = length(DM); +% In case "H" isn't really H, but something else to use with HessMult function. +if ~isnumeric(H) | ~isequal(n,rows) | ~isequal(n,cols) + % H is not the right size; ignore requested bandwidth and compute + % diagonal preconditioner based only on DM and DG. + pvec = (1:n); + d1 = full(diag(DM)); % full vector + d2 = full(diag(DG)); + dd = sqrt(d1.*d1 + abs(d2)); + R = sparse(1:n,1:n,dd); + return +end + +H = DM*H*DM + DG; +pvec = (1:n); +epsi = .0001*ones(n,1); +info = 1; + +if upperbandw >= n-1 % Try complete approximation to H + pvec = symmmd(H); + ddiag = diag(H); + mind = min(ddiag); + lambda = 0; + if mind < 0, + lambda = -mind + .001; + end + while info > 0 + H = H + lambda*speye(n); + [R,info] = chol(H(pvec,pvec)); + lambda = lambda + 10; + end +elseif (upperbandw > 0) & ( upperbandw < n-1) % Banded approximation to H + % Banded approximation + lambda = 0; + ddiag = diag(H); + mind = min(ddiag); + if mind < 0, + lambda = -mind + .001; + end + H = tril(triu(H,-upperbandw),upperbandw); + while info > 0 + H = H + lambda*speye(n); + [R,info] = chol(H); + lambda = 4*lambda; + if lambda <= .001, + lambda = 1; + end + end +elseif upperbandw == 0 % diagonal approximation for H + dnrms = sqrt(sum(H.*H))'; + d = max(sqrt(dnrms),epsi); + R = sparse(1:n,1:n,full(d)); + pvec = (1:n); +else + error('upperbandw must be >= 0.') +end diff --git a/boosting/weightedstats/private/statsizechk.m b/boosting/weightedstats/private/statsizechk.m new file mode 100644 index 0000000..9fb92df --- /dev/null +++ b/boosting/weightedstats/private/statsizechk.m @@ -0,0 +1,29 @@ +function [err, commonSize, numElements] = statsizechk(nparams,varargin) +%STATSIZECHK Check for compatible array sizes. +% [ERR,COMMONSIZE,NUMELEMENTS] = STATSIZECHK(NPARAMS,A,B,...,M,N,...) or +% [ERR,COMMONSIZE,NUMELEMENTS] = STATSIZECHK(NPARAMS,A,B,...,[M,N,...]) +% in effect computes size( A + B + ... + zeros(M,N,...) ), and catches +% any size mismatches. NPARAMS is the number of array input arguments. + +% Copyright 1993-2004 The MathWorks, Inc. +% $Revision: 1.1.6.2 $ $Date: 2004/01/24 09:36:35 $ +% +% Mex file. 
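+%
+% Usage sketch (illustrative values; scalars expand, non-scalar arguments
+% must agree in size):
+%   [err,sz,n] = statsizechk(2, 1.5, zeros(2,3))         % err=0, sz=[2 3], n=6
+%   [err,sz,n] = statsizechk(2, zeros(2,3), zeros(3,2))  % err=1 (size mismatch)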
+ +% try +% tmp = 0; +% for argnum = 1:nparams +% tmp = tmp + varargin{argnum}; +% end +% if nargin > nparams+1 +% tmp = tmp + zeros(varargin{nparams+1:end}); +% end +% err = 0; +% commonSize = size(tmp); +% numElements = numel(tmp); +% +% catch +% err = 1; +% commonSize = []; +% numElements = 0; +% end diff --git a/boosting/weightedstats/private/statsizechk.mexglx b/boosting/weightedstats/private/statsizechk.mexglx new file mode 100644 index 0000000..5df265d Binary files /dev/null and b/boosting/weightedstats/private/statsizechk.mexglx differ diff --git a/boosting/weightedstats/private/statsizechk.mexglx.csf b/boosting/weightedstats/private/statsizechk.mexglx.csf new file mode 100644 index 0000000..80a545a Binary files /dev/null and b/boosting/weightedstats/private/statsizechk.mexglx.csf differ diff --git a/boosting/weightedstats/private/statsrexact.m b/boosting/weightedstats/private/statsrexact.m new file mode 100644 index 0000000..87f2f40 --- /dev/null +++ b/boosting/weightedstats/private/statsrexact.m @@ -0,0 +1,68 @@ +function [pval,P] = statsrexact(v,w) +%STATSREXACT Compute exacttail probability for signed rank statistic +% [PVAL,ALLP]=STATSREXACT(V,W) computes the tail probability PVAL +% for the statistic W with the vector V of ranks. ALLP is a matrix +% containing the probabilities (col. 2) for each W value (col. 1). +% +% Private function used by the SIGNRANK function. + +% Copyright 2003-2004 The MathWorks, Inc. +% $Revision: 1.1.6.3 $ $Date: 2004/01/24 09:36:36 $ + +n = length(v); +v = sort(v); + +% For convenience we can just compute the lower tail. If w is +% in the upper tail, compute its equivalent lower tail value. +maxw = n*(n+1)/2; +folded = (w>maxw/2); +if folded + w = maxw-w; +end + +% We would like to use the elements of w and v as indexes into +% arrays that enumerate possible values. If there are ties causing +% non-integer ranks, multiply by 2 to force everything to integer. +doubled = any(v~=floor(v)); +if doubled + v = round(2*v); + w = round(2*w); +end + +C = zeros(w+1,1); % C(w+1) will be the number of combinations adding + % to w at each step +C(1) = 1; % just one combination includes nothing +top = 1; % top entry currently in use in C vector + +% Look at all combinations of ranks that could contribute +% to the observed value of W +for vj=v(v<=w) + + % C now enumerates combinations not including v(j). Now update the + % elements that could include v(j). + newtop = min(top+vj,w+1); + hi = min(vj,w+1)+1:newtop; + lo = 1:length(hi); + + C(hi) = C(hi) + C(lo); + + top = newtop; +end + +% Convert to probabilities +C = C / (2^n); + +% Get tail probability +pval = sum(C); + +if nargout>1 + allw = 0:w; + if doubled + allw = allw/2; + end + if folded + allw = n*(n+1)/2 - allw; + end + + P = [allw(:), C(:)]; +end diff --git a/boosting/weightedstats/private/stattestlink.m b/boosting/weightedstats/private/stattestlink.m new file mode 100644 index 0000000..bec52e3 --- /dev/null +++ b/boosting/weightedstats/private/stattestlink.m @@ -0,0 +1,58 @@ +function [emsg,link,dlink,ilink,exponent]=stattestlink(link) +%STATTESTLINK Test link function for GLMFIT and GLMVAL + +% Author: Tom Lane, 3-7-2000 +% Copyright 1993-2002 The MathWorks, Inc. 
+% $Revision: 1.3 $ $Date: 2002/02/04 19:25:49 $ + +dlink = ''; +ilink = ''; +exponent = {}; +emsg = ''; + +if (iscell(link)) + % A cell array of three functions is okay + if (length(link)~=3) + emsg = 'LINK cell array must have three components'; + return + end + dlink = link{2}; + ilink = link{3}; + link = link{1}; + if (~statglmeval('testlink',link)) + emsg = 'LINK function is not valid'; + return + end + if (~statglmeval('testlink',dlink)) + emsg = 'LINK function derivative is not valid'; + return + end + if (~statglmeval('testlink',ilink)) + emsg = 'LINK function inverse is not valid'; + return + end +elseif (ischar(link) & size(link,1)==1) + % A function name is okay, but three functions must exist + dlink = ['d_' link]; + ilink = ['i_' link]; + if (~statglmeval('testlink',link)) + emsg = sprintf('Cannot find LINK function %s.',link); + return + end + if (~statglmeval('testlink',dlink)) + emsg = sprintf('Cannot find LINK function derivative %s.',dlink); + return + end + if (~statglmeval('testlink',ilink)) + emsg = sprintf('Cannot find LINK function inverse %s.',ilink); + return + end +elseif (isnumeric(link) & length(link)==1) + exponent = {link}; + link = 'power'; + dlink = ['d_' link]; + ilink = ['i_' link]; +else + emsg = 'LINK function is not valid.'; + return +end diff --git a/boosting/weightedstats/private/stdrcdf.m b/boosting/weightedstats/private/stdrcdf.m new file mode 100644 index 0000000..dbdeda7 --- /dev/null +++ b/boosting/weightedstats/private/stdrcdf.m @@ -0,0 +1,104 @@ +function xout = stdrcdf(q, v, r, upper) +%STDRCDF Compute c.d.f. for Studentized Range statistic +% F = STDRCDF(Q,V,R) is the cumulative distribution function for the +% Studentized range statistic for R samples and V degrees of +% freedom, evaluated at Q. +% +% G = STDRCDF(Q,V,R,'upper') is the upper tail probability, +% G=1-F. This version computes the upper tail probability +% directly (not by subtracting it from 1), and is likely to be +% more accurate if Q is large and therefore F is close to 1. + +% Copyright 1993-2002 The MathWorks, Inc. +% $Revision: 1.3 $ $Date: 2002/02/04 19:25:50 $ + +% Based on Fortran program from statlib, http://lib.stat.cmu.edu +% Algorithm AS 190 Appl. Statist. (1983) Vol.32, No. 2 +% Incorporates corrections from Appl. Statist. (1985) Vol.34 (1) +% Vectorized and simplified for MATLAB. Added 'upper' option. + +if (length(q)>1 | length(v)>1 | length(r)>1), + error('STDRCDF requires scalar arguments.'); % for now +end +[err,q,v,r] = distchck(3,q,v,r); +if (err > 0), error('Non-scalar arguments must match in size.'); end +uppertail = 0; +if (nargin>3) + if ~( isequal(upper,'u') | isequal(upper,'upper') ... + | isequal(upper,'l') | isequal(upper,'lower')) + error('Fourth argument must be ''upper'' or ''lower''.'); + end + uppertail = isequal(upper,'u') | isequal(upper,'upper'); +end + +% Accuracy can be increased by use of a finer grid. Increase +% jmax, kmax and 1/step proportionally. +jmax = 15; % controls maximum number of steps +kmax = 15; % controls maximum number of steps +step = 0.45; % node spacing +vmax = 120; % max d.f. for integration over chi-square + +% Handle illegal or trivial values first. +xout = zeros(size(q)); +if (length(xout) == 0), return; end +ok = (v>0) & (v==round(v)) & (r>1) & (r==round(r)); +xout(~ok) = NaN; +ok = ok & (q > 0); +v = v(ok); +q = q(ok); +r = r(ok); +if (length(v) == 0), return; end +xx = zeros(size(v)); + +% Compute constants, locate midpoint, adjust steps. 
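+% (Sketch of the computation below: for v <= vmax the double sum evaluates the
+% AS 190 integrand on a 2-D grid -- the j index, with log-weights vw, integrates
+% over the distribution of the sample standard deviation, while the k index,
+% grid gk with log-weights w0, integrates over the normal ordinate; for v > vmax
+% the chi-square integration is skipped and only the sum over gk is used.)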
+g = step ./ (r .^ 0.2); +if (v > vmax) + c = log(r .* g ./ sqrt(2*pi)); +else + h = step ./ sqrt(v); + v2 = v * 0.5; + c = sqrt(2/pi) * exp(-v2) .* (v2.^v2) ./ gamma(v2); + c = log(c .* r .* g .* h); + + j=(-jmax:jmax)'; + hj = h * j; + ehj = exp(hj); + qw = q .* ehj; + vw = v .* (hj + 0.5 * (1 - ehj .^2)); + C = ones(1,2*kmax+1); % index to duplicate columns + R = ones(1,2*jmax+1); % index to duplicate rows +end + +% Compute integral by summing the integrand over a +% two-dimensional grid centered approximately near its maximum. +gk = (0.5 * log(r)) + g * (-kmax:kmax); +w0 = c - 0.5 * gk .^ 2; +pz = normcdf(-gk); +if (~uppertail) + % For regular cdf, use integrand as in AS 190. + if (v > vmax) + % don't integrate over chi-square + x = normcdf(q - gk) - pz; + xx = sum(exp(w0) .* (x .^ (r-1))); + else + % integrate over chi-square + x = normcdf(qw(:,C) - gk(R,:)) - pz(R,:); + xx = sum(sum(exp(w0(R,:) + vw(:,C)) .* (x .^ (r-1)))); + end +else + % To compute the upper tail probability, we need an integrand that + % contains the normal probability of a region consisting of a + % hyper-quadrant minus a rectangular region at the origin of the + % hyperquadrant. + if (v > vmax) % for large d.f., don't integrate over chi-square + xhq = (1 - pz) .^ (r-1); + xrect = (normcdf(q - gk) - pz) .^ (r-1); + xx = sum(exp(w0) .* (xhq - xrect)); + else % for typical cases, integrate over chi-square + xhq = (1 - pz) .^ (r-1); + xrect = (normcdf(qw(:,C) - gk(R,:)) - pz(R,:)) .^ (r-1); + xx = sum(sum(exp(w0(R,:) + vw(:,C)) .* (xhq(R,:) - xrect))); + end +end + +xout(ok) = xx; \ No newline at end of file diff --git a/boosting/weightedstats/private/stdrinv.m b/boosting/weightedstats/private/stdrinv.m new file mode 100644 index 0000000..2615b0e --- /dev/null +++ b/boosting/weightedstats/private/stdrinv.m @@ -0,0 +1,91 @@ +function x = stdrinv(p, v, r) +%STDRINV Compute inverse c.d.f. for Studentized Range statistic +% STDRINV(P,V,R) is the inverse cumulative distribution function for +% the Studentized range statistic for R samples and V degrees of +% freedom, evaluated at P. + +% Copyright 1993-2002 The MathWorks, Inc. +% $Revision: 1.3 $ $Date: 2002/02/04 18:52:50 $ + +% Based on Fortran program from statlib, http://lib.stat.cmu.edu +% Algorithm AS 190 Appl. Statist. (1983) Vol.32, No. 2 +% Incorporates corrections from Appl. Statist. (1985) Vol.34 (1) + +if (length(p)>1 | length(v)>1 | length(r)>1), + error('STDRINV requires scalar arguments.'); % for now +end + +[err,p,v,r] = distchck(3,p,v,r); +if (err > 0), error('Non-scalar arguments must match in size.'); end + +% Handle illegal or trivial values first. 
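+% (Overall approach, per AS 190: start from the closed-form approximation in
+% qtrng0 below, then refine the quantile with a secant-type iteration on STDRCDF
+% until the cdf agrees with P to within the relative tolerance PCUT.)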
+x = zeros(size(p)); +if (length(x) == 0), return; end +ok = (v>0) & (v==round(v)) & (r>1) & (r==round(r) & (p<1)); +x(~ok) = NaN; +ok = ok & (p>0); +v = v(ok); +p = p(ok); +r = r(ok); +if (length(v) == 0), return; end +xx = zeros(size(v)); + +% Define constants +jmax = 20; +pcut = 0.00001; +tiny = 0.000001; +upper = (p > .99); +if (upper) + uppertail = 'u'; + p0 = 1-p; +else + uppertail = 'l'; + p0 = p; +end + +% Obtain initial values +q1 = qtrng0(p, v, r); +p1 = stdrcdf(q1, v, r, uppertail); +xx = q1; +if (abs(p1-p0) >= pcut*p0) + if (p1 > p0), p2 = max(.75*p0, p0-.75*(p1-p0)); end + if (p1 < p0), p2 = p0 + (p0 - p1) .* (1 - p0) ./ (1 - p1) * 0.75; end + if (upper) + q2 = qtrng0(1-p2, v, r); + else + q2 = qtrng0(p2, v, r); + end + + % Refine approximation + for j=2:jmax + p2 = stdrcdf(q2, v, r, uppertail); + e1 = p1 - p0; + e2 = p2 - p0; + d = e2 - e1; + xx = (q1 + q2) / 2; + if (abs(d) > tiny*p0) + xx = (e2 .* q1 - e1 .* q2) ./ d; + end + if (abs(e1) >= abs(e2)) + q1 = q2; + p1 = p2; + end + if (abs(p1 - p0) < pcut*p0), break; end + q2 = xx; + end +end + +x(ok) = xx; + +% --------------------------------- +function x = qtrng0(p, v, r) +% Algorithm AS 190.2 Appl. Statist. (1983) Vol.32, No.2 +% Calculates an initial quantile p for a studentized range +% distribution having v degrees of freedom and r samples +% for probability p, p.gt.0.80 .and. p.lt.0.995. + +t=norminv(0.5 + 0.5 .* p); +if (v < 120), t = t + 0.25 * (t.^3 + t) ./ v; end +q = 0.8843 - 0.2368 .* t; +if (v < 120), q = q - (1.214./v) + (1.208.*t./v); end +x = t .* (q .* log(r-1) + 1.4142); diff --git a/boosting/weightedstats/private/termcross.m b/boosting/weightedstats/private/termcross.m new file mode 100644 index 0000000..544fd8f --- /dev/null +++ b/boosting/weightedstats/private/termcross.m @@ -0,0 +1,13 @@ +function ab = termcross(a,b) +%TERMCROSS Multiply dummy variables for two terms to get interaction + +% Copyright 1993-2002 The MathWorks, Inc. +% $Revision: 1.3 $ $Date: 2002/02/04 18:52:51 $ +if (isempty(a)), ab = b; return, end +if (isempty(b)), ab = a; return, end + +na = size(a,2); +nb = size(b,2); +acols = repmat((1:na), 1, nb); +bcols = reshape(repmat((1:nb), na, 1), 1, na*nb); +ab = a(:,acols) .* b(:,bcols); diff --git a/boosting/weightedstats/treefitw.asv b/boosting/weightedstats/treefitw.asv new file mode 100644 index 0000000..2abd099 --- /dev/null +++ b/boosting/weightedstats/treefitw.asv @@ -0,0 +1,562 @@ +function Tree=treefitw(X,y,w, equivsample, varargin) +%TREEFIT Fit a tree-based model for classification or regression. +% T = TREEFIT(X,Y) creates a decision tree T for predicting response Y +% as a function of predictors X. X is an N-by-M matrix of predictor +% values. Y is either a vector of N response values (for regression), +% or a character array or cell array of strings containing N class +% names (for classification). Either way, T is binary tree where each +% non-terminal node is split based on the values of a column of X. NaN +% values in X or Y are taken to be missing values, and observations with +% any missing values are not used in the fit. +% +% W is the weight vector with sum(W) = N +% Equivsample is the equivalent sample size (add that number to each node +% for each class. +% +% T = TREEFIT(X,Y,W,0,'PARAM1',val1,'PARAM2',val2,...) 
specifies optional +% parameter name/value pairs: +% +% For all trees: +% 'catidx' Vector of indices of the columns of X that are to be +% treated as unordered categorical variables +% 'method' Either 'classification' (default if Y is text) or +% 'regression' (default if Y is numeric) +% 'splitmin' A number N such that impure nodes must have N or more +% observations to be split (default 10) +% 'prune' 'on' (default) to compute the full tree and the optimal +% sequence of pruned subtrees, or 'off' for the full tree +% without pruning +% +% For classification trees only: +% 'cost' Square matrix C, C(i,j) is the cost of classifying +% a point into class j if its true class is i (default +% has C(i,j)=1 if i~=j, and C(i,j)=0 if i=j). Alternatively +% this value can be a structure S having two fields: S.group +% continaing the group names as a character array or cell +% array of strings, and S.cost containing the cost matrix C. +% 'splitcriterion' Criterion for choosing a split, either 'gdi' (default) +% for Gini's diversity index, 'twoing' for the twoing rule, +% or 'deviance' for maximum deviance reduction +% 'priorprob' Prior probabilities for each class, specified as a +% vector (one value for each distinct group name) or as a +% structure S with two fields: S.group containing the group +% names as a character array or cell array of strings, and +% S.prob containing a a vector of corresponding probabilities +% +% Example: Create classification tree for Fisher's iris data. +% load fisheriris; +% t = treefit(meas, species); +% treedisp(t,'names',{'SL' 'SW' 'PL' 'PW'}); +% +% See also TREEDISP, TREEPRUNE, TREETEST, TREEVAL. + +% Reference: Breiman et al. (1993), "Classification and Regression +% Trees," Chapman and Hall, Boca Raton. + +% Copyright 1993-2004 The MathWorks, Inc. +% $Revision: 1.1 $ $Date: 2004/08/23 14:32:11 $ + +% Process inputs +if isnumeric(y) + Method = 'regression'; +else + Method = 'classification'; + if (ischar(y)) + y = cellstr(y); + end +end +if ~isnumeric(X) + error('X must be a numeric matrix.'); +end +okargs = {'priorprob' 'cost' 'splitcriterion' 'splitmin' 'catidx' 'prune' 'method'}; +defaults = {[] [] 'gdi' 10 [] 'on' Method}; +[emsg Prior Cost Criterion Splitmin Catidx Prune Method] = ... + statgetargs(okargs,defaults,varargin{:}); +error(emsg); + +if ~isstr(Method) | isempty(Method) | ~(Method(1)=='c' | Method(1)=='r') + error('Value of ''method'' parameter must be ''classification'' or ''regression''.'); +elseif Method(1)=='c' + Method = 'classification'; +else + Method = 'regression'; +end + +t = any(isnan(X),2); +if isequal(Method,'regression') + t = t | isnan(y); +end +if any(t) + disp(['nan: ' num2str(find(t==1)')]) + X(t,:) = []; + y(t) = []; +end + +[N,nvars] = size(X); +doclass = isequal(Method(1),'c'); +if doclass + switch(Criterion) + % Criterion function Is it an impurity measure? 
+ % ------------------ -------------------------- + case 'gdi', critfun = @gdi; impurity = 1; + case 'twoing', critfun = @twoing; impurity = 0; + case 'deviance', critfun = @deviance; impurity = 1; + otherwise, error('Bad value for ''splitcriterion'' parameter.') + end + + % Get binary matrix, C(i,j)==1 means point i is in class j + if islogical(y) + y = double(y); + end + [y,cnames] = grp2idx(y); % find groups only after NaNs removed from X + if any(isnan(y)) + t = isnan(y); + y(t) = []; + X(t,:) = []; + N = size(X,1); + end + nclasses = max(y); + C = zeros(N,nclasses); + C(sub2ind([N nclasses],(1:N)',y)) = 1; + for cindex = 1:nclasses + C(:, cindex) =C(:, cindex).*w; + end + Nj = sum(C,1); +else + C = y(:); +end + +% Tree structure fields ([C] only for classification trees): +% .method method +% .node node number +% .parent parent node number +% .class class assignment for points in this node if treated as a leaf +% .var column j of X matrix to be split, or 0 for a leaf node, +% or -j to treat column j as categorical +% .cut cutoff value for split (Xj0, 1); + if doclass + % Compute class probabilities and related statistics for this node + %Njt = sum(Cnode,1); % number in class j at node t + Njt = sum(Cnode,1) + equivsample; % number in class j at node t + Pjandt = Prior .* Njt ./ (Nj+equivsample*nclasses); + Pjgivent = Pjandt / sum(Pjandt); + misclasscost = Pjgivent * Cost; + [mincost,nodeclass] = min(misclasscost); + yfitnode(tnode) = nodeclass; + Pt = sum(Pjandt); + nodeprob(tnode) = Pt; + classprob(tnode,:) = Pjgivent; + classcount(tnode,:) = Njt; + pratio = adjprior ./ Nj; + % was ... impure = sum(Pjgivent>0)>1; + impure = sum(NnodeC>1)>1; % must be "significantly" impure + else + % Compute variance and related statistics for this node + ybar = mean(Cnode); + yfitnode(tnode) = ybar; + nodeprob(tnode) = Nnode/N; + sst = norm(Cnode-ybar)^2; % total sum of squares at this node + mincost = sst / Nnode; + impure = (mincost > 1e-6 * resuberr(1)); + end + bestcrit = -Inf; + nodesize(tnode) = Nnode; + resuberr(tnode) = mincost; + risk(tnode) = nodeprob(tnode) * resuberr(tnode); + cutvar(tnode) = 0; + cutpoint(tnode) = 0; + children(tnode,:) = 0;; + + % Consider splitting this node + if Nnode>=Splitmin & impure % split only large impure nodes + disp(num2str([Nnode sum(Cnode, 1)])) + Xnode = X(noderows,:); + bestvar = 0; + bestcut = 0; + + % Find the best of all possible splits + for jvar=1:nvars + x = Xnode(:,jvar); % get jth x variable + [x,idx] = sort(x); % sort it + xcat = iscat(jvar); + if doclass + Ccum = cumsum(Cnode(idx,:)); % cum. class counts + [critval,cutval]=Ccritval(x,Ccum,xcat,pratio,Pt,impurity,critfun); + else + ycum = cumsum(Cnode(idx,:) - ybar); % centered response cum. sum + [critval,cutval]=Rcritval(x,ycum,xcat); + end + + % Change best split if this one is best so far + if critval>bestcrit + bestcrit = critval; + bestvar = jvar; + bestcut = cutval; + end + end + + % Split this node using the best rule found + if bestvar~=0 + x = Xnode(:,bestvar); + if ~iscat(bestvar) + cutvar(tnode) = bestvar; + cutpoint(tnode) = bestcut; + leftside = x<=bestcut; + rightside = ~leftside; + else + cutvar(tnode) = -bestvar; % negative indicates cat. var. 
split + ncatsplit = size(catsplit,1) + 1; % index into catsplit cell array + cutpoint(tnode) = ncatsplit; + catsplit(ncatsplit,:) = bestcut; + leftside = ismember(x,bestcut{1}); + rightside = ismember(x,bestcut{2}); + end + children(tnode,:) = nextunusednode + (0:1); + assignednode(noderows(leftside)) = nextunusednode; + assignednode(noderows(rightside)) = nextunusednode+1; + nodenumber(nextunusednode+(0:1)) = nextunusednode+(0:1)'; + parent(nextunusednode+(0:1)) = tnode; + nextunusednode = nextunusednode+2; + end + end + tnode = tnode + 1; + disp(['current node: ' num2str(tnode)]); +end + +topnode = nextunusednode - 1; +Tree.method = Method; +Tree.node = nodenumber(1:topnode); +Tree.parent = parent(1:topnode); +Tree.class = yfitnode(1:topnode); +Tree.var = cutvar(1:topnode); +Tree.cut = cutpoint(1:topnode); +Tree.children = children(1:topnode,:); +Tree.nodeprob = nodeprob(1:topnode); +Tree.nodeerr = resuberr(1:topnode); +Tree.risk = risk(1:topnode); +Tree.nodesize = nodesize(1:topnode); +Tree.npred = nvars; +Tree.catcols = Catidx; +if doclass + if ~haveprior, Prior=[]; end + Tree.prior = Prior; + Tree.nclasses = nclasses; + Tree.cost = Cost; + Tree.classprob = classprob(1:topnode,:); + Tree.classcount= classcount(1:topnode,:); + Tree.classname = cnames; +end + +Tree.catsplit = catsplit; % list of all categorical predictor splits + +Tree = removebadsplits(Tree); + +if isequal(Prune,'on') + Tree = treeprune(Tree); +end + +%---------------------------------------------------- +function v=gdi(p) +%GDI Gini diversity index + +v=1-sum(p.^2,2); + +%---------------------------------------------------- +function v=twoing(Pleft, P1, Pright, P2) +%TWOING Twoing index + +v = 0.25 * Pleft .* Pright .* sum(abs(P1-P2),2).^2; + +%---------------------------------------------------- +function v=deviance(p) +%DEVIANCE Deviance + +v = -2 * sum(p .* log(max(p,eps)), 2); + +%---------------------------------------------------- +function [critval,cutval]=Ccritval(x,Ccum,iscat,pratio,Pt,impurity,critfun) +%CCRITVAL Get critical value for splitting node in classification tree. 
+ +% First get all possible split points +Ncum = (1:length(x))'; +rows = Ncum(diff(x)>0); +if isempty(rows) + critval = -Inf; + cutval = 0; + return +end + +% Get arrays showing left/right class membership at each split +nsplits = length(rows); +if iscat + % A picks out all category subsets including the 1st, but not the whole set + A = ones(2^nsplits,nsplits+1); + A(:,2:end) = fullfact(2*ones(1,nsplits)) - 1; + A(end,:) = []; + + % B contains the class counts in each category + t = [rows; size(Ccum,1)]; + B = Ccum(t,:); + B(2:end,:) = B(2:end,:) - B(1:end-1,:); + + Csplit1 = A*B; + nsplits = size(Csplit1,1); + allx = x(t); +else + % Split between each pair of distinct ordered values + Csplit1 = Ccum(rows,:); +end +Csplit2 = repmat(Ccum(end,:),nsplits,1) - Csplit1; + +% Get left/right class probabilities at each split +temp = repmat(pratio,nsplits,1); +P1 = temp .* Csplit1; +P2 = temp .* Csplit2; +Ptleft = sum(P1,2); +Ptright = sum(P2,2); +nclasses = size(P1,2); +P1 = P1 ./ repmat(Ptleft,1,nclasses); +P2 = P2 ./ repmat(Ptright,1,nclasses); + +% Get left/right node probabilities +Pleft = Ptleft ./ Pt; +Pright = 1 - Pleft; + +% Evaluate criterion as impurity or otherwise +if impurity + crit = - Pleft.*feval(critfun,P1) - Pright.*feval(critfun,P2); +else + crit = feval(critfun, Pleft, P1, Pright, P2); +end + +% Return best split point +critval = max(crit); +maxloc = find(crit==critval); +if length(maxloc)>1 + maxloc = maxloc(1+floor(length(maxloc)*rand)); +end +if iscat + t = logical(A(maxloc,:)); + xleft = allx(t); + xright = allx(~t); + cutval = {xleft' xright'}; +else + cutloc = rows(maxloc); + cutval = (x(cutloc) + x(cutloc+1))/2; +end + +%---------------------------------------------------- +function [critval,cutval]=Rcritval(x,Ycum,iscat) +%RCRITVAL Get critical value for splitting node in regression tree. + +% First get all possible split points +Ncum = (1:length(x))'; +rows = Ncum(diff(x)>0); +if isempty(rows) + critval = -Inf; + cutval = 0; + return +end + +% Get arrays showing left/right class membership at each split +nsplits = length(rows); +if iscat + % A picks out all category subsets including the 1st, but not the whole set + A = ones(2^nsplits,nsplits+1); + A(:,2:end) = fullfact(2*ones(1,nsplits)) - 1; + A(end,:) = []; + + % B contains the category sums + t = [rows; size(Ycum,1)]; + B = Ycum(t,:); + B(2:end,:) = B(2:end,:) - B(1:end-1,:); + + Ysplit1 = A*B; + n1 = A*[t(1);diff(t)]; + allx = x(t); % take one x value from each unique set +else + % Split between each pair of distinct ordered values + Ysplit1 = Ycum(rows,:); + n1 = rows; +end + +% Get left/right means +N = Ncum(end); +mu1 = Ysplit1 ./ n1; +mu2 = (Ycum(end) - Ysplit1) ./ (N - n1); + +ssx = n1.*mu1.^2 + (N-n1).*mu2.^2; +critval = max(ssx); +maxloc = find(ssx==critval); +if length(maxloc)>1 + maxloc = maxloc(1+floor(length(maxloc)*rand)); +end +if iscat + t = logical(A(maxloc,:)); + xleft = allx(t); + xright = allx(~t); + cutval = {xleft' xright'}; +else + cutloc = rows(maxloc); + cutval = (x(cutloc) + x(cutloc+1))/2; +end + +% -------------------------------------- +function Tree = removebadsplits(Tree) +%REMOVEBADSPLITS Remove splits that contribute nothing to the tree. 
+ +N = length(Tree.node); +isleaf = (Tree.var==0)'; % no split variable implies leaf node +isntpruned = true(1,N); +doprune = false(1,N); +adjfactor = (1 - 100*eps); +risk = Tree.risk'; + +% Work up from the bottom of the tree +while(true) + % Find "twigs" with two leaf children + leafs = find(isleaf & isntpruned); + branches = find(~isleaf & isntpruned); + twig = branches(sum(isleaf(Tree.children(branches,:)),2) == 2); + if isempty(twig) + break; % must have just the root node left + end + + % Find twigs to "unsplit" if the error of the twig is no larger + % than the sum of the errors of the children + Rtwig = risk(twig); + kids = Tree.children(twig,:); + Rsplit = sum(risk(kids),2); + unsplit = Rsplit >= Rtwig'*adjfactor; + if any(unsplit) + % Mark children as pruned, and mark twig as now a leaf + isntpruned(kids(unsplit,:)) = 0; + twig = twig(unsplit); % only these to be marked on next 2 lines + isleaf(twig) = 1; + doprune(twig) = 1; + else + break; + end +end + +% Remove splits that are useless +if any(doprune) + Tree = treeprune(Tree,'nodes',find(doprune)); +end diff --git a/boosting/weightedstats/treefitw.m b/boosting/weightedstats/treefitw.m new file mode 100644 index 0000000..2770708 --- /dev/null +++ b/boosting/weightedstats/treefitw.m @@ -0,0 +1,561 @@ +function Tree=treefitw(X,y,w, equivsample, varargin) +%TREEFIT Fit a tree-based model for classification or regression. +% T = TREEFIT(X,Y) creates a decision tree T for predicting response Y +% as a function of predictors X. X is an N-by-M matrix of predictor +% values. Y is either a vector of N response values (for regression), +% or a character array or cell array of strings containing N class +% names (for classification). Either way, T is binary tree where each +% non-terminal node is split based on the values of a column of X. NaN +% values in X or Y are taken to be missing values, and observations with +% any missing values are not used in the fit. +% +% W is the weight vector with sum(W) = N +% Equivsample is the equivalent sample size (add that number to each node +% for each class. +% +% T = TREEFIT(X,Y,W,0,'PARAM1',val1,'PARAM2',val2,...) specifies optional +% parameter name/value pairs: +% +% For all trees: +% 'catidx' Vector of indices of the columns of X that are to be +% treated as unordered categorical variables +% 'method' Either 'classification' (default if Y is text) or +% 'regression' (default if Y is numeric) +% 'splitmin' A number N such that impure nodes must have N or more +% observations to be split (default 10) +% 'prune' 'on' (default) to compute the full tree and the optimal +% sequence of pruned subtrees, or 'off' for the full tree +% without pruning +% 'maxnodes' The maximum number of splitting nodes in the tree +% +% For classification trees only: +% 'cost' Square matrix C, C(i,j) is the cost of classifying +% a point into class j if its true class is i (default +% has C(i,j)=1 if i~=j, and C(i,j)=0 if i=j). Alternatively +% this value can be a structure S having two fields: S.group +% continaing the group names as a character array or cell +% array of strings, and S.cost containing the cost matrix C. 
+% 'splitcriterion' Criterion for choosing a split, either 'gdi' (default) +% for Gini's diversity index, 'twoing' for the twoing rule, +% or 'deviance' for maximum deviance reduction +% 'priorprob' Prior probabilities for each class, specified as a +% vector (one value for each distinct group name) or as a +% structure S with two fields: S.group containing the group +% names as a character array or cell array of strings, and +% S.prob containing a a vector of corresponding probabilities +% +% Example: Create classification tree for Fisher's iris data. +% load fisheriris; +% t = treefit(meas, species); +% treedisp(t,'names',{'SL' 'SW' 'PL' 'PW'}); +% +% See also TREEDISP, TREEPRUNE, TREETEST, TREEVAL. + +% Reference: Breiman et al. (1993), "Classification and Regression +% Trees," Chapman and Hall, Boca Raton. + +% Copyright 1993-2004 The MathWorks, Inc. +% $Revision: 1.1 $ $Date: 2004/08/23 14:32:11 $ + +% Process inputs +if isnumeric(y) + Method = 'regression'; +else + Method = 'classification'; + if (ischar(y)) + y = cellstr(y); + end +end +if ~isnumeric(X) + error('X must be a numeric matrix.'); +end +okargs = {'priorprob' 'cost' 'splitcriterion' 'splitmin' 'catidx' 'prune' 'method' 'maxnodes'}; +defaults = {[] [] 'gdi' 10 [] 'on' Method 1000}; +[emsg Prior Cost Criterion Splitmin Catidx Prune Method maxNodes] = ... + statgetargs(okargs,defaults,varargin{:}); +error(emsg); + +if ~isstr(Method) | isempty(Method) | ~(Method(1)=='c' | Method(1)=='r') + error('Value of ''method'' parameter must be ''classification'' or ''regression''.'); +elseif Method(1)=='c' + Method = 'classification'; +else + Method = 'regression'; +end + +t = any(isnan(X),2); +if isequal(Method,'regression') + t = t | isnan(y); +end +if any(t) + disp(['nan: ' num2str(find(t==1)')]) + X(t,:) = []; + y(t) = []; +end + +[N,nvars] = size(X); +doclass = isequal(Method(1),'c'); +if doclass + switch(Criterion) + % Criterion function Is it an impurity measure? 
+ % ------------------ -------------------------- + case 'gdi', critfun = @gdi; impurity = 1; + case 'twoing', critfun = @twoing; impurity = 0; + case 'deviance', critfun = @deviance; impurity = 1; + otherwise, error('Bad value for ''splitcriterion'' parameter.') + end + + % Get binary matrix, C(i,j)==1 means point i is in class j + if islogical(y) + y = double(y); + end + [y,cnames] = grp2idx(y); % find groups only after NaNs removed from X + if any(isnan(y)) + t = isnan(y); + y(t) = []; + X(t,:) = []; + N = size(X,1); + end + nclasses = max(y); + C = zeros(N,nclasses); + C(sub2ind([N nclasses],(1:N)',y)) = 1; + for cindex = 1:nclasses + C(:, cindex) =C(:, cindex).*w; + end + Nj = sum(C,1); +else + C = y(:); +end + +% Tree structure fields ([C] only for classification trees): +% .method method +% .node node number +% .parent parent node number +% .class class assignment for points in this node if treated as a leaf +% .var column j of X matrix to be split, or 0 for a leaf node, +% or -j to treat column j as categorical +% .cut cutoff value for split (Xj0, 1); + if doclass + % Compute class probabilities and related statistics for this node + %Njt = sum(Cnode,1); % number in class j at node t + Njt = sum(Cnode,1) + equivsample; % number in class j at node t + Pjandt = Prior .* Njt ./ (Nj+equivsample*nclasses); + Pjgivent = Pjandt / sum(Pjandt); + misclasscost = Pjgivent * Cost; + [mincost,nodeclass] = min(misclasscost); + yfitnode(tnode) = nodeclass; + Pt = sum(Pjandt); + nodeprob(tnode) = Pt; + classprob(tnode,:) = Pjgivent; + classcount(tnode,:) = Njt; + pratio = adjprior ./ Nj; + % was ... impure = sum(Pjgivent>0)>1; + impure = sum(NnodeC>2)>1; % must be "significantly" impure + else + % Compute variance and related statistics for this node + ybar = mean(Cnode); + yfitnode(tnode) = ybar; + nodeprob(tnode) = Nnode/N; + sst = norm(Cnode-ybar)^2; % total sum of squares at this node + mincost = sst / Nnode; + impure = (mincost > 1e-6 * resuberr(1)); + end + bestcrit = -Inf; + nodesize(tnode) = Nnode; + resuberr(tnode) = mincost; + risk(tnode) = nodeprob(tnode) * resuberr(tnode); + cutvar(tnode) = 0; + cutpoint(tnode) = 0; + children(tnode,:) = 0; + + % Consider splitting this node + if Nnode>=Splitmin & impure & tnode <= maxNodes % split only large impure nodes + Xnode = X(noderows,:); + bestvar = 0; + bestcut = 0; + + % Find the best of all possible splits + for jvar=1:nvars + x = Xnode(:,jvar); % get jth x variable + [x,idx] = sort(x); % sort it + xcat = iscat(jvar); + if doclass + Ccum = cumsum(Cnode(idx,:)); % cum. class counts + [critval,cutval]=Ccritval(x,Ccum,xcat,pratio,Pt,impurity,critfun); + else + ycum = cumsum(Cnode(idx,:) - ybar); % centered response cum. sum + [critval,cutval]=Rcritval(x,ycum,xcat); + end + + % Change best split if this one is best so far + if critval>bestcrit + bestcrit = critval; + bestvar = jvar; + bestcut = cutval; + end + end + + % Split this node using the best rule found + if bestvar~=0 + x = Xnode(:,bestvar); + if ~iscat(bestvar) + cutvar(tnode) = bestvar; + cutpoint(tnode) = bestcut; + leftside = x<=bestcut; + rightside = ~leftside; + else + cutvar(tnode) = -bestvar; % negative indicates cat. var. 
split + ncatsplit = size(catsplit,1) + 1; % index into catsplit cell array + cutpoint(tnode) = ncatsplit; + catsplit(ncatsplit,:) = bestcut; + leftside = ismember(x,bestcut{1}); + rightside = ismember(x,bestcut{2}); + end + children(tnode,:) = nextunusednode + (0:1); + assignednode(noderows(leftside)) = nextunusednode; + assignednode(noderows(rightside)) = nextunusednode+1; + nodenumber(nextunusednode+(0:1)) = nextunusednode+(0:1)'; + parent(nextunusednode+(0:1)) = tnode; + nextunusednode = nextunusednode+2; + end + end + tnode = tnode + 1; +end + +topnode = nextunusednode - 1; +Tree.method = Method; +Tree.node = nodenumber(1:topnode); +Tree.parent = parent(1:topnode); +Tree.class = yfitnode(1:topnode); +Tree.var = cutvar(1:topnode); +Tree.cut = cutpoint(1:topnode); +Tree.children = children(1:topnode,:); +Tree.nodeprob = nodeprob(1:topnode); +Tree.nodeerr = resuberr(1:topnode); +Tree.risk = risk(1:topnode); +Tree.nodesize = nodesize(1:topnode); +Tree.npred = nvars; +Tree.catcols = Catidx; +if doclass + if ~haveprior, Prior=[]; end + Tree.prior = Prior; + Tree.nclasses = nclasses; + Tree.cost = Cost; + Tree.classprob = classprob(1:topnode,:); + Tree.classcount= classcount(1:topnode,:); + Tree.classname = cnames; +end + +Tree.catsplit = catsplit; % list of all categorical predictor splits + +Tree = removebadsplits(Tree); + +if isequal(Prune,'on') + Tree = treeprune(Tree); +end + +%---------------------------------------------------- +function v=gdi(p) +%GDI Gini diversity index + +v=1-sum(p.^2,2); + +%---------------------------------------------------- +function v=twoing(Pleft, P1, Pright, P2) +%TWOING Twoing index + +v = 0.25 * Pleft .* Pright .* sum(abs(P1-P2),2).^2; + +%---------------------------------------------------- +function v=deviance(p) +%DEVIANCE Deviance + +v = -2 * sum(p .* log(max(p,eps)), 2); + +%---------------------------------------------------- +function [critval,cutval]=Ccritval(x,Ccum,iscat,pratio,Pt,impurity,critfun) +%CCRITVAL Get critical value for splitting node in classification tree. 
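+% Given x sorted in ascending order and Ccum, the cumulative weighted class
+% counts in that order, the code below enumerates every candidate split (each
+% boundary between distinct values for an ordered predictor, or every category
+% subset for a categorical one), forms the left/right class probabilities,
+% evaluates the split criterion, and returns the best criterion value together
+% with the corresponding cut point (a scalar threshold or a pair of category lists).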
+ +% First get all possible split points +Ncum = (1:length(x))'; +rows = Ncum(diff(x)>0); +if isempty(rows) + critval = -Inf; + cutval = 0; + return +end + +% Get arrays showing left/right class membership at each split +nsplits = length(rows); +if iscat + % A picks out all category subsets including the 1st, but not the whole set + A = ones(2^nsplits,nsplits+1); + A(:,2:end) = fullfact(2*ones(1,nsplits)) - 1; + A(end,:) = []; + + % B contains the class counts in each category + t = [rows; size(Ccum,1)]; + B = Ccum(t,:); + B(2:end,:) = B(2:end,:) - B(1:end-1,:); + + Csplit1 = A*B; + nsplits = size(Csplit1,1); + allx = x(t); +else + % Split between each pair of distinct ordered values + Csplit1 = Ccum(rows,:); +end +Csplit2 = repmat(Ccum(end,:),nsplits,1) - Csplit1; + +% Get left/right class probabilities at each split +temp = repmat(pratio,nsplits,1); +P1 = temp .* Csplit1; +P2 = temp .* Csplit2; +Ptleft = sum(P1,2); +Ptright = sum(P2,2); +nclasses = size(P1,2); +P1 = P1 ./ max(repmat(Ptleft,1,nclasses), 1E-10); % max added by DWH +P2 = P2 ./ max(repmat(Ptright,1,nclasses), 1E-10); + +% Get left/right node probabilities +Pleft = Ptleft ./ Pt; +Pright = 1 - Pleft; + +% Evaluate criterion as impurity or otherwise +if impurity + crit = - Pleft.*feval(critfun,P1) - Pright.*feval(critfun,P2); +else + crit = feval(critfun, Pleft, P1, Pright, P2); +end + +% Return best split point +critval = max(crit); +maxloc = find(crit==critval); +if length(maxloc)>1 + maxloc = maxloc(1+floor(length(maxloc)*rand)); +end +if iscat + t = logical(A(maxloc,:)); + xleft = allx(t); + xright = allx(~t); + cutval = {xleft' xright'}; +else + cutloc = rows(maxloc); + cutval = (x(cutloc) + x(cutloc+1))/2; +end + +%---------------------------------------------------- +function [critval,cutval]=Rcritval(x,Ycum,iscat) +%RCRITVAL Get critical value for splitting node in regression tree. + +% First get all possible split points +Ncum = (1:length(x))'; +rows = Ncum(diff(x)>0); +if isempty(rows) + critval = -Inf; + cutval = 0; + return +end + +% Get arrays showing left/right class membership at each split +nsplits = length(rows); +if iscat + % A picks out all category subsets including the 1st, but not the whole set + A = ones(2^nsplits,nsplits+1); + A(:,2:end) = fullfact(2*ones(1,nsplits)) - 1; + A(end,:) = []; + + % B contains the category sums + t = [rows; size(Ycum,1)]; + B = Ycum(t,:); + B(2:end,:) = B(2:end,:) - B(1:end-1,:); + + Ysplit1 = A*B; + n1 = A*[t(1);diff(t)]; + allx = x(t); % take one x value from each unique set +else + % Split between each pair of distinct ordered values + Ysplit1 = Ycum(rows,:); + n1 = rows; +end + +% Get left/right means +N = Ncum(end); +mu1 = Ysplit1 ./ n1; +mu2 = (Ycum(end) - Ysplit1) ./ (N - n1); + +ssx = n1.*mu1.^2 + (N-n1).*mu2.^2; +critval = max(ssx); +maxloc = find(ssx==critval); +if length(maxloc)>1 + maxloc = maxloc(1+floor(length(maxloc)*rand)); +end +if iscat + t = logical(A(maxloc,:)); + xleft = allx(t); + xright = allx(~t); + cutval = {xleft' xright'}; +else + cutloc = rows(maxloc); + cutval = (x(cutloc) + x(cutloc+1))/2; +end + +% -------------------------------------- +function Tree = removebadsplits(Tree) +%REMOVEBADSPLITS Remove splits that contribute nothing to the tree. 
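+% Working upward from the leaves, each "twig" (an internal node whose two
+% children are both leaves) is collapsed whenever its split does not reduce the
+% risk, i.e. when the children's combined risk is at least the twig's own risk
+% (up to a small numerical tolerance); the pruned nodes are then removed with
+% TREEPRUNE.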
+ +N = length(Tree.node); +isleaf = (Tree.var==0)'; % no split variable implies leaf node +isntpruned = true(1,N); +doprune = false(1,N); +adjfactor = (1 - 100*eps); +risk = Tree.risk'; + +% Work up from the bottom of the tree +while(true) + % Find "twigs" with two leaf children + leafs = find(isleaf & isntpruned); + branches = find(~isleaf & isntpruned); + twig = branches(sum(isleaf(Tree.children(branches,:)),2) == 2); + if isempty(twig) + break; % must have just the root node left + end + + % Find twigs to "unsplit" if the error of the twig is no larger + % than the sum of the errors of the children + Rtwig = risk(twig); + kids = Tree.children(twig,:); + Rsplit = sum(risk(kids),2); + unsplit = Rsplit >= Rtwig'*adjfactor; + if any(unsplit) + % Mark children as pruned, and mark twig as now a leaf + isntpruned(kids(unsplit,:)) = 0; + twig = twig(unsplit); % only these to be marked on next 2 lines + isleaf(twig) = 1; + doprune(twig) = 1; + else + break; + end +end + +% Remove splits that are useless +if any(doprune) + Tree = treeprune(Tree,'nodes',find(doprune)); +end diff --git a/boosting/weightedstats/treetestw.asv b/boosting/weightedstats/treetestw.asv new file mode 100644 index 0000000..909735e --- /dev/null +++ b/boosting/weightedstats/treetestw.asv @@ -0,0 +1,344 @@ +function [cost,secost,ntnodes,bestlevel] = treetestw(Tree,TorCorR,X,Y,w,varargin) +%TREETEST Compute error rate for tree. +% COST = TREETEST(T,'resubstitution') computes the cost of the tree T +% using a resubstitution method. T is a decision tree as created by +% the TREEFIT function. The cost of the tree is the sum over all +% terminal nodes of the estimated probability of that node times the +% node's cost. If T is a classification tree, the cost of a node is +% the sum of the misclassification costs of the observations in +% that node. If T is a regression tree, the cost of a node is the +% average squared error over the observations in that node. COST is +% a vector of cost values for each subtree in the optimal pruning +% sequence for T. The resubstitution cost is based on the same +% sample that was used to create the original tree, so it under- +% estimates the likely cost of applying the tree to new data. +% +% COST = TREETEST(T,'test',X,Y) uses the predictor matrix X and +% response Y as a test sample, applies the decision tree T to that +% sample, and returns a vector COST of cost values computed for the +% test sample. X and Y should not be the same as the learning sample, +% which is the sample that was used to fit the tree T. +% +% COST = TREETEST(T,'crossvalidate',X,Y) uses 10-fold cross-validation to +% compute the cost vector. X and Y should be the learning sample, which +% is the sample that was used to fit the tree T. The function +% partitions the sample into 10 subsamples, chosen randomly but with +% roughly equal size. For classification trees the subsamples also have +% roughly the same class proportions. For each subsample, TREETEST fits +% a tree to the remaining data and uses it to predict the subsample. It +% pools the information from all subsamples to compute the cost for the +% whole sample. w is the weight vector for the samples. If no weights +% the original function treetest should be used instead. +% +% [COST,SECOST,NTNODES,BESTLEVEL] = TREETEST(...) also returns the vector +% SECOST containing the standard error of each COST value, the vector +% NTNODES containing number of terminal nodes for each subtree, and the +% scalar BESTLEVEL containing the estimated best level of pruning. 
+% BESTLEVEL=0 means no pruning (i.e. the full unpruned tree). The best +% level is the one that produces the smallest tree that is within one +% standard error of the minimum-cost subtree. +% +% [...] = TREETEST(...,'PARAM1',val1,'PARAM2',val2,...) specifies +% optional parameter name/value pairs chosen from the following: +% +% 'nsamples' The number of cross-validation samples (default 10) +% 'treesize' Either 'se' (the default) to choose the smallest +% tree whose cost is within one standard error of the +% minimum cost, or 'min' to choose the minimal cost tree +% (not meaningful for resubstitution error calculations) +% +% Example: Find best tree for Fisher's iris data using cross-validation. +% The solid line shows the estimated cost for each tree size, +% the dashed line marks 1 standard error above the minimum, +% and the square marks the smallest tree under the dashed line. +% % Start with a large tree +% load fisheriris; +% t = treefit(meas,species','splitmin',5); +% +% % Find the minimum-cost tree +% [c,s,n,best] = treetest(t,'cross',meas,species); +% tmin = treeprune(t,'level',best); +% +% % Plot smallest tree within 1 std. error of minimum cost tree +% [mincost,minloc] = min(c); +% plot(n,c,'b-o', n(best+1),c(best+1),'bs',... +% n,(mincost+s(minloc))*ones(size(n)),'k--'); +% xlabel('Tree size (number of terminal nodes)') +% ylabel('Cost') +% +% See also TREEFIT, TREEDISP, TREEPRUNE, TREEVAL. + +% Copyright 1993-2004 The MathWorks, Inc. +% $Revision: 1.1 $ $Date: 2004/08/23 14:32:11 $ + +if nargin<2, error('Not enough arguments.'); end +if ~isstruct(Tree) | ~isfield(Tree,'method') + error('First argument must be a decision tree.'); +end +if ~isstr(TorCorR) | ~(TorCorR(1)=='t' | TorCorR(1)=='c' | TorCorR=='r') + error('Second argument must be ''test'', ''crossvalidate'', or ''resubstitution''.'); +end +if TorCorR(1)=='t' & nargin<4 + error('Not enough arguments. Need X and Y for the test sample.'); +elseif TorCorR(1)=='c' & nargin<4 + error('Not enough arguments. Need X and Y from the learning sample.'); +end +doclass = isequal(Tree.method,'classification'); +if TorCorR(1)~='r' + if ~ischar(Y) & prod(size(Y))~=length(Y) + error('Y must be a vector.'); + else + if iscell(Y) | isnumeric(Y) + n = length(Y); + else + n = size(Y,1); + end + if size(X,1)~=n + error('There must be one Y value for each row of X.'); + end + end +end + +okargs = {'nsamples' 'treesize'}; +defaults = {10 'se'}; +[emsg ncv treesize] = statgetargs(okargs,defaults,varargin{:}); +error(emsg); + +if ~isnumeric(ncv) | prod(size(ncv))~=1 | ncv<2 | ncv~=round(ncv) + error('Value of ''nsamples'' argument must be an integer 2 or larger.'); +end +if ~isstr(treesize) | ~(treesize(1)=='s' | treesize(1)=='m') + error('Value of ''treesize'' argument must be ''se'' or ''min''.'); +end + +% Get complexity parameters for all pruned subtrees +if ~isfield(Tree,'alpha') + Tree = treeprune(Tree); +end + +% Remove missing values +if nargin>=4 + t = any(isnan(X),2); + if isequal(Tree.method,'classification') + Yold = Y; + Y = classname2id(Y,Tree.classname); + if any(Y==0) + bad = find(Y==0); + bad = Yold(bad(1)); + if isnumeric(bad) + bad = num2str(bad); + elseif iscell(bad) + bad = bad{1}; + end + error(sprintf(... + 'At least one Y value (''%s'') is incompatible with the tree.',... 
+ bad)); + end + end + + t = t | isnan(Y); + if any(t) + X(t,:) = []; + Y(t,:) = []; + end +end + +% Do proper type of testing (error estimation) +switch(TorCorR(1)) + case 't', [cost,secost] = testtree(Tree,X,Y); + case 'c', [cost,secost] = cvtree(Tree,X,Y,w,ncv); + case 'r', [cost,secost] = resubinfo(Tree); treesize = 'm'; +end + +cost = cost(:); +secost = secost(:); +if nargout>=3 + ntnodes = Tree.ntermnodes(:); +end +if nargout>=4 + bestlevel = selecttree(cost,secost,treesize(1)) - 1; +end + +% --------------------------------------------------------- +function [resuberr,seresub] = resubinfo(Tree) +%RESUBINFO Compute error rates for tree using resubstitution error. + +% Get complexity parameters for all pruned subtrees +nsub = 1+max(Tree.prunelist); + +% Get error rate for each subtree in this sequence +resuberr = zeros(nsub,1); +for j=1:nsub; + Tj = treeprune(Tree,'level',j-1); + leaves = Tj.node(Tj.var==0); + resuberr(j) = sum(Tj.risk(leaves)); +end +seresub = zeros(size(resuberr)); + +% --------------------------------------------------------------- +function [testerr,seerr] = testtree(Tree,X,id) +%TESTTREE Compute error rates for tree using test sample. +% The id variable is the class id for classification, or the y variable +% for regression. + +% Get pruning sequence and compute fitted values for the whole sequence +nsub = 1 + max(Tree.prunelist); +yfit = treeval(Tree,X,(0:nsub-1)); + +doclass = isequal(Tree.method,'classification'); +if doclass % get info required for classification + nclasses = Tree.nclasses; + cost = Tree.cost; + prior = Tree.prior(:); + if isempty(prior) + prior = Tree.classcount(1,:)' / Tree.nodesize(1); + end + Njtest = histc(id,1:nclasses); + adjprior = (prior ./ max(eps,Njtest))'; +end + +% Compute error statistics +if doclass + testerr = zeros(nsub,1); + seerr = zeros(nsub,1); + for k=nsub:-1:1; + % M(i,j) counts class i items classified as class j + M = full(sparse(id,yfit(:,k),1,nclasses,nclasses)); + + % Compute loss for this classification + loss = sum(cost .* M, 2); + losssq = sum(cost.^2 .* M, 2); + s2 = losssq - loss.^2 ./ Njtest; + + testerr(k) = adjprior * loss; + seerr(k) = sqrt(adjprior.^2 * s2); + end +else + N = size(X,1); + E = (yfit - repmat(id,1,size(yfit,2))).^2; + testerr = mean(E,1); + s2 = sum((E - repmat(testerr,size(E,1),1)).^2,1) / N; + seerr = sqrt(s2/N); +end + +% --------------------------------------------------------------- +function [cverr,secverr] = cvtree(Tree,X,id,w,ncv) +%CVTREE Compute error rates for tree using cross-validaiton. 
+% [CVERR,SECVERR] = CVTREE(TREE,X,ID,NCV) + +% Get geometric means of the alpha boundary points +alpha = Tree.alpha; +avgalpha = [sqrt(alpha(1:end-1) .* alpha(2:end)); Inf]; + +% Loop over cross-validation samples +N = size(X,1); +ntrees = length(avgalpha); +cverr = zeros(ntrees,1); +secverr = zeros(ntrees,1); +cvid = 1 + mod((1:N),ncv); + +doclass = isequal(Tree.method,'classification'); +if doclass + % Use a random permutation with fixed category proportions + idrand = id + rand(size(id)); + [stdid,idx] = sort(idrand); + cvid = cvid(idx); + args = {'prior',Tree.prior, 'cost',Tree.cost, 'prune','on'}; +else + % Use a random permutation with fixed numbers per cross-validation sample + cvid = cvid(randperm(N)); + args = {'prune','on'}; +end + +% Get predicted values using cross-validation samples +cvclass = zeros(N,ntrees); +for j=1:ncv + % Use jth group as a test, train on the others + testrows = (cvid == j); + trainrows = ~testrows; + testsize = sum(testrows); + + % Get a sequence of pruned trees for the training set + Tj = treefitw(X(trainrows,:),id(trainrows),w(trainrows),0,... + 'method',Tree.method, 'catidx',Tree.catcols, args{:}); + + % Get classifications based on each subtree that we require + treesneeded = findsubtree(Tj,avgalpha); + cvclass(testrows,:) = treeval(Tj,X(testrows,:),treesneeded-1); +end + +% Compute output statistics based on those predictions +if doclass + Nj = Tree.classcount(1,:)'; + prior = Tree.prior; + if isempty(prior) + prior = Nj' / N; + end + adjprior = (prior ./ Nj'); + nclasses = length(prior); + cost = Tree.cost; + sz = size(cost); + for k=1:ntrees + disp(num2str(sum(cvclass(:, k)~=id).*w/sum(w))) + loss = sum((cvclass(:, k)~=id).*w)/ sum(w); + losssq = sum(((cvclass(:, k)~=id).^2).*w) /sum(w); + s2 = losssq - loss.^2; + cverr(k) = loss; + secverr(k) = sqrt(s2); + %M = full(sparse(id,cvclass(:,k),1,nclasses,nclasses)); + %loss = sum(cost .* M, 2); + %losssq = sum(cost.^2 .* M, 2); + %s2 = losssq - loss.^2 ./ Nj; + %cverr(k) = adjprior * loss; + %secverr(k) = sqrt(adjprior.^2 * s2); + end +else + E = (cvclass - repmat(id,1,size(cvclass,2))).^2; + cverr = mean(E,1); + s2 = sum((E - repmat(cverr,size(E,1),1)).^2,1) / N; + secverr = sqrt(s2/N); +end + +% ---------------------------- +function k = findsubtree(Tree,alpha0) +%FINDSUBTREE Find subtree corresponding to specified complexity parameters. + +adjfactor = 1 + 100*eps; +alpha = Tree.alpha; +k = zeros(size(alpha0)); +for j=1:length(alpha0); + k(j) = sum(alpha <= alpha0(j)*adjfactor); +end + +% ----------------------------- +function bestj = selecttree(allalpha,sealpha,treesize) +%SELECTTREE Select the best tree from error rates using some criterion. + +% Find the smallest tree that gives roughly the minimum error +[minerr,minloc] = min(allalpha); +if isequal(treesize(1),'m') + cutoff = minerr * (1 + 100*eps); +else + cutoff = minerr + sealpha(minloc); +end +j = find(allalpha <= cutoff); +bestj = j(end); + +% ----------------------------- +function idvec = classname2id(idnames,cnames) +%CLASSNAME2ID Create vector of numeric indices from class name array. 
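+% Illustrative example: classname2id({'b';'a';'b'}, {'a';'b'}) returns [2;1;2];
+% names that match no entry of cnames map to 0 (empty names become NaN below).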
+ +idvec = zeros(length(idnames),1); +if isnumeric(idnames), idnames = cellstr(num2str(idnames)); end +for j=1:length(cnames) + idvec(strcmp(cnames(j),idnames)) = j; +end + +t = find(idvec==0); +if ~isempty(t) + txt = idnames(t,:); + if ischar(txt) + txt = cellstr(txt); + end + idvec(t(cellfun('isempty',txt))) = NaN; +end diff --git a/boosting/weightedstats/treetestw.m b/boosting/weightedstats/treetestw.m new file mode 100644 index 0000000..1b50239 --- /dev/null +++ b/boosting/weightedstats/treetestw.m @@ -0,0 +1,344 @@ +function [cost,secost,ntnodes,bestlevel] = treetestw(Tree,TorCorR,X,Y,w,varargin) +%TREETEST Compute error rate for tree. +% COST = TREETEST(T,'resubstitution') computes the cost of the tree T +% using a resubstitution method. T is a decision tree as created by +% the TREEFIT function. The cost of the tree is the sum over all +% terminal nodes of the estimated probability of that node times the +% node's cost. If T is a classification tree, the cost of a node is +% the sum of the misclassification costs of the observations in +% that node. If T is a regression tree, the cost of a node is the +% average squared error over the observations in that node. COST is +% a vector of cost values for each subtree in the optimal pruning +% sequence for T. The resubstitution cost is based on the same +% sample that was used to create the original tree, so it under- +% estimates the likely cost of applying the tree to new data. +% +% COST = TREETEST(T,'test',X,Y) uses the predictor matrix X and +% response Y as a test sample, applies the decision tree T to that +% sample, and returns a vector COST of cost values computed for the +% test sample. X and Y should not be the same as the learning sample, +% which is the sample that was used to fit the tree T. +% +% COST = TREETEST(T,'crossvalidate',X,Y) uses 10-fold cross-validation to +% compute the cost vector. X and Y should be the learning sample, which +% is the sample that was used to fit the tree T. The function +% partitions the sample into 10 subsamples, chosen randomly but with +% roughly equal size. For classification trees the subsamples also have +% roughly the same class proportions. For each subsample, TREETEST fits +% a tree to the remaining data and uses it to predict the subsample. It +% pools the information from all subsamples to compute the cost for the +% whole sample. w is the weight vector for the samples. If no weights +% the original function treetest should be used instead. +% +% [COST,SECOST,NTNODES,BESTLEVEL] = TREETEST(...) also returns the vector +% SECOST containing the standard error of each COST value, the vector +% NTNODES containing number of terminal nodes for each subtree, and the +% scalar BESTLEVEL containing the estimated best level of pruning. +% BESTLEVEL=0 means no pruning (i.e. the full unpruned tree). The best +% level is the one that produces the smallest tree that is within one +% standard error of the minimum-cost subtree. +% +% [...] = TREETEST(...,'PARAM1',val1,'PARAM2',val2,...) specifies +% optional parameter name/value pairs chosen from the following: +% +% 'nsamples' The number of cross-validation samples (default 10) +% 'treesize' Either 'se' (the default) to choose the smallest +% tree whose cost is within one standard error of the +% minimum cost, or 'min' to choose the minimal cost tree +% (not meaningful for resubstitution error calculations) +% +% Example: Find best tree for Fisher's iris data using cross-validation. 
+% The solid line shows the estimated cost for each tree size, +% the dashed line marks 1 standard error above the minimum, +% and the square marks the smallest tree under the dashed line. +% % Start with a large tree +% load fisheriris; +% t = treefit(meas,species','splitmin',5); +% +% % Find the minimum-cost tree +% [c,s,n,best] = treetest(t,'cross',meas,species); +% tmin = treeprune(t,'level',best); +% +% % Plot smallest tree within 1 std. error of minimum cost tree +% [mincost,minloc] = min(c); +% plot(n,c,'b-o', n(best+1),c(best+1),'bs',... +% n,(mincost+s(minloc))*ones(size(n)),'k--'); +% xlabel('Tree size (number of terminal nodes)') +% ylabel('Cost') +% +% See also TREEFIT, TREEDISP, TREEPRUNE, TREEVAL. + +% Copyright 1993-2004 The MathWorks, Inc. +% $Revision: 1.1 $ $Date: 2004/08/23 14:32:11 $ + +if nargin<2, error('Not enough arguments.'); end +if ~isstruct(Tree) | ~isfield(Tree,'method') + error('First argument must be a decision tree.'); +end +if ~isstr(TorCorR) | ~(TorCorR(1)=='t' | TorCorR(1)=='c' | TorCorR=='r') + error('Second argument must be ''test'', ''crossvalidate'', or ''resubstitution''.'); +end +if TorCorR(1)=='t' & nargin<4 + error('Not enough arguments. Need X and Y for the test sample.'); +elseif TorCorR(1)=='c' & nargin<4 + error('Not enough arguments. Need X and Y from the learning sample.'); +end +doclass = isequal(Tree.method,'classification'); +if TorCorR(1)~='r' + if ~ischar(Y) & prod(size(Y))~=length(Y) + error('Y must be a vector.'); + else + if iscell(Y) | isnumeric(Y) + n = length(Y); + else + n = size(Y,1); + end + if size(X,1)~=n + error('There must be one Y value for each row of X.'); + end + end +end + +okargs = {'nsamples' 'treesize'}; +defaults = {10 'se'}; +[emsg ncv treesize] = statgetargs(okargs,defaults,varargin{:}); +error(emsg); + +if ~isnumeric(ncv) | prod(size(ncv))~=1 | ncv<2 | ncv~=round(ncv) + error('Value of ''nsamples'' argument must be an integer 2 or larger.'); +end +if ~isstr(treesize) | ~(treesize(1)=='s' | treesize(1)=='m') + error('Value of ''treesize'' argument must be ''se'' or ''min''.'); +end + +% Get complexity parameters for all pruned subtrees +if ~isfield(Tree,'alpha') + Tree = treeprune(Tree); +end + +% Remove missing values +if nargin>=4 + t = any(isnan(X),2); + if isequal(Tree.method,'classification') + Yold = Y; + Y = classname2id(Y,Tree.classname); + if any(Y==0) + bad = find(Y==0); + bad = Yold(bad(1)); + if isnumeric(bad) + bad = num2str(bad); + elseif iscell(bad) + bad = bad{1}; + end + error(sprintf(... + 'At least one Y value (''%s'') is incompatible with the tree.',... + bad)); + end + end + + t = t | isnan(Y); + if any(t) + X(t,:) = []; + Y(t,:) = []; + end +end + +% Do proper type of testing (error estimation) +switch(TorCorR(1)) + case 't', [cost,secost] = testtree(Tree,X,Y); + case 'c', [cost,secost] = cvtree(Tree,X,Y,w,ncv); + case 'r', [cost,secost] = resubinfo(Tree); treesize = 'm'; +end + +cost = cost(:); +secost = secost(:); +if nargout>=3 + ntnodes = Tree.ntermnodes(:); +end +if nargout>=4 + bestlevel = selecttree(cost,secost,treesize(1)) - 1; +end + +% --------------------------------------------------------- +function [resuberr,seresub] = resubinfo(Tree) +%RESUBINFO Compute error rates for tree using resubstitution error. 
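+% For each level of the optimal pruning sequence, the resubstitution cost is the
+% sum of the risk over the terminal nodes of that pruned subtree; its standard
+% error is reported as zero since no held-out sample is involved.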
+ +% Get complexity parameters for all pruned subtrees +nsub = 1+max(Tree.prunelist); + +% Get error rate for each subtree in this sequence +resuberr = zeros(nsub,1); +for j=1:nsub; + Tj = treeprune(Tree,'level',j-1); + leaves = Tj.node(Tj.var==0); + resuberr(j) = sum(Tj.risk(leaves)); +end +seresub = zeros(size(resuberr)); + +% --------------------------------------------------------------- +function [testerr,seerr] = testtree(Tree,X,id) +%TESTTREE Compute error rates for tree using test sample. +% The id variable is the class id for classification, or the y variable +% for regression. + +% Get pruning sequence and compute fitted values for the whole sequence +nsub = 1 + max(Tree.prunelist); +yfit = treeval(Tree,X,(0:nsub-1)); + +doclass = isequal(Tree.method,'classification'); +if doclass % get info required for classification + nclasses = Tree.nclasses; + cost = Tree.cost; + prior = Tree.prior(:); + if isempty(prior) + prior = Tree.classcount(1,:)' / Tree.nodesize(1); + end + Njtest = histc(id,1:nclasses); + adjprior = (prior ./ max(eps,Njtest))'; +end + +% Compute error statistics +if doclass + testerr = zeros(nsub,1); + seerr = zeros(nsub,1); + for k=nsub:-1:1; + % M(i,j) counts class i items classified as class j + M = full(sparse(id,yfit(:,k),1,nclasses,nclasses)); + + % Compute loss for this classification + loss = sum(cost .* M, 2); + losssq = sum(cost.^2 .* M, 2); + s2 = losssq - loss.^2 ./ Njtest; + + testerr(k) = adjprior * loss; + seerr(k) = sqrt(adjprior.^2 * s2); + end +else + N = size(X,1); + E = (yfit - repmat(id,1,size(yfit,2))).^2; + testerr = mean(E,1); + s2 = sum((E - repmat(testerr,size(E,1),1)).^2,1) / N; + seerr = sqrt(s2/N); +end + +% --------------------------------------------------------------- +function [cverr,secverr] = cvtree(Tree,X,id,w,ncv) +%CVTREE Compute error rates for tree using cross-validaiton. +% [CVERR,SECVERR] = CVTREE(TREE,X,ID,NCV) + +% Get geometric means of the alpha boundary points +alpha = Tree.alpha; +avgalpha = [sqrt(alpha(1:end-1) .* alpha(2:end)); Inf]; + +% Loop over cross-validation samples +N = size(X,1); +ntrees = length(avgalpha); +cverr = zeros(ntrees,1); +secverr = zeros(ntrees,1); +cvid = 1 + mod((1:N),ncv); + +doclass = isequal(Tree.method,'classification'); +if doclass + % Use a random permutation with fixed category proportions + idrand = id + rand(size(id)); + [stdid,idx] = sort(idrand); + cvid = cvid(idx); + args = {'prior',Tree.prior, 'cost',Tree.cost, 'prune','on'}; +else + % Use a random permutation with fixed numbers per cross-validation sample + cvid = cvid(randperm(N)); + args = {'prune','on'}; +end + +% Get predicted values using cross-validation samples +cvclass = zeros(N,ntrees); +for j=1:ncv + % Use jth group as a test, train on the others + testrows = (cvid == j); + trainrows = ~testrows; + testsize = sum(testrows); + + % Get a sequence of pruned trees for the training set + Tj = treefitw(X(trainrows,:),id(trainrows),w(trainrows),0,... 
+ 'method',Tree.method, 'catidx',Tree.catcols, args{:}); + + % Get classifications based on each subtree that we require + treesneeded = findsubtree(Tj,avgalpha); + cvclass(testrows,:) = treeval(Tj,X(testrows,:),treesneeded-1); +end + +% Compute output statistics based on those predictions +if doclass + Nj = Tree.classcount(1,:)'; + prior = Tree.prior; + if isempty(prior) + prior = Nj' / N; + end + adjprior = (prior ./ Nj'); + nclasses = length(prior); + cost = Tree.cost; + sz = size(cost); + w = w / sum(w); + for k=1:ntrees + loss = sum((cvclass(:, k)~=id).*w); + losssq = sum(((cvclass(:, k)~=id).^2).*w); + s2 = losssq - loss.^2; + cverr(k) = loss; + secverr(k) = sqrt(s2); + %M = full(sparse(id,cvclass(:,k),1,nclasses,nclasses)); + %loss = sum(cost .* M, 2); + %losssq = sum(cost.^2 .* M, 2); + %s2 = losssq - loss.^2 ./ Nj; + %cverr(k) = adjprior * loss; + %secverr(k) = sqrt(adjprior.^2 * s2); + end +else + E = (cvclass - repmat(id,1,size(cvclass,2))).^2; + cverr = mean(E,1); + s2 = sum((E - repmat(cverr,size(E,1),1)).^2,1) / N; + secverr = sqrt(s2/N); +end + +% ---------------------------- +function k = findsubtree(Tree,alpha0) +%FINDSUBTREE Find subtree corresponding to specified complexity parameters. + +adjfactor = 1 + 100*eps; +alpha = Tree.alpha; +k = zeros(size(alpha0)); +for j=1:length(alpha0); + k(j) = sum(alpha <= alpha0(j)*adjfactor); +end + +% ----------------------------- +function bestj = selecttree(allalpha,sealpha,treesize) +%SELECTTREE Select the best tree from error rates using some criterion. + +% Find the smallest tree that gives roughly the minimum error +[minerr,minloc] = min(allalpha); +if isequal(treesize(1),'m') + cutoff = minerr * (1 + 100*eps); +else + cutoff = minerr + sealpha(minloc); +end +j = find(allalpha <= cutoff); +bestj = j(end); + +% ----------------------------- +function idvec = classname2id(idnames,cnames) +%CLASSNAME2ID Create vector of numeric indices from class name array. + +idvec = zeros(length(idnames),1); +if isnumeric(idnames), idnames = cellstr(num2str(idnames)); end +for j=1:length(cnames) + idvec(strcmp(cnames(j),idnames)) = j; +end + +t = find(idvec==0); +if ~isempty(t) + txt = idnames(t,:); + if ischar(txt) + txt = cellstr(txt); + end + idvec(t(cellfun('isempty',txt))) = NaN; +end diff --git a/compile.m b/compile.m new file mode 100644 index 0000000..c2037a0 --- /dev/null +++ b/compile.m @@ -0,0 +1,23 @@ +cd ./mex +mex mexFeatureDistance.cpp +mex mexLBP.cpp +cd .. + +cd ./segmentation/segment +mex mexSegment.cpp +cd ../.. + +cd ./randomforest-matlab/RF_Reg_C +mex src/cokus.cpp src/reg_RF.cpp src/mex_regressionRF_train.cpp -DMATLAB -output mexRF_train +mex src/cokus.cpp src/reg_RF.cpp src/mex_regressionRF_predict.cpp -DMATLAB -output mexRF_predict +cd ../.. + +cd ./multi-segmentation +mex mexMergeAdjRegs_Felzenszwalb.cpp +%mex mexMergeAdjacentRegions.cpp +%mex mexMergeAdjacentRegions2.cpp +cd .. 
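+
+% Optional sanity check (commented out; run it after compile.m and
+% addpath(genpath('.')) if desired). exist(name,'file') returns 3 when
+% MATLAB resolves the name to a MEX-file, so each target built by this
+% script can be verified with, e.g.:
+% assert( exist('mexFeatureDistance', 'file') == 3 );
+% assert( exist('mexLBP', 'file') == 3 );
+% assert( exist('mexSegment', 'file') == 3 );
+% assert( exist('mexRF_train', 'file') == 3 );
+% assert( exist('mexRF_predict', 'file') == 3 );
+% assert( exist('mexMergeAdjRegs_Felzenszwalb', 'file') == 3 );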
+
+cd ./randomforest-matlab/RF_Reg_C/compress
+mex mexCharArray2DoubleArray.cpp
+mex mexDoubleArray2CharArray.cpp
\ No newline at end of file
diff --git a/data/1_45_45397.png b/data/1_45_45397.png
new file mode 100644
index 0000000..1c84cc9
Binary files /dev/null and b/data/1_45_45397.png differ
diff --git a/demo.m b/demo.m
new file mode 100644
index 0000000..d8fe015
--- /dev/null
+++ b/demo.m
@@ -0,0 +1,19 @@
+addpath(genpath('.'));
+
+image_name = './data/1_45_45397.png';
+image = imread( image_name );
+
+para = makeDefaultParameters;
+
+% accelerate using parallel computing (optional):
+% matlabpool
+
+t = tic;
+smap = drfiGetSaliencyMap( image, para );
+time_cost = toc(t);
+fprintf( 'time cost for saliency computation using the DRFI approach: %.3f seconds\n', time_cost );
+
+subplot(1, 2, 1);
+imshow(image);
+subplot(1, 2, 2);
+imshow(smap);
\ No newline at end of file
diff --git a/drfiGetSaliencyMap.m b/drfiGetSaliencyMap.m
new file mode 100644
index 0000000..9b21b62
--- /dev/null
+++ b/drfiGetSaliencyMap.m
@@ -0,0 +1,72 @@
+% Compute saliency map using the Discriminative Regional Feature
+% Integration approach.
+function smap = drfiGetSaliencyMap( image, para )
+ % input
+ % image rgb image of type uint8, as obtained from imread
+ % para parameters including the number of segmentations,
+ % saliency fusion weight, and saliency regressor
+
+ % smap saliency map of type uint8
+
+ num_segmentation = para.num_segmentation;
+ w = para.w;
+ seg_para_mat = para.seg_para;
+ segment_saliency_regressor = para.segment_saliency_regressor;
+
+ [imh, imw, imc] = size( image );
+ smap_mat = zeros(imh, imw, num_segmentation);
+
+ % prepare data for feature generation
+ imdata = drfiGetImageData( image );
+
+ % data of the pseudo-background
+ pbgdata = drfiGetPbgFeat( imdata );
+
+ for p = 1 : num_segmentation
+ % segmentation
+ imsegs = im2superpixels( image, 'pedro', seg_para_mat(p, :) );
+
+ % data of each superpixel
+ spdata = drfiGetSuperpixelData( imdata, imsegs );
+
+ % saliency feature of each segment (region)
+ sp_sal_data = drfiGetRegionSaliencyFeature( imsegs, spdata, imdata, pbgdata );
+
+ % run regression for each segment (region) using the random forest
+ sp_sal_prob = regRF_predict( sp_sal_data, segment_saliency_regressor );
+
+ % propagate saliency of segments (regions) to pixels
+ smap_mat(:, :, p) = spSaliency2Pixels( sp_sal_prob, imsegs ) * w( p );
+ end
+
+ % multi-level saliency fusion
+ smap = sum(smap_mat, 3);
+
+ % normalization
+ smap = (smap - min(smap(:))) / (max(smap(:)) - min(smap(:)) + eps) * 255;
+ smap = uint8(smap);
+end
+
+function temp_smap = spSaliency2Pixels( sp_sal_prob, imsegs, enhance )
+ if nargin < 3
+ enhance = true;
+ end
+
+ % normalization
+ sp_sal_prob = (sp_sal_prob - min(sp_sal_prob)) /...
+ (max(sp_sal_prob) - min(sp_sal_prob) + eps);
+
+ % enhance the difference between salient and background regions
+ if enhance
+ sp_sal_prob = exp( 1.25 * sp_sal_prob );
+ sp_sal_prob = (sp_sal_prob - min(sp_sal_prob)) /...
+ (max(sp_sal_prob) - min(sp_sal_prob) + eps);
+ end
+
+ % assign the saliency value of each segment to its contained pixels
+ spstats = regionprops( imsegs.segimage, 'PixelIdxList' );
+ temp_smap = zeros( size(imsegs.segimage) );
+ for r = 1 : length(spstats)
+ temp_smap( spstats(r).PixelIdxList ) = sp_sal_prob( r );
+ end
+end
\ No newline at end of file
diff --git a/drfiGetSaliencyMapSingleLevel.m b/drfiGetSaliencyMapSingleLevel.m
new file mode 100644
index 0000000..24adb89
--- /dev/null
+++ b/drfiGetSaliencyMapSingleLevel.m
@@ -0,0 +1,62 @@
+% Compute saliency map using the Discriminative Regional Feature
+% Integration approach.
+function smap = drfiGetSaliencyMapSingleLevel( image, segment_saliency_regressor, sigma, k, min_size )
+ % input
+ % image rgb image of type uint8, as obtained from imread
+ % segment_saliency_regressor random forest regressor
+ % sigma
+ % k
+ % min_size parameters of the graph-based (Felzenszwalb)
+ % segmentation defining this single level of regions
+
+ % smap saliency map of type uint8
+
+
+ % prepare data for feature generation
+ imdata = drfiGetImageData( image );
+
+ % data of the pseudo-background
+ pbgdata = drfiGetPbgFeat( imdata );
+
+ seg_para = [sigma, k, min_size];
+
+ imsegs = im2superpixels( image, 'pedro', seg_para );
+
+ % data of each superpixel
+ spdata = drfiGetSuperpixelData( imdata, imsegs );
+
+ % saliency feature of each segment (region)
+ sp_sal_data = drfiGetRegionSaliencyFeature( imsegs, spdata, imdata, pbgdata );
+
+ % run regression for each segment (region) using the random forest
+ sp_sal_prob = regRF_predict( sp_sal_data, segment_saliency_regressor );
+
+ % propagate saliency of segments (regions) to pixels
+ smap = spSaliency2Pixels( sp_sal_prob, imsegs );
+
+ smap = uint8(smap * 255);
+end
+
+function temp_smap = spSaliency2Pixels( sp_sal_prob, imsegs, enhance )
+ if nargin < 3
+ enhance = true;
+ end
+
+ % normalization
+ sp_sal_prob = (sp_sal_prob - min(sp_sal_prob)) /...
+ (max(sp_sal_prob) - min(sp_sal_prob) + eps);
+
+ % enhance the difference between salient and background regions
+ if enhance
+ sp_sal_prob = exp( 1.25 * sp_sal_prob );
+ sp_sal_prob = (sp_sal_prob - min(sp_sal_prob)) /...
+ (max(sp_sal_prob) - min(sp_sal_prob) + eps); + end + + % assign the saliency value of each segment to its contained pixels + spstats = regionprops( imsegs.segimage, 'PixelIdxList' ); + temp_smap = zeros( size(imsegs.segimage) ); + for r = 1 : length(spstats) + temp_smap( spstats(r).PixelIdxList ) = sp_sal_prob( r ); + end +end \ No newline at end of file diff --git a/drfi_saliency_feature/change_log.txt b/drfi_saliency_feature/change_log.txt new file mode 100644 index 0000000..7b00419 --- /dev/null +++ b/drfi_saliency_feature/change_log.txt @@ -0,0 +1,2 @@ +2014-01-02 +Scripts in feature folder are latest version which is not relying on the texton, and LBP feature (for texture description) is added \ No newline at end of file diff --git a/drfi_saliency_feature/drfiGetImageData.m b/drfi_saliency_feature/drfiGetImageData.m new file mode 100644 index 0000000..6a83341 --- /dev/null +++ b/drfi_saliency_feature/drfiGetImageData.m @@ -0,0 +1,119 @@ +function imdata = drfiGetImageData( image ) + g = fspecial('gaussian', 5); + image = imfilter(image, g, 'same'); + + image_rgb = im2double( image ); +% image_rgb = rgb2luv( image ); +% image_rgb(:,:,1) = image_rgb(:,:,1) / 100; +% image_rgb(:,:,2) = (image_rgb(:,:,1) + 100) / 300; +% image_rgb(:,:,3) = (image_rgb(:,:,1) + 140) / 260; + + image_lab = rgb2lab( image_rgb ); + image_lab(:,:,1) = image_lab(:,:,1) / 100; + image_lab(:,:,2) = image_lab(:,:,2) / 220 + 0.5; + image_lab(:,:,3) = image_lab(:,:,3) / 220 / 0.5; + + image_hsv = rgb2hsv( image_rgb ); + imdata.image_rgb = image_rgb; + imdata.image_lab = image_lab; + imdata.image_hsv = image_hsv; + + [imh, imw, imc] = size( image_rgb ); + imdata.imh = imh; + imdata.imw = imw; + + RGB_bins = [8, 8, 8]; + nRGBHist = prod( RGB_bins ); + + Lab_bins = [8, 8, 8]; + nLabHist = prod(Lab_bins); + + HSV_bins = [8, 8, 8]; + nHSVHist = prod( HSV_bins ); + + ntexthist = 15; + nloc = 8; % mean x-y, 10th, 90th percentile x-y, w/h, area + filtext = makeLMfilters; + ntext = size(filtext, 3); + + imdata.nRGBHist = nRGBHist; + imdata.nLabHist = nLabHist; + imdata.nHSVHist = nHSVHist; + + imdata.nRGB = 3; + imdata.nLab = 3; + imdata.nHSV = 3; + + imdata.ntexthist = ntexthist; + imdata.nloc = nloc; + + imdata.ntext = ntext; + + imdata.nlbp = 256; % [0, 255] + + % RGB histogram + R = image_rgb(:,:,1); + G = image_rgb(:,:,2); + B = image_rgb(:,:,3); + + rr = min( floor(R*RGB_bins(1)) + 1, RGB_bins(1) ); + gg = min( floor(G*RGB_bins(2)) + 1, RGB_bins(2) ); + bb = min( floor(B*RGB_bins(3)) + 1, RGB_bins(3) ); + Q_rgb = (rr-1) * RGB_bins(2) * RGB_bins(3) + ... + (gg-1) * RGB_bins(3) + ... + bb + 1; + + % Lab histogram + L = image_lab(:,:,1); + a = image_lab(:,:,2); + b = image_lab(:,:,3); + + ll = min(floor(L/(1/Lab_bins(1))) + 1, Lab_bins(1)); + aa = min(floor((a)/(1/Lab_bins(2))) + 1, Lab_bins(2)); + bb = min(floor((b)/(1/Lab_bins(3))) + 1, Lab_bins(3)); + Q_lab = (ll-1) * Lab_bins(2) * Lab_bins(3) + ... + (aa-1) * Lab_bins(3) + ... + bb + 1; + + % HSV histogram + H = image_hsv(:,:,1); + % H(H >= 0.5) = 1- H(H >= 0.5); + S = image_hsv(:,:,2); + V = image_hsv(:,:,3); + + hh = min( floor(H*HSV_bins(1)) + 1, HSV_bins(1) ); + ss = min( floor(S*HSV_bins(2)) + 1, HSV_bins(2) ); + vv = min( floor(V*HSV_bins(3)) + 1, HSV_bins(3) ); + + Q_hsv = (hh-1) * HSV_bins(2) * HSV_bins(3) + ... + (ss-1) * HSV_bins(3) + ... 
+ vv + 1; + + imdata.Q_rgb = Q_rgb; + imdata.Q_lab = Q_lab; + imdata.Q_hsv = Q_hsv; + + % texture - response of filter bank + grayim = rgb2gray( image ); + imtext = zeros([imh imw ntext]); + for f = 1:ntext + response = abs(imfilter(im2single(grayim), filtext(:, :, f), 'same')); + response = (response - min(response(:))) / (max(response(:)) - min(response(:)) + eps); + imtext(:, :, f) = response; + end + [dummy, texthist] = max(imtext, [], 3); + imdata.imtext = imtext; + imdata.texthist = texthist; + + % texture - LBP + imlbp = mexLBP( grayim ); + imdata.imlbp = double( imlbp ); + + % location + yim = 1-repmat(((0:imh-1)/(imh-1))', 1, imw); + xim = repmat(((0:imw-1)/(imw-1)), imh, 1); + + imdata.xim = xim; + imdata.yim = yim; + + imdata.hist_type = 'x2'; diff --git a/drfi_saliency_feature/drfiGetPbgFeat.m b/drfi_saliency_feature/drfiGetPbgFeat.m new file mode 100644 index 0000000..f80ea4d --- /dev/null +++ b/drfi_saliency_feature/drfiGetPbgFeat.m @@ -0,0 +1,82 @@ +function pbgdata = drfiGetPbgFeat( imdata ) + % get probable background feature + % the probable background is estimated as the border near image edge + % there are 497 features in total ... + % abs mean R, G, B values 3 + % RGB histogram + % abs mean L, a, b values 3 + % L*a*b* histogram 8*16*16 + % abs mean H, S, V values + % HSV histogram + % abs diff mean texture response 15 + % maximum texture response histogram 15 + % texton histogram + % location 8 + + borderwidth = floor( 15 * max(imdata.imh, imdata.imw) / 400 ); + + % get pixels in the probable background + [h w c] = size( imdata.image_rgb ); + pixels = [1 : h * borderwidth]'; + pixels = [pixels; [h*w : -1 : (h*w - borderwidth*h +1)]']; + n = 16 : w - 15; + y1 = 1 : 15; + y2 = h-14 : h; + [nn1 yy1] = meshgrid( n, y1 ); + ny1 = (nn1 - 1) * h + yy1; + pixels = [pixels; ny1(:)]; + [nn2 yy2] = meshgrid( n, y2 ); + ny2 = (nn2 - 1) * h + yy2; + pixels = [pixels; ny2(:)]; + +% segimage = imsegs.segimage; +% spind = unique(segimage(pixels)); +% +% pixels = []; +% for ix = 1 : length(spind) +% pixels = [pixels; imdata.spstats(spind(ix)).PixelIdxList]; +% end + pbgdata.RGBHist = zeros(imdata.nRGBHist, 1); + + pbgdata.LabHist = zeros(imdata.nLabHist, 1); + + pbgdata.HSVHist = zeros(imdata.nHSVHist, 1); + + pbgdata.texture = zeros(imdata.ntext, 1); + + pbgdata.textureHist = zeros(imdata.ntext, 1); + + pbgdata.lbpHist = zeros(imdata.nlbp, 1); + + pbgdata.R = mean( imdata.image_rgb(pixels) ); + pbgdata.G = mean( imdata.image_rgb(pixels + w * h) ); + pbgdata.B = mean( imdata.image_rgb(pixels + w * h * 2) ); + + pbgdata.RGBHist = hist( imdata.Q_rgb(pixels), 1:imdata.nRGBHist )'; + pbgdata.RGBHist = pbgdata.RGBHist / max( sum(pbgdata.RGBHist), eps ); + + pbgdata.L = mean( imdata.image_lab(pixels) ); + pbgdata.a = mean( imdata.image_lab(pixels + w * h) ); + pbgdata.b = mean( imdata.image_lab(pixels + w * h * 2) ); + + pbgdata.LabHist = hist( imdata.Q_lab(pixels), 1:imdata.nLabHist )'; + pbgdata.LabHist = pbgdata.LabHist / max( sum(pbgdata.LabHist), eps ); + + pbgdata.H = mean( imdata.image_hsv(pixels) ); + pbgdata.S = mean( imdata.image_hsv(pixels + w * h) ); + pbgdata.V = mean( imdata.image_hsv(pixels + w * h * 2) ); + + pbgdata.HSVHist = hist( imdata.Q_hsv(pixels), 1:imdata.nHSVHist )'; + pbgdata.HSVHist = pbgdata.HSVHist / max( sum(pbgdata.HSVHist), eps ); + + pbgdata.texture = zeros(imdata.ntext, 1); + for ix = 1 : imdata.ntext + pbgdata.texture(ix, 1) = mean( imdata.imtext(pixels + (ix-1) * w * h) ); + end + + pbgdata.textureHist = hist( imdata.texthist(pixels), 1:imdata.ntext )'; + 
pbgdata.textureHist = pbgdata.textureHist / max( sum(pbgdata.textureHist), eps ); + + pbgdata.lbpHist = hist( imdata.imlbp(pixels), 0:255 )'; + pbgdata.lbpHist = pbgdata.lbpHist / max( sum(pbgdata.lbpHist), eps ); +end \ No newline at end of file diff --git a/drfi_saliency_feature/drfiGetRegionSaliencyFeature.m b/drfi_saliency_feature/drfiGetRegionSaliencyFeature.m new file mode 100644 index 0000000..5d82c20 --- /dev/null +++ b/drfi_saliency_feature/drfiGetRegionSaliencyFeature.m @@ -0,0 +1,148 @@ +function feat = drfiGetRegionSaliencyFeature( imsegs, spdata, imdata, pbgdata ) + % hopefully, this will be much faster than the version of cvpr 2013 + nseg = imsegs.nseg; + + iDim = 29 * 2 + 35; + feat = zeros( nseg, iDim ); + + spstats = regionprops( imsegs.segimage, 'Centroid', 'PixelIdxList', 'Area', 'Perimeter' ); + + adjmat = double( imsegs.adjmat ) .* (1 - eye(nseg, nseg)); + + r = double( imdata.image_rgb(:,:,1) ); + g = double( imdata.image_rgb(:,:,2) ); + b = double( imdata.image_rgb(:,:,3) ); + L = imdata.image_lab(:,:,1); + a = imdata.image_lab(:,:,2); + bb = imdata.image_lab(:,:,3); + h = imdata.image_hsv(:,:,1); + s = imdata.image_hsv(:,:,2); + v = imdata.image_hsv(:,:,3); + + [imh imw] = size(r); + + position = zeros(nseg, 2); + area = zeros(1, nseg); + + for ix = 1 : length(spstats) + position(ix, :) = (spstats(ix).Centroid); + area(ix) = spstats(ix).Area; + end + position = position / max(imh, imw); + + area_weight = repmat(area, [nseg, 1]) .* adjmat; + adj_area = sum(area_weight); + area_weight = area_weight ./ repmat(sum(area_weight, 2) + eps, [1, nseg]); + + sp = 1 / 0.4;%0.5 / ( 0.25 * 0.25 ); + dp = mexFeatureDistance( position', [], 'L2' ); + dist_weight = exp( -sp * dp ); + + feat_dist_mat = zeros(nseg, nseg, 29); + + % mean R, G, B distance, and x2 distance of RGB histogram + feat_dist_mat(:,:,1) = mexFeatureDistance(spdata.R, [], 'L1'); + feat_dist_mat(:,:,2) = mexFeatureDistance(spdata.G, [], 'L1'); + feat_dist_mat(:,:,3) = mexFeatureDistance(spdata.B, [], 'L1'); + feat_dist_mat(:,:,4) = mexFeatureDistance(spdata.RGBHist, [], 'x2'); + + % mean L, a, b distance, and x2 distance of Lab histogram + feat_dist_mat(:,:,5) = mexFeatureDistance(spdata.L, [], 'L1'); + feat_dist_mat(:,:,6) = mexFeatureDistance(spdata.a, [], 'L1'); + feat_dist_mat(:,:,7) = mexFeatureDistance(spdata.b, [], 'L1'); + feat_dist_mat(:,:,8) = mexFeatureDistance(spdata.LabHist, [], 'x2'); + + % mean H, S, V distance, and x2 distance of HSV histogram + feat_dist_mat(:,:,9) = mexFeatureDistance(spdata.H, [], 'L1'); + feat_dist_mat(:,:,10) = mexFeatureDistance(spdata.S, [], 'L1'); + feat_dist_mat(:,:,11) = mexFeatureDistance(spdata.V, [], 'L1'); + feat_dist_mat(:,:,12) = mexFeatureDistance(spdata.HSVHist, [], 'x2'); + + for ix = 1 : imdata.ntext + feat_dist_mat(:,:,12+ix) = mexFeatureDistance(spdata.texture(ix,:), [], 'L1'); + end + + feat_dist_mat(:,:,28) = mexFeatureDistance(spdata.textureHist, [], 'x2'); + + feat_dist_mat(:,:,29) = mexFeatureDistance(spdata.lbpHist, [], 'x2'); + + % regional contrast + for ix = 1 : 29 + % feat(:, ix) = sum(feat_dist_mat(:,:,ix) .* area_weight, 2); + feat(:, ix) = sum(feat_dist_mat(:,:,ix) .* dist_weight, 2) ./ (sum(dist_weight, 2) + eps); + end + + % regional backgroundness + dim = 29; + feat(:, dim + 1) = abs( spdata.R - pbgdata.R ); + feat(:, dim + 2) = abs( spdata.G - pbgdata.G ); + feat(:, dim + 3) = abs( spdata.B - pbgdata.B ); + + feat(:, dim + 4) = hist_dist( spdata.RGBHist, repmat(pbgdata.RGBHist, [1 nseg]), 'x2' ); + + feat(:, dim + 5) = abs( spdata.L - 
pbgdata.L ); + feat(:, dim + 6) = abs( spdata.a - pbgdata.a ); + feat(:, dim + 7) = abs( spdata.b - pbgdata.b ); + + feat(:, dim + 8) = hist_dist( spdata.LabHist, repmat(pbgdata.LabHist, [1 nseg]), 'x2' ); + + feat(:, dim + 9) = abs( spdata.H - pbgdata.H ); + feat(:, dim + 10) = abs( spdata.S - pbgdata.S ); + feat(:, dim + 11) = abs( spdata.V - pbgdata.V ); + + feat(:, dim + 12) = hist_dist( spdata.HSVHist, repmat(pbgdata.HSVHist, [1 nseg]), 'x2' ); + + for ift = 1 : imdata.ntext + feat(:, dim + 12 + ift) = abs( spdata.texture(ift, :) - pbgdata.texture(ift) ); + end + + feat(:, dim + 28) = hist_dist( spdata.textureHist, repmat(pbgdata.textureHist, [1 nseg]), 'x2' ); + + feat(:, dim + 29) = hist_dist( spdata.lbpHist, repmat(pbgdata.lbpHist, [1 nseg]), 'x2' ); + + ii = 29 * 2; + + % regional property + for reg = 1 : nseg + pixels = spstats(reg).PixelIdxList; + + xvals = imdata.xim( pixels ); + yvals = imdata.yim( pixels ); + + sx = sort( xvals ); + sy = sort( yvals ); + + LEFT = sx( ceil(numel(sx)/10) ); + TOP = sy( ceil(numel(sy)/10) ); + RIGHT = sx( ceil(numel(sx)/10*9) ); + BOTTOM = sy( ceil(numel(sy)/10*9) ); + + ix = ii; + feat(reg, ix+1) = mean( xvals ); + feat(reg, ix+2) = mean( yvals ); + + feat(reg, ix+3) = LEFT; + feat(reg, ix+4) = TOP; + feat(reg, ix+5) = RIGHT; + feat(reg, ix+6) = BOTTOM; + + feat(reg, ix+7) = spstats(reg).Perimeter / (imw + imh); + + feat(reg, ix+8) = (RIGHT - LEFT) / (BOTTOM - TOP + eps); % aspect ratio + + feat(reg, ix+8+(1:3)) = [var( r(pixels) ), var( g(pixels) ), var( b(pixels) )]; + feat(reg, ix+11+(1:3)) = [var( L(pixels) ), var( a(pixels) ), var( bb(pixels) )]; + feat(reg, ix+14+(1:3)) = [var( h(pixels) ), var( s(pixels) ), var( v(pixels) )]; + + for it = 1 : imdata.ntext + temp_text = imdata.imtext(:,:,it); + feat(reg, ix+17+it) = var( temp_text(pixels) ); + end + + feat(reg, ix+33) = var( imdata.imlbp(pixels) ); + + feat(reg, ix+34) = length(pixels) / imdata.imh / imdata.imw; % area + + feat(reg, ix+35) = adj_area(reg) / imdata.imh / imdata.imw; % area of neighbor + end +end \ No newline at end of file diff --git a/drfi_saliency_feature/drfiGetSameLabelFeat.m b/drfi_saliency_feature/drfiGetSameLabelFeat.m new file mode 100644 index 0000000..ee03f63 --- /dev/null +++ b/drfi_saliency_feature/drfiGetSameLabelFeat.m @@ -0,0 +1,95 @@ +function [edata, imdata] = drfiGetSameLabelFeat( imsegs, spdata, pbgdata, imdata ) + imh = imdata.imh; + imw = imdata.imw; + + % boundmap = imdata.boundmap; + [boundmap perim] = mcmcGetSuperpixelBoundaries_fast( imsegs ); + + boundx = cell(size(boundmap)); + boundy = cell(size(boundmap)); + + for ix = 1 : numel(boundmap) + [boundy{ix}, boundx{ix}] = ind2sub([imh, imw], boundmap{ix}); +% boundy{ix} = mod(boundmap{ix}, imh) + 1; +% boundx{ix} = ceil(boundmap{ix} / imh); + end + + nadj = 0; + for s1 = 1:imsegs.nseg + nadj = nadj + numel(find(perim(s1, s1+1:end)>0)); + end + + adjlist = zeros(nadj, 2); + c = 0; + for s1 = 1:imsegs.nseg + ns1 = numel(find(perim(s1, s1+1:end)>0)); + adjlist(c+1:c+ns1, 1) = s1; + adjlist(c+1:c+ns1, 2) = s1 + find(perim(s1, s1+1:end)>0); + c = c + ns1; + end + + imdata.adjlist = adjlist; + imdata.spstats = regionprops( imsegs.segimage, 'PixelIdxList' ); + + % superpixel saliency + sp_saliency = drfiGetRegionSaliencyFeature(imsegs, spdata, imdata, pbgdata); + +% texton_id = [29 58]; +% sp_saliency(:, texton_id) = []; + + saliency_dim = size(sp_saliency, 2); + + edata = zeros(nadj, 2 * saliency_dim + 29 + 7); + + for k = 1 : nadj + s1 = adjlist(k, 1); + s2 = adjlist(k, 2); + + % saliency for s1 + 
edata(k, 1:saliency_dim) = sp_saliency(s1, :); + % saliency for s2 + edata(k, saliency_dim+(1:saliency_dim)) = sp_saliency(s2, :); + + dim = 2 * saliency_dim; + % superpixel contrast + edata(k, dim + 1) = abs( spdata.R(s1) - spdata.R(s2) ); + edata(k, dim + 2) = abs( spdata.G(s1) - spdata.G(s2) ); + edata(k, dim + 3) = abs( spdata.B(s1) - spdata.B(s2) ); + + edata(k, dim + 4) = hist_dist( spdata.RGBHist(:,s1), spdata.RGBHist(:,s2), 'x2' ); + + edata(k, dim + 5) = abs( spdata.L(s1) - spdata.L(s2) ); + edata(k, dim + 6) = abs( spdata.a(s1) - spdata.a(s2) ); + edata(k, dim + 7) = abs( spdata.b(s1) - spdata.b(s2) ); + + edata(k, dim + 8) = hist_dist( spdata.LabHist(:,s1), spdata.LabHist(:,s2), 'x2' ); + + edata(k, dim + 9) = abs( spdata.H(s1) - spdata.H(s2) ); + edata(k, dim + 10) = abs( spdata.S(s1) - spdata.S(s2) ); + edata(k, dim + 11) = abs( spdata.V(s1) - spdata.V(s2) ); + + edata(k, dim + 12) = hist_dist( spdata.HSVHist(:,s1), spdata.HSVHist(:,s2), 'x2' ); + + for ift = 1 : imdata.ntext + edata(k, dim + 12 + ift) = abs( spdata.texture(ift, s1) - spdata.texture(ift, s2) ); + end + + edata(k, dim + 28) = hist_dist( spdata.textureHist(:,s1), spdata.textureHist(:,s2), 'x2' ); + + edata(k, dim + 29) = hist_dist( spdata.lbpHist(:, s1), spdata.lbpHist(:,s2), 'x2' ); + + % boundary geometry + x = boundx{s1, s2} / imw; + y = boundy{s1, s2} / imh; + sx = sort(x); + sy = sort(y); + + edata(k, dim+30) = mean(x); + edata(k, dim+31) = mean(y); + edata(k, dim+32) = sx(ceil(numel(sx)/10)); + edata(k, dim+33) = sx(ceil(9*numel(sx)/10)); + edata(k, dim+34) = sy(ceil(numel(sy)/10)); + edata(k, dim+35) = sy(ceil(9*numel(sy)/10)); + edata(k, dim+36) = length(x) / (imh + imw); + end +end \ No newline at end of file diff --git a/drfi_saliency_feature/drfiGetSuperpixelData.m b/drfi_saliency_feature/drfiGetSuperpixelData.m new file mode 100644 index 0000000..5d6c79f --- /dev/null +++ b/drfi_saliency_feature/drfiGetSuperpixelData.m @@ -0,0 +1,78 @@ +function spdata = drfiGetSuperpixelData( imdata, imsegs ) + spstats = regionprops( imsegs.segimage, 'PixelIdxList' ); + + image_rgb = imdata.image_rgb; + image_lab = imdata.image_lab; + image_hsv = imdata.image_hsv; + + Q_rgb = imdata.Q_rgb; + Q_lab = imdata.Q_lab; + Q_hsv = imdata.Q_hsv; + + imtext = imdata.imtext; + texthist = imdata.texthist; + imlbp = imdata.imlbp; + + imw = imdata.imw; + imh = imdata.imh; + + nseg = imsegs.nseg; + + spdata.R = zeros(1, nseg); + spdata.G = zeros(1, nseg); + spdata.B = zeros(1, nseg); + + spdata.RGBHist = zeros(imdata.nRGBHist, nseg); + + spdata.L = zeros(1, nseg); + spdata.a = zeros(1, nseg); + spdata.b = zeros(1, nseg); + + spdata.LabHist = zeros(imdata.nLabHist, nseg); + + spdata.H = zeros(1, nseg); + spdata.S = zeros(1, nseg); + spdata.V = zeros(1, nseg); + + spdata.HSVHist = zeros(imdata.nHSVHist, nseg); + + spdata.texture = zeros(imdata.ntext, nseg); + + spdata.textureHist = zeros(imdata.ntext, nseg); + + spdata.lbpHist = zeros(imdata.nlbp, nseg); + + for s = 1 : nseg + pixels = spstats(s).PixelIdxList; + spdata.R(s) = mean( image_rgb(pixels) ); + spdata.G(s) = mean( image_rgb(pixels + imw * imh) ); + spdata.B(s) = mean( image_rgb(pixels + imw * imh * 2) ); + + spdata.RGBHist(:, s) = hist( Q_rgb(pixels), 1:imdata.nRGBHist )'; + spdata.RGBHist(:, s) = spdata.RGBHist(:, s) / max( sum(spdata.RGBHist(:, s)), eps ); + + spdata.L(s) = mean( image_lab(pixels) ); + spdata.a(s) = mean( image_lab(pixels + imw * imh) ); + spdata.b(s) = mean( image_lab(pixels + imw * imh * 2) ); + + spdata.LabHist(:, s) = hist( Q_lab(pixels), 
1:imdata.nLabHist )'; + spdata.LabHist(:, s) = spdata.LabHist(:, s) / max( sum(spdata.LabHist(:, s)), eps ); + + spdata.H(s) = mean( image_hsv(pixels) ); + spdata.S(s) = mean( image_hsv(pixels + imw * imh) ); + spdata.V(s) = mean( image_hsv(pixels + imw * imh * 2) ); + + spdata.HSVHist(:, s) = hist( Q_hsv(pixels), 1:imdata.nHSVHist )'; + spdata.HSVHist(:, s) = spdata.HSVHist(:, s) / max( sum(spdata.HSVHist(:, s)), eps ); + + for ift = 1 : imdata.ntext + spdata.texture(ift, s) = mean( imtext(pixels+(ift-1)*imw*imh) ); + end + + spdata.textureHist(:, s) = hist( texthist(pixels), 1:imdata.ntext )'; + spdata.textureHist(:, s) = spdata.textureHist(:, s) / max( sum(spdata.textureHist(:, s)), eps ); + + spdata.lbpHist(:, s) = hist( imlbp(pixels), 0:255 )'; + spdata.lbpHist(:, s) = spdata.lbpHist(:, s) / max( sum(spdata.lbpHist(:, s)), eps ); + end +end \ No newline at end of file diff --git a/drfi_saliency_feature/hist_dist.m b/drfi_saliency_feature/hist_dist.m new file mode 100644 index 0000000..593fd2c --- /dev/null +++ b/drfi_saliency_feature/hist_dist.m @@ -0,0 +1,11 @@ +function diff = hist_dist( hist1, hist2, method ) + switch method + case 'x2' + % diff = 0.5 * sum((hist1 - hist2).^2) ./ sum(hist1 + hist2 + eps); + diff = 0.5 * sum( (hist1 - hist2).^2 ./ (hist1 + hist2 + eps) ); + case 'jsd' % Jensen-Shannon Divergence + diff = 0.5*(sum(hist1.*log((hist1+eps)./(hist2+eps))) + sum(hist2.*log((hist2+eps)./(hist1+eps)))); + otherwise + error( 'unknown type for computing histogram distance' ); + end +end \ No newline at end of file diff --git a/drfi_saliency_feature/rgb2lab.m b/drfi_saliency_feature/rgb2lab.m new file mode 100644 index 0000000..f40be3f --- /dev/null +++ b/drfi_saliency_feature/rgb2lab.m @@ -0,0 +1,64 @@ +function [L,a,b] = rgb2lab(R,G,B) +% function [L, a, b] = RGB2Lab(R, G, B) +% RGB2Lab takes matrices corresponding to Red, Green, and Blue, and +% transforms them into CIELab. This transform is based on ITU-R +% Recommendation BT.709 using the D65 white point reference. +% The error in transforming RGB -> Lab -> RGB is approximately +% 10^-5. RGB values can be either between 0 and 1 or between 0 and 255. +% By Mark Ruzon from C code by Yossi Rubner, 23 September 1997. +% Updated for MATLAB 5 28 January 1998. 
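+%
+% Usage sketch (IMG stands for any MxNx3 RGB image; it is not a variable
+% defined elsewhere in this code base):
+%   lab = rgb2lab(im2double(IMG));   % single output: MxNx3 array, L in [0,100]
+%   [L,a,b] = rgb2lab(R,G,B);        % or pass the three channels separately
+% Inputs in [0,255] are also accepted and are rescaled to [0,1] internally.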
+ +if (nargin == 1) + B = double(R(:,:,3)); + G = double(R(:,:,2)); + R = double(R(:,:,1)); +end + +if ((max(max(R)) > 1.0) | (max(max(G)) > 1.0) | (max(max(B)) > 1.0)) + R = R/255; + G = G/255; + B = B/255; +end + +[M, N] = size(R); +s = M*N; + +% Set a threshold +T = 0.008856; + +RGB = [reshape(R,1,s); reshape(G,1,s); reshape(B,1,s)]; + +% RGB to XYZ +MAT = [0.412453 0.357580 0.180423; + 0.212671 0.715160 0.072169; + 0.019334 0.119193 0.950227]; +XYZ = MAT * RGB; + +X = XYZ(1,:) / 0.950456; +Y = XYZ(2,:); +Z = XYZ(3,:) / 1.088754; + +XT = X > T; +YT = Y > T; +ZT = Z > T; + +fX = XT .* X.^(1/3) + (~XT) .* (7.787 .* X + 16/116); + +% Compute L +Y3 = Y.^(1/3); +fY = YT .* Y3 + (~YT) .* (7.787 .* Y + 16/116); +L = YT .* (116 * Y3 - 16.0) + (~YT) .* (903.3 * Y); + +fZ = ZT .* Z.^(1/3) + (~ZT) .* (7.787 .* Z + 16/116); + +% Compute a and b +a = 500 * (fX - fY); +b = 200 * (fY - fZ); + +L = reshape(L, M, N); +a = reshape(a, M, N); +b = reshape(b, M, N); + +if ((nargout == 1) | (nargout == 0)) + L = cat(3,L,a,b); +end \ No newline at end of file diff --git a/drfi_saliency_feature/textons/applyFilter.m b/drfi_saliency_feature/textons/applyFilter.m new file mode 100644 index 0000000..3b5c090 --- /dev/null +++ b/drfi_saliency_feature/textons/applyFilter.m @@ -0,0 +1,12 @@ +function [fim] = applyFilter(f,im) +% function [fim] = applyFilter(f,im) +% +% Apply a filter to an image with reflected boundary conditions. +% +% See also fbCreate, fbRun. +% +% David R. Martin +% March 2003 + +fim = fbRun({f},im); +fim = fim{1}; diff --git a/drfi_saliency_feature/textons/assignTextons.m b/drfi_saliency_feature/textons/assignTextons.m new file mode 100644 index 0000000..a986415 --- /dev/null +++ b/drfi_saliency_feature/textons/assignTextons.m @@ -0,0 +1,14 @@ +function [map] = assignTextons(fim,textons) +% function [map] = assignTextons(fim,textons) + +d = numel(fim); +n = numel(fim{1}); +data = zeros(d,n); +for i = 1:d, + data(i,:) = fim{i}(:)'; +end + +d2 = distSqr(data,textons); +[y,map] = min(d2,[],2); +[w,h] = size(fim{1}); +map = reshape(map,w,h); diff --git a/drfi_saliency_feature/textons/cluster_learning_create_image_filters.m b/drfi_saliency_feature/textons/cluster_learning_create_image_filters.m new file mode 100644 index 0000000..6f5eb6f --- /dev/null +++ b/drfi_saliency_feature/textons/cluster_learning_create_image_filters.m @@ -0,0 +1,28 @@ +function [doog_filters, texton_data] = cluster_learning_create_image_filters; + +% create pair data +angle_step = 15; +filter_size = 5; +sigma = 1.5; +r = 0.25; + +% create DOOG filters +num_angles=180/angle_step; +doog_filters = zeros(filter_size, filter_size, num_angles); +for (theta=0:angle_step:180-angle_step) + doog_filters(:, :, theta/angle_step+1)=createDOOGFilter(sigma, r, theta, filter_size); +end + +% load textons +if 0 +texton_data = load('D:\Projects\Tools\textons\unitex_6_1_2_1.4_2_32.mat'); +ordered_textons = select_diverse_textons(texton_data.tsim, 12); +new_tims = cell(length(ordered_textons), 1); +for i = 1:length(new_tims) + new_tims{i} = texton_data.tim{ordered_textons(i)}; +end +texton_data.tim = new_tims; +end + +texton_data.tim = []; +%num_textons = length(texton_data.tim); \ No newline at end of file diff --git a/drfi_saliency_feature/textons/computeTextons.m b/drfi_saliency_feature/textons/computeTextons.m new file mode 100644 index 0000000..84326d3 --- /dev/null +++ b/drfi_saliency_feature/textons/computeTextons.m @@ -0,0 +1,15 @@ +function [map,textons] = computeTextons(fim,k) +% function [map,textons] = computeTextons(fim,k) + +d = 
numel(fim); +n = numel(fim{1}); +data = zeros(d,n); +for i = 1:d, + data(i,:) = fim{i}(:)'; +end + +[map,textons] = kmeansML(k,data,'maxiter',30,'verbose',0); +[w,h] = size(fim{1}); +map = reshape(map,w,h); + + diff --git a/drfi_saliency_feature/textons/compute_texton_response.m b/drfi_saliency_feature/textons/compute_texton_response.m new file mode 100644 index 0000000..14b331b --- /dev/null +++ b/drfi_saliency_feature/textons/compute_texton_response.m @@ -0,0 +1,14 @@ +function tex_responses = compute_texton_response(im, tim) + +if isempty(tim) + tex_responses = []; + return; +end + +tex_responses = zeros(size(im, 1), size(im, 2), numel(tim)); + +fims = fbRun(tim, im); + +for i = 1:length(fims) + tex_responses(:, :, i) = abs(fims{i}); +end diff --git a/drfi_saliency_feature/textons/csFilter.m b/drfi_saliency_feature/textons/csFilter.m new file mode 100644 index 0000000..bdbf791 --- /dev/null +++ b/drfi_saliency_feature/textons/csFilter.m @@ -0,0 +1,37 @@ +function [f] = csFilter(sigma,support) +% function [f] = csFilter(sigma,support) +% +% Compute unit L1-norm zero-mean difference-of-Gaussians +% center-surround filter. +% +% INPUTS +% sigma 2-element vector of inner/outer sigma. +% [support] Make filter +/- this many sigma. +% +% OUTPUTS +% f Square filter. +% +% David R. Martin +% March 2003 + +nargchk(1,2,nargin); +if nargin<2, support=3; end + +if numel(sigma)~=2, + error('sigma must have 2 elements'); +end + +% DOG +ratio = max(sigma) / min(sigma); +f1 = oeFilter(sigma(1),support*max(1,sigma(2)/sigma(1))); +f2 = oeFilter(sigma(2),support*max(1,sigma(1)/sigma(2))); +f = f1 - f2; + +% zero mean +f = f - mean(f(:)); + +% unit L1-norm +sumf = sum(abs(f(:))); +if sumf>0, + f = f / sumf; +end diff --git a/drfi_saliency_feature/textons/distSqr.m b/drfi_saliency_feature/textons/distSqr.m new file mode 100644 index 0000000..b377c1b --- /dev/null +++ b/drfi_saliency_feature/textons/distSqr.m @@ -0,0 +1,39 @@ +function z = distSqr(x,y) +% function z = distSqr(x,y) +% +% Return matrix of all-pairs squared distances between the vectors +% in the columns of x and y. +% +% INPUTS +% x dxn matrix of vectors +% y dxm matrix of vectors +% +% OUTPUTS +% z nxm matrix of squared distances +% +% This routine is faster when mn. +% +% David Martin +% March 2003 + +% Based on dist2.m code, +% Copyright (c) Christopher M Bishop, Ian T Nabney (1996, 1997) + +if size(x,1)~=size(y,1), + error('size(x,1)~=size(y,1)'); +end + +[d,n] = size(x); +[d,m] = size(y); + +% z = repmat(sum(x.^2)',1,m) ... +% + repmat(sum(y.^2),n,1) ... +% - 2*x'*y; + +z = x'*y; +x2 = sum(x.^2)'; +y2 = sum(y.^2); +for i = 1:m, + z(:,i) = x2 + y2(i) - 2*z(:,i); +end + diff --git a/drfi_saliency_feature/textons/fbCreate.m b/drfi_saliency_feature/textons/fbCreate.m new file mode 100644 index 0000000..695ed3f --- /dev/null +++ b/drfi_saliency_feature/textons/fbCreate.m @@ -0,0 +1,29 @@ +function [fb] = fbCreate(numOrient,startSigma,numScales,scaling,elong) +% function [fb] = fbCreate(numOrient,startSigma,numScales,scaling,elong) +% +% Create a filterbank containing numOrient even and odd-symmetric +% filters and one center-surround filter at numScales scales. +% +% The even-symmetric filter is a Gaussian second derivative. +% The odd-symmetric filter is its Hilbert transform. +% +% See also oeFilter, csFilter, fbRun. +% +% David R. 
Martin +% March 2003 + +if nargin<3, numScales = 1; end +if nargin<4, scaling = sqrt(2); end +if nargin<5, elong = 3; end +support = 3; + +fb = cell(2*numOrient,numScales); +for scale = 1:numScales, + sigma = startSigma * scaling^(scale-1); + for orient = 1:numOrient, + theta = (orient-1)/numOrient * pi; + fb{2*orient-1,scale} = oeFilter(sigma*[elong 1],support,theta, 2,0); + fb{2*orient,scale} = oeFilter(sigma*[elong 1],support,theta,2,1); + end + %fb{2*numOrient+1,scale} = csFilter(sigma*[3 1],support); +end diff --git a/drfi_saliency_feature/textons/fbRun.m b/drfi_saliency_feature/textons/fbRun.m new file mode 100644 index 0000000..7d26e06 --- /dev/null +++ b/drfi_saliency_feature/textons/fbRun.m @@ -0,0 +1,31 @@ +function [fim] = fbRun(fb,im) +% function [fim] = fbRun(fb,im) +% +% Run a filterbank on an image with reflected boundary conditions. +% +% See also fbCreate,padReflect. +% +% David R. Martin +% March 2003 + +% find the max filter size +maxsz = max(size(fb{1})); +for i = 1:numel(fb), + maxsz = max(maxsz,max(size(fb{i}))); +end + +% pad the image +r = floor(maxsz/2); +impad = padReflect(im,r); + +% run the filterbank on the padded image, and crop the result back +% to the original image size +fim = cell(size(fb)); +for i = 1:numel(fb), + if size(fb{i},1)<50, + fim{i} = conv2(impad,fb{i},'same'); + else + fim{i} = fftconv2(impad,fb{i}); + end + fim{i} = fim{i}(r+1:end-r,r+1:end-r); +end diff --git a/drfi_saliency_feature/textons/fftconv2.m b/drfi_saliency_feature/textons/fftconv2.m new file mode 100644 index 0000000..7eb7254 --- /dev/null +++ b/drfi_saliency_feature/textons/fftconv2.m @@ -0,0 +1,21 @@ +function fim = fftconv2(im,f) +% function fim = fftconv2(im,f) +% +% Convolution using FFT. +% +% David R. Martin +% March 2003 + +% wrap the filter around the origin and pad with zeros +padf = zeros(size(im)); +r = floor(size(f,1)/2); +padf(1:r+1,1:r+1) = f(r+1:end,r+1:end); +padf(1:r,end-r+1:end) = f(r+2:end,1:r); +padf(end-r+1:end,1:r) = f(1:r,r+2:end); +padf(end-r+1:end,end-r+1:end) = f(1:r,1:r); + +% magic +fftim = fft2(im); +fftf = fft2(padf); +fim = real(ifft2(fftim.*fftf)); + diff --git a/drfi_saliency_feature/textons/genunitex.m b/drfi_saliency_feature/textons/genunitex.m new file mode 100644 index 0000000..369353b --- /dev/null +++ b/drfi_saliency_feature/textons/genunitex.m @@ -0,0 +1,16 @@ + +no = 6; +ss = 1; +ns = 2; +sc = sqrt(2); +el = 2; +fb = fbCreate(no,ss,ns,sc,el); +for k = [32 64 128], + tex = unitex(fb,k); + tsim = textonsim(fb,tex); + [tim,tperm] = visTextons(tex,fb); + save(sprintf('unitex_%.2g_%.2g_%.2g_%.2g_%.2g_%d.mat',... + no,ss,ns,sc,el,k),... + 'fb','tex','tsim','tim','tperm'); +end + diff --git a/drfi_saliency_feature/textons/makeLMfilters.m b/drfi_saliency_feature/textons/makeLMfilters.m new file mode 100644 index 0000000..a99a395 --- /dev/null +++ b/drfi_saliency_feature/textons/makeLMfilters.m @@ -0,0 +1,62 @@ +function F=makeLMfilters +% Returns the LML filter bank of size 49x49x48 in F. To convolve an +% image I with the filter bank you can either use the matlab function +% conv2, i.e. responses(:,:,i)=conv2(I,F(:,:,i),'valid'), or use the +% Fourier transform. 
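+%
+% Note: with the settings below (SUP = 19 and a single scale) this trimmed
+% variant actually returns a 19x19x15 bank (6 bar, 6 edge and 3 rotationally
+% symmetric filters), consistent with the 15 texture channels assumed by
+% drfiGetImageData. Usage sketch mirroring that function (GRAY stands for
+% any grayscale image):
+%   F = makeLMfilters;
+%   for f = 1:size(F,3)
+%       r = abs(imfilter(im2single(GRAY), F(:,:,f), 'same'));
+%       responses(:,:,f) = (r - min(r(:))) / (max(r(:)) - min(r(:)) + eps);
+%   end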
+ + SUP=19;%49; % Support of the largest filter (must be odd) + SCALEX=sqrt(2).^1; %sqrt(2).^[1:3]; % Sigma_{x} for the oriented filters + NORIENT=6; % Number of orientations + + NROTINV=3; + NBAR=length(SCALEX)*NORIENT; + NEDGE=length(SCALEX)*NORIENT; + NF=NBAR+NEDGE+NROTINV; + F=zeros(SUP,SUP,NF); + hsup=(SUP-1)/2; + [x,y]=meshgrid([-hsup:hsup],[hsup:-1:-hsup]); + orgpts=[x(:) y(:)]'; + + count=1; + for scale=1:length(SCALEX), + for orient=0:NORIENT-1, + angle=pi*orient/NORIENT; % Not 2pi as filters have symmetry + c=cos(angle);s=sin(angle); + rotpts=[c -s;s c]*orgpts; + F(:,:,count)=makefilter(SCALEX(scale),0,1,rotpts,SUP); + F(:,:,count+NEDGE)=makefilter(SCALEX(scale),0,2,rotpts,SUP); + count=count+1; + end; + end; + + count=NBAR+NEDGE+1; + SCALES=sqrt(2).^1; %sqrt(2).^[1:4]; + for i=1:length(SCALES), + F(:,:,count)=normalise(fspecial('gaussian',SUP,SCALES(i))); + F(:,:,count+1)=normalise(fspecial('log',SUP,SCALES(i))); + F(:,:,count+2)=normalise(fspecial('log',SUP,3*SCALES(i))); + count=count+3; + end; +return + +function f=makefilter(scale,phasex,phasey,pts,sup) + gx=gauss1d(3*scale,0,pts(1,:),phasex); + gy=gauss1d(scale,0,pts(2,:),phasey); + f=normalise(reshape(gx.*gy,sup,sup)); +return + +function g=gauss1d(sigma,mean,x,ord) +% Function to compute gaussian derivatives of order 0 <= ord < 3 +% evaluated at x. + + x=x-mean;num=x.*x; + variance=sigma^2; + denom=2*variance; + g=exp(-num/denom)/(pi*denom)^0.5; + switch ord, + case 1, g=-g.*(x/variance); + case 2, g=g.*((num-variance)/(variance^2)); + end; +return + +function f=normalise(f), f=f-mean(f(:)); f=f/sum(abs(f(:))); return \ No newline at end of file diff --git a/drfi_saliency_feature/textons/oeFilter.m b/drfi_saliency_feature/textons/oeFilter.m new file mode 100644 index 0000000..03c1f87 --- /dev/null +++ b/drfi_saliency_feature/textons/oeFilter.m @@ -0,0 +1,116 @@ +function [f] = oeFilter(sigma,support,theta,deriv,hil,vis) +% function [f] = oeFilter(sigma,support,theta,deriv,hil,vis) +% +% Compute unit L1-norm 2D filter. +% The filter is a Gaussian in the x direction. +% The filter is a Gaussian derivative with optional Hilbert +% transform in the y direction. +% The filter is zero-meaned if deriv>0. +% +% INPUTS +% sigma Scalar, or 2-element vector of [sigmaX sigmaY]. +% [support] Make filter +/- this many sigma. +% [theta] Orientation of x axis, in radians. +% [deriv] Degree of y derivative, one of {0,1,2}. +% [hil] Do Hilbert transform in y direction? +% [vis] Visualization for debugging? +% +% OUTPUTS +% f Square filter. +% +% David R. Martin +% March 2003 + +nargchk(1,6,nargin); +if nargin<2, support=3; end +if nargin<3, theta=0; end +if nargin<4, deriv=0; end +if nargin<5, hil=0; end +if nargin<6, vis=0; end + +if numel(sigma)==1, + sigma = [sigma sigma]; +end +if deriv<0 | deriv>2, + error('deriv must be in [0,2]'); +end + +% Calculate filter size; make sure it's odd. +hsz = max(ceil(support*sigma)); +sz = 2*hsz + 1; + +% Sampling limits. +maxsamples = 1000; % Max samples in each dimension. +maxrate = 10; % Maximum sampling rate. +frate = 10; % Over-sampling rate for function evaluation. + +% Cacluate sampling rate and number of samples. +rate = min(maxrate,max(1,floor(maxsamples/sz))); +samples = sz*rate; + +% The 2D samping grid. +r = floor(sz/2) + 0.5 * (1 - 1/rate); +dom = linspace(-r,r,samples); +[sx,sy] = meshgrid(dom,dom); + +% Bin membership for 2D grid points. +mx = round(sx); +my = round(sy); +membership = (mx+hsz+1) + (my+hsz)*sz; + +% Rotate the 2D sampling grid by theta. 
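+% (su,sv) are the sample coordinates expressed in the filter's own frame:
+% further below, the plain Gaussian fx is evaluated along su and the
+% (optionally Hilbert-transformed) derivative fy along sv, so theta sets the
+% orientation of the resulting filter.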
+su = sx*sin(theta) + sy*cos(theta); +sv = sx*cos(theta) - sy*sin(theta); + +if vis, + figure(1); clf; hold on; + plot(sx,sy,'.'); + plot(mx,my,'o'); + %plot([sx(:) mx(:)]',[sy(:) my(:)]','k-'); + plot(su,sv,'x'); + axis equal; + ginput(1); +end + +% Evaluate the function separably on a finer grid. +R = r*sqrt(2)*1.01; % radius of domain, enlarged by >sqrt(2) +fsamples = ceil(R*rate*frate); % number of samples +fsamples = fsamples + mod(fsamples+1,2); % must be odd +fdom = linspace(-R,R,fsamples); % domain for function evaluation +gap = 2*R/(fsamples-1); % distance between samples + +% The function is a Gaussian in the x direction... +fx = exp(-fdom.^2/(2*sigma(1)^2)); +% .. and a Gaussian derivative in the y direction... +fy = exp(-fdom.^2/(2*sigma(2)^2)); +switch deriv, + case 1, + fy = fy .* (-fdom/(sigma(2)^2)); + case 2, + fy = fy .* (fdom.^2/(sigma(2)^2) - 1); +end +% ...with an optional Hilbert transform. +if hil, + fy = imag(hilbert(fy)); +end + +% Evaluate the function with NN interpolation. +xi = round(su/gap) + floor(fsamples/2) + 1; +yi = round(sv/gap) + floor(fsamples/2) + 1; +f = fx(xi) .* fy(yi); + +% Accumulate the samples into each bin. +f = isum(f,membership,sz*sz); +f = reshape(f,sz,sz); + +% zero mean +if deriv>0, + f = f - mean(f(:)); +end + +% unit L1-norm +sumf = sum(abs(f(:))); +if sumf>0, + f = f / sumf; +end + diff --git a/drfi_saliency_feature/textons/padReflect.m b/drfi_saliency_feature/textons/padReflect.m new file mode 100644 index 0000000..864a0fd --- /dev/null +++ b/drfi_saliency_feature/textons/padReflect.m @@ -0,0 +1,19 @@ +function [impad] = padReflect(im,r) +% function [impad] = padReflect(im,r) +% +% Pad an image with a border of size r, and reflect the image into +% the border. +% +% David R. Martin +% March 2003 + +impad = zeros(size(im)+2*r); +impad(r+1:end-r,r+1:end-r) = im; % middle +impad(1:r,r+1:end-r) = flipud(im(1:r,:)); % top +impad(end-r+1:end,r+1:end-r) = flipud(im(end-r+1:end,:)); % bottom +impad(r+1:end-r,1:r) = fliplr(im(:,1:r)); % left +impad(r+1:end-r,end-r+1:end) = fliplr(im(:,end-r+1:end)); % right +impad(1:r,1:r) = flipud(fliplr(im(1:r,1:r))); % top-left +impad(1:r,end-r+1:end) = flipud(fliplr(im(1:r,end-r+1:end))); % top-right +impad(end-r+1:end,1:r) = flipud(fliplr(im(end-r+1:end,1:r))); % bottom-left +impad(end-r+1:end,end-r+1:end) = flipud(fliplr(im(end-r+1:end,end-r+1:end))); % bottom-right diff --git a/drfi_saliency_feature/textons/scratch.m b/drfi_saliency_feature/textons/scratch.m new file mode 100644 index 0000000..a81228b --- /dev/null +++ b/drfi_saliency_feature/textons/scratch.m @@ -0,0 +1,56 @@ + +im = imgRead(101085,'gray'); +fb = fbCreate(6,1,1,3); +ntex = 32; +[tmap,tex] = computeTextons(fbRun(fb,im),ntex); +[tim,tperm] = visTextons(tex,fb); +wt = zeros(ntex,1); +for i = 1:ntex, + wt(i) = sum(abs(tim{i}(:))); % L1 norm of texton +end +wt = wt / max(wt(:)); +tsim = zeros(ntex); +for i = 1:ntex, + for j = 1:ntex, + tsim(i,j) = sum(sum(abs(tim{i}-tim{j}))); + end +end +r = 10; +norient = 6; +tic; [tg,theta] = tgmo(tmap,ntex,r,norient,tsim); toc; +aa = cell(size(tg)); +bb = cell(size(tg)); +cc = cell(size(tg)); +for i = 1:numel(tg), + tic; [c,b,a] = fitparab(tg{i},r,theta(i)); toc; + aa{i}=a; bb{i}=b; cc{i}=c; +end +tgs = cell(size(tg)); +pb = zeros(size(tmap)); +for i = 1:numel(tgs), + tgs{i} = max(0,cc{i}) .* (aa{i}<0) .* exp(-abs(bb{i})/0.1); + pb = max(pb,tgs{i}); +end +pb2 = zeros(size(tmap)); +for i = 1:numel(tgs), + pb2 = max(pb2,(tgs{i}==pb).*nonmax(tgs{i},theta(i))); +end + +figure(1); clf; +imshow(im); + 
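+% Figures: montage of the learned textons (figure 2), per-pixel texton map
+% (figure 3), raw texture-gradient boundary strength pb (figure 4), and its
+% non-maximum-suppressed version pb2 (figure 5).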
+figure(2); clf; +imagesc(mymontage({tim{tperm}})); +axis image; colorbar; + +figure(3); clf; +imagesc(tmap); +truesize; + +figure(4); clf; +imagesc(pb); +truesize; + +figure(5); clf; +imagesc(pb2) +truesize; diff --git a/drfi_saliency_feature/textons/select_diverse_textons.m b/drfi_saliency_feature/textons/select_diverse_textons.m new file mode 100644 index 0000000..1c1c4d8 --- /dev/null +++ b/drfi_saliency_feature/textons/select_diverse_textons.m @@ -0,0 +1,14 @@ +function ordered_textons = select_diverse_textons(tsim, nt) + +next_t = zeros(nt, 1); +vals = zeros(nt, 1); +indices = (1:size(tsim, 1)); +for t = 1:nt + total_dsim = mean(tsim, 1); + [vals(t), index] = max(total_dsim); + next_t(t) = indices(index); + tsim = tsim([(1:index-1) (index+1:end)], [(1:index-1) (index+1:end)]); + indices = indices([(1:index-1) (index+1:end)]); +end + +ordered_textons = next_t; \ No newline at end of file diff --git a/drfi_saliency_feature/textons/textonsim.m b/drfi_saliency_feature/textons/textonsim.m new file mode 100644 index 0000000..a717629 --- /dev/null +++ b/drfi_saliency_feature/textons/textonsim.m @@ -0,0 +1,20 @@ +function tsim = textonsim(fb,tex) +% function tsim = textonsim(fb,tex) +% +% Compute texton dis-similarity matrix. The dis-similarity between +% two textons is given by their L1 difference run through an +% exponential. +% +% David R. Martin +% April 2003 +tim = visTextons(tex,fb); +ntex = size(tex,2); +tsim = zeros(ntex); +for i = 1:ntex, + for j = 1:ntex, + tsim(i,j) = sum(sum(abs(tim{i}-tim{j}))); + end +end +sigma = 0.25*max(tsim(:)); +tsim = 1 - exp(-tsim/sigma); +tsim = tsim / max(tsim(:)); diff --git a/drfi_saliency_feature/textons/unitex.m b/drfi_saliency_feature/textons/unitex.m new file mode 100644 index 0000000..1e63150 --- /dev/null +++ b/drfi_saliency_feature/textons/unitex.m @@ -0,0 +1,33 @@ +function [textons] = unitex(fb,k) +% function [textons] = unitex(fb,k) +% +% Compute universal textons from the training images. 
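+%
+% Sketch of the intended pipeline (assumes the BSDS-style imgList/imgRead
+% helpers used inside this function are on the path; IM stands for any
+% grayscale image):
+%   fb  = fbCreate(6,1,2,sqrt(2),2);        % same settings as genunitex.m
+%   tex = unitex(fb,32);                    % 32 universal textons
+%   map = assignTextons(fbRun(fb,IM),tex);  % texton label for each pixel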
+ +iids = imgList('train'); + +n = 100000; +nper = round(n/numel(iids)); +n = nper * numel(iids); + +d = numel(fb); +data = zeros(d,n); + +c = 0; +for i = 1:numel(iids), + iid = iids(i); + fprintf(2,'Processing image %d/%d (iid=%d)...\n',i,numel(iids),iid); + im = imgRead(iid,'gray'); + fim = fbRun(fb,im); + npix = numel(im); + p = randperm(npix); + p = p(1:min(npix,nper)); + m = numel(p); + for j = 1:d, + data(j,c+1:c+m) = fim{j}(p); + end + c = c + m; +end +data = data(:,1:c); + +fprintf(2,'Computing %d universal textons from %d samples...\n',k,c); +[unused,textons] = kmeansML(k,data,'maxiter',30,'verbose',1); diff --git a/drfi_saliency_feature/textons/unitex_6_1_2_1.4_2_128.mat b/drfi_saliency_feature/textons/unitex_6_1_2_1.4_2_128.mat new file mode 100644 index 0000000..7c0ef8a Binary files /dev/null and b/drfi_saliency_feature/textons/unitex_6_1_2_1.4_2_128.mat differ diff --git a/drfi_saliency_feature/textons/unitex_6_1_2_1.4_2_32.mat b/drfi_saliency_feature/textons/unitex_6_1_2_1.4_2_32.mat new file mode 100644 index 0000000..839cc1e Binary files /dev/null and b/drfi_saliency_feature/textons/unitex_6_1_2_1.4_2_32.mat differ diff --git a/drfi_saliency_feature/textons/unitex_6_1_2_1.4_2_64.mat b/drfi_saliency_feature/textons/unitex_6_1_2_1.4_2_64.mat new file mode 100644 index 0000000..426ef36 Binary files /dev/null and b/drfi_saliency_feature/textons/unitex_6_1_2_1.4_2_64.mat differ diff --git a/drfi_saliency_feature/textons/visTextons.m b/drfi_saliency_feature/textons/visTextons.m new file mode 100644 index 0000000..94de13d --- /dev/null +++ b/drfi_saliency_feature/textons/visTextons.m @@ -0,0 +1,33 @@ +function [tim,perm] = visTextons(textons,fb) +% function [tim,perm] = visTextons(textons,fb) + +if size(textons,1) ~= numel(fb), + error('size(textons,1) ~= numel(fb)'); +end + +[d,k] = size(textons); + +% find the max filter size +maxsz = max(size(fb{1})); +for j = 1:d, + maxsz = max(maxsz,max(size(fb{j}))); +end + +% compute the linear combinations of filters +tim = cell(k,1); +for i = 1:k, + tim{i} = zeros(maxsz); + for j = 1:d, + f = fb{j} * textons(j,i); + off = (maxsz-size(f,1))/2; + tim{i}(1+off:end-off,1+off:end-off) = tim{i}(1+off:end-off,1+off:end-off) + f; + end +end + +% computer permutation order for decreasing L1 norm +norms = zeros(k,1); +for i = 1:k, + norms(i) = sum(sum(abs(tim{i}))); +end +[y,perm] = sort(norms); +perm = flipud(perm); diff --git a/makeDefaultParameters.m b/makeDefaultParameters.m new file mode 100644 index 0000000..44f5dc2 --- /dev/null +++ b/makeDefaultParameters.m @@ -0,0 +1,56 @@ +function para = makeDefaultParameters() + % number of segmentations + % the more number of segmentations, the better performance and slower + % speed + para.num_segmentation = 15; + + % trained segment (region) saliency regressor, saliency fusion weight + % model = load( './model/model_MSRA_48s_trn_valid_full_93d_regressor_weight.mat' ); + model = load( './model/drfiModelMatlab.mat' ); + + [sw ind] = sort( model.w, 'descend' ); + w = sw(1 : para.num_segmentation ); + w = w / sum(w); + + para.w = w; + para.ind = ind(1 : para.num_segmentation); + + para.seg_para = model.para(para.ind,:); + +% newModel = load( './model/saliency_model_cpp.mat'); + para.segment_saliency_regressor = model.segment_saliency_regressor; + + % saveModel('Model.mat', para); +end + +% int _N; // Number of segmentation +% vecD _w; // weights with dimension: N +% Mat _segPara1d; // Segmentation parameters: [Nx3] +% int _NumN; // nrNodes: Number of nodes (41565) +% int _NumT; // number of Tree (200) +% 
// int Matrix of size [NumN x NumT] +% Mat _lDau1i, _rDau1i, _mBest1i; +% // char matrix of size [NumN x NumT] +% Mat _nodeStatus1c; +% // double matrix of size [NumN x NumT] +% Mat _upper1d, _avNode1d; +% vecI _ndTree; //[NumT] +% Mat _mlFilters15d; // [19 x 19 x 15] + +function saveModel(fileName, para) + N = para.num_segmentation; + sr = para.segment_saliency_regressor; + NumN = sr.nrnodes; + NumT = sr.ntree; + w = para.w; + segPara = para.seg_para; + lDau = sr.lDau; + rDau = sr.rDau; + mBest = sr.mbest; + nodeStatus = sr.nodestatus; + upper = sr.upper; + avNode = sr.avnode; + mlFilters = makeLMfilters; + ndTree = sr.ndtree; + save(fileName, 'N', 'NumN', 'NumT', 'w', 'segPara', 'lDau', 'rDau', 'mBest', 'nodeStatus', 'upper', 'avNode', 'mlFilters', 'ndTree'); +end \ No newline at end of file diff --git a/mex/mexFeatureDistance.cpp b/mex/mexFeatureDistance.cpp new file mode 100644 index 0000000..9b973e1 --- /dev/null +++ b/mex/mexFeatureDistance.cpp @@ -0,0 +1,107 @@ +#include "mex.h" + +#include +#include +#include + +// mexFeatureDistance( f1, f2, 'x1' ) +// where each column in f1 and f2 is a sample +// i.e., size(f1, 1) == feature_dimension +// size(f1, 2) == sample_number + +double Distance( double *e1, double *e2, int iDim, const char *distType ); + +void mexFunction( int nlhs, mxArray *plhs[], int nrhs, + const mxArray *prhs[] ) +{ + if( nrhs != 3 ) + { + mexPrintf( "usage: %s f1 f2 dist_type\n" ); + return; + } + + bool isSameFeature = mxIsEmpty( prhs[1] ); + + double *f1 = (double*)mxGetData( prhs[0] ); + int iSample = mxGetN( prhs[0] ); + int iDim = mxGetM( prhs[0] ); + + double *f2 = f1; + + if( !isSameFeature ) + { + f2 = (double*)mxGetData( prhs[1] ); + + if( mxGetN(prhs[1]) != iSample || mxGetM(prhs[1]) != iDim ) + { + mexErrMsgTxt( "The dimension of f1 and f2 are not matched." ); + return; + } + } + + char distType[10]; + mxGetString( prhs[2], distType, 10 ); + + // mexPrintf( "Sample: %d, iDim: %d, distType: %s\n", iSample, iDim, distType ); + + plhs[0] = mxCreateDoubleMatrix( iSample, iSample, mxREAL ); + double *distMat = (double*)mxGetData( plhs[0] ); + + if( isSameFeature ) + { + for( int ix = 0; ix < iSample; ++ix ) + { + for( int jx = 0; jx < iSample; ++jx ) + { + if( ix == jx ) + distMat[ix * iSample + jx] = 0.0; + if( ix < jx ) + distMat[ix * iSample + jx] = Distance( f1 + ix * iDim, f2 + jx * iDim, iDim, distType ); + else + distMat[ix * iSample + jx] = distMat[jx * iSample + ix]; + } + } + } + else + { + for( int ix = 0; ix < iSample; ++ix ) + { + for( int jx = 0; jx < iSample; ++jx ) + { + distMat[jx * iSample + ix] = Distance( f1 + ix * iDim, f2 + jx * iDim, iDim, distType ); + } + } + } +} + +double Distance( double *e1, double *e2, int iDim, const char *distType ) +{ + using namespace std; + + const double eps = mxGetEps(); + + double dist = 0.0; + if( strcmp(distType, "L1" ) == 0 ) + { + for( int dim = 0; dim < iDim; ++dim ) + dist += fabs( e1[dim] - e2[dim] ); + } + else if( strcmp(distType, "L2" ) == 0 ) + { + for( int dim = 0; dim < iDim; ++dim ) + dist += ( e1[dim] - e2[dim] ) * ( e1[dim] - e2[dim] ); + } + else if( strcmp(distType, "x2" ) == 0 ) + { + for( int dim = 0; dim < iDim; ++dim ) + dist += ( e1[dim] - e2[dim] ) * ( e1[dim] - e2[dim] ) / ( e1[dim] + e2[dim] + eps ); + dist /= 2.0; + } + else + { + mexErrMsgTxt( "Not supported feature distance type." 
); + return -1.0; + } + + return dist; +} \ No newline at end of file diff --git a/mex/mexLBP.cpp b/mex/mexLBP.cpp new file mode 100644 index 0000000..3d2d4ac --- /dev/null +++ b/mex/mexLBP.cpp @@ -0,0 +1,59 @@ +#include "mex.h" + +typedef unsigned char uchar; + +inline int sub2ind( int r, int c, int rows ) +{ + return (c * rows + r ); +} + +// usage: +// image_lbp = mexLBP( rgb2gray(image) ); +void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] ) +{ + if( nrhs != 1 ) + { + mexPrintf( "usage: mexLBP( rgb2gray(image) )\n" ); + return; + } + + uchar *image = (uchar*)mxGetData( prhs[0] ); + + int width = mxGetN( prhs[0] ); + int height = mxGetM( prhs[0] ); + + const int nb = 8; +// const int ox[nb] = {1, 0, -1, 0, 1, -1, -1, 1}; +// const int oy[nb] = {0, -1, 0, 1, -1, -1, 1, 1}; + + const int ox[nb] = {1, 1, 0, -1, -1, -1, 0, 1}; + const int oy[nb] = {0, 1, 1, 1, 0, -1, -1, -1}; + + plhs[0] = mxCreateNumericMatrix( height, width, mxUINT8_CLASS, mxREAL ); + uchar *lbp = (uchar*)mxGetData( plhs[0] ); + + for( int y = 0; y < height; ++y ) + { + for( int x = 0; x < width; ++x ) + { + int p = 0; + + int ind = sub2ind(y, x, height); + int pc = static_cast( image[ind] ); + + for( int n = 0; n < nb; ++n ) + { + int ni = sub2ind(y+oy[n], x+ox[n], height); + if( ni < 0 || ni >= height * width ) + continue; + + int pn = static_cast( image[ni] ); + + if( pn >= pc ) + p += (1 << (n+1)); + } + + lbp[ind] = static_cast( p ); + } + } +} diff --git a/multi-segmentation/computeQuantMatrix.asv b/multi-segmentation/computeQuantMatrix.asv new file mode 100644 index 0000000..273e028 --- /dev/null +++ b/multi-segmentation/computeQuantMatrix.asv @@ -0,0 +1,40 @@ +function Q = computeQuantMatrix(image, bins) +%compute the quantization matrix based on the 3-dimensional matrix imgLAB + +% if length(bins) ~= 3 +% error('Need 3 bins for quantization'); +% end + +% f = [1 4 6 4 1]; +% image = imfilter(image, f); +% image = imfilter(image, f'); + + image_lab = rgb2lab(image); + L = image_lab(:,:,1); + a = image_lab(:,:,2); + b = image_lab(:,:,3); + + image_hsv = rgb2hsv(image); + H = image_hsv(:,:,1); + ind = find(H > 0.5); + H(ind) = 1 - H(ind); + %s = image_hsv(:,:,2); + + ll = min(floor(L/(100/bins(1))) + 1,bins(1)); + aa = min(floor((a+120)/(240/bins(2))) + 1,bins(2)); + bb = min(floor((b+120)/(240/bins(3))) + 1,bins(3)); + hh = min(floor(H*bins(4)) + 1, bins(4)); + %ss = min(floor(s*bins(5)) + 1, bins(5)); + +% Q = (ll-1)* bins(2)*bins(3)*bins(4)*bins(5) + ... +% (aa-1)* bins(3)*bins(4)*bins(5) + ... +% (bb-1)* bins(4)*bins(5) + ... +% (hh-1)* bins(5) + ... +% ss + 1; + Q = (ll-1)* bins(2)*bins(3)*bins(4) + ... + (aa-1)* bins(3)*bins(4) + ... + (bb-1)* bins(4) + ... + hh + 1; +% Q = (ll-1) * bins(2) * bins(3) + ... +% (aa-1) * bins(3) + ... +% bb + 1; \ No newline at end of file diff --git a/multi-segmentation/computeQuantMatrix.m b/multi-segmentation/computeQuantMatrix.m new file mode 100644 index 0000000..8334f11 --- /dev/null +++ b/multi-segmentation/computeQuantMatrix.m @@ -0,0 +1,18 @@ +function Q = computeQuantMatrix(image_lab, bins) +%compute the quantization matrix based on the 3-dimensional matrix imgLAB + + if length(bins) ~= 3 + error('Need 3 bins for quantization'); + end + + L = image_lab(:,:,1); + a = image_lab(:,:,2); + b = image_lab(:,:,3); + + ll = min(floor(L/(100/bins(1))) + 1,bins(1)); + aa = min(floor((a+120)/(240/bins(2))) + 1,bins(2)); + bb = min(floor((b+120)/(240/bins(3))) + 1,bins(3)); + + Q = (ll-1)* bins(2)*bins(3) + ... + (aa-1)* bins(3) + ... 
+ bb + 1; \ No newline at end of file diff --git a/multi-segmentation/computeRegionHist.m b/multi-segmentation/computeRegionHist.m new file mode 100644 index 0000000..778f53b --- /dev/null +++ b/multi-segmentation/computeRegionHist.m @@ -0,0 +1,18 @@ +function regionHist = computeRegionHist(Q, bins, segimage) + num_region = max(segimage(:)); + + num_bin = bins(1)*bins(2)*bins(3); + + regionHist = zeros(num_region, num_bin); + + spstats = regionprops(segimage, 'PixelIdxList'); + + for ix = 1 : num_region + pixel_ind = spstats(ix).PixelIdxList; + bin_ind = sort(Q(pixel_ind)); + [v m n] = unique(bin_ind); + mm = [0; m(1:end-1)]; + freq = m - mm; + regionHist(ix,v) = regionHist(ix,v) + freq'; + end + \ No newline at end of file diff --git a/multi-segmentation/disjoint-set.h b/multi-segmentation/disjoint-set.h new file mode 100644 index 0000000..01e98a4 --- /dev/null +++ b/multi-segmentation/disjoint-set.h @@ -0,0 +1,90 @@ +/* +Copyright (C) 2006 Pedro Felzenszwalb + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef DISJOINT_SET +#define DISJOINT_SET + +// disjoint-set forests using union-by-rank and path compression (sort of). 
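+// Illustrative usage sketch (comments only; the names refer to the class defined below):
+//   universe u(n);                      // n singleton components
+//   int a = u.find(i), b = u.find(j);   // canonical roots of elements i and j
+//   if (a != b) u.join(a, b);           // join() expects roots returned by find()
+//   int sz = u.size(u.find(i));         // element count of the merged component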
+ +typedef struct { + int rank; + int p; + int size; +} uni_elt; + +class universe { +public: + universe(int elements); + universe(int elements, int *size_array); + ~universe(); + int find(int x); + void join(int x, int y); + int size(int x) const { return elts[x].size; } + int num_sets() const { return num; } + +private: + uni_elt *elts; + int num; +}; + +universe::universe(int elements) { + elts = new uni_elt[elements]; + num = elements; + for (int i = 0; i < elements; i++) { + elts[i].rank = 0; + elts[i].size = 1; + elts[i].p = i; + } +} + +universe::universe(int elements, int *size_array) +{ + elts = new uni_elt[elements]; + num = elements; + for (int i = 0; i < elements; i++) { + elts[i].rank = 0; + elts[i].size = size_array[i]; + elts[i].p = i; + } +} +universe::~universe() { + delete [] elts; +} + +int universe::find(int x) { + int y = x; + while (y != elts[y].p) + y = elts[y].p; + elts[x].p = y; + return y; +} + +void universe::join(int x, int y) { + if (elts[x].rank > elts[y].rank) { + elts[y].p = x; + elts[x].size += elts[y].size; + } else { + elts[x].p = y; + elts[y].size += elts[x].size; + if (elts[x].rank == elts[y].rank) + elts[y].rank++; + } + num--; +} + +#endif diff --git a/multi-segmentation/histDist.m b/multi-segmentation/histDist.m new file mode 100644 index 0000000..c217b0c --- /dev/null +++ b/multi-segmentation/histDist.m @@ -0,0 +1,16 @@ +function dist = histDist(h1, h2, method) + if nargin == 2 + method = 'x2'; + end + + % normalize + h1 = h1 / (sum(h1(:)) + eps); + h2 = h2 / (sum(h2(:)) + eps); + + dist = 0; + + switch method + case 'x2' + dist = sum((h1-h2).^2 ./ (h2+h1+eps)) / 2; + end + \ No newline at end of file diff --git a/multi-segmentation/mexMergeAdjRegs_Felzenszwalb.cpp b/multi-segmentation/mexMergeAdjRegs_Felzenszwalb.cpp new file mode 100644 index 0000000..5e8894d --- /dev/null +++ b/multi-segmentation/mexMergeAdjRegs_Felzenszwalb.cpp @@ -0,0 +1,77 @@ +#include "mex.h" +#include "new-segment-graph.h" + +void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] ) +{ + double *adjlist = mxGetPr( prhs[0] ); + double *pE = mxGetPr( prhs[1] ); + mwSize nadj = mxGetM( prhs[0] ); + mwSize dummy = mxGetN( prhs[0] ); + if( nadj != mxGetM(prhs[1]) || dummy != 2 ) + { + mexErrMsgTxt( "Error in merging adjacent regions: invalid input" ); + } + + double numSuperpixel = mxGetScalar( prhs[2] ); + double *threshold = mxGetPr( prhs[3] ); + int m = mxGetM( prhs[3] ); + int n = mxGetN( prhs[3] ); + int numSegmentation = m * n; + + double *supSize = mxGetPr( prhs[4] ); + if( numSuperpixel != mxGetM(prhs[4]) ) + { + mexErrMsgTxt( "Error in merging adjacent regions: invalid input" ); + } + + int *newSupSize = new int[ static_cast(numSuperpixel) ]; + for( int ix = 0; ix < numSuperpixel; ++ix ) + newSupSize[ix] = static_cast( supSize[ix] ); + + mexPrintf( "nadj: %d\n", nadj ); + mexPrintf( "m: %d, n: %d\n", m, n ); + + mexPrintf( "numSuperpixel: %.1f, numSegmentation: %d\n", numSuperpixel, numSegmentation ); + + plhs[0] = mxCreateDoubleMatrix( numSuperpixel, numSegmentation, mxREAL ); + double *label = mxGetPr( plhs[0] ); + + edge *edges = new edge[nadj]; + for( int e = 0; e < nadj; ++e ) + { + edges[e].a = adjlist[e] - 1; + edges[e].b = adjlist[e + nadj] - 1; + edges[e].w = 1 - pE[e]; + } + + static const int min_size = 300; + int num = 0; + for( int s = 0; s < numSegmentation; ++s ) + { + float t = static_cast( threshold[s] ); + // mexPrintf( "\t*** t: %.3f\n", t ); + + universe *u = segment_graph( numSuperpixel, nadj, edges, t, newSupSize ); + + // force 
minimum size of segmentation + for( int e = 0; e < nadj; ++e ) + { + int a = u->find( edges[e].a ); + int b = u->find( edges[e].b ); + if ((a != b) && ((u->size(a) < min_size) || (u->size(b) < min_size))) + u->join(a, b); + } + + + for( int ix = 0; ix < numSuperpixel; ++ix ) + { + label[num] = u->find( ix ); + ++num; + } + + delete u; + } + + delete [] newSupSize; + delete edges; +} \ No newline at end of file diff --git a/multi-segmentation/mexMergeAdjRegs_Felzenszwalb.mexw64 b/multi-segmentation/mexMergeAdjRegs_Felzenszwalb.mexw64 new file mode 100644 index 0000000..be84459 Binary files /dev/null and b/multi-segmentation/mexMergeAdjRegs_Felzenszwalb.mexw64 differ diff --git a/multi-segmentation/mexMergeAdjacentRegions.cpp b/multi-segmentation/mexMergeAdjacentRegions.cpp new file mode 100644 index 0000000..e176aeb --- /dev/null +++ b/multi-segmentation/mexMergeAdjacentRegions.cpp @@ -0,0 +1,58 @@ +#include "mex.h" +#include "disjoint-set.h" +#include +using std::abs; + +void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] ) +{ + double *dismat = mxGetPr( prhs[0] ); + double *threshold = mxGetPr( prhs[1] ); + + mwSize m = mxGetM( prhs[0] ); + mwSize n = mxGetN( prhs[0] ); + if( m != n ) + { + mexErrMsgTxt( "Error: provided distance matrix must be square.\n" ); + } + + int numSuperpixel = m; + + mwSize numSegmentation = mxGetM( prhs[1] ) * mxGetN( prhs[1] ); + mexPrintf( "numSegmentation: %d\n", numSegmentation ); + + plhs[0] = mxCreateDoubleMatrix( numSuperpixel, numSegmentation, mxREAL ); + double *label = mxGetPr( plhs[0] ); + + int num = 0; + for( int s = 0; s < numSegmentation; ++s ) + { + double t = threshold[s]; + universe *u = new universe( numSuperpixel ); + for( int ix = 0; ix < numSuperpixel * numSuperpixel; ++ix ) + { + int r1 = ix % m; + int r2 = ix / m; + + int a = u->find( r1 ); + int b = u->find( r2 ); + if( dismat[ix] <= t && abs(dismat[ix]) > 1e-5 && a != b ) + { + u->join(a, b); + } + } + + mexPrintf( "\tafter merging, there are %d regions.\n", u->num_sets() ); + + for( int ix = 0; ix < numSuperpixel; ++ix ) + { + label[num] = u->find( ix ); + ++num; + } + + delete u; + } + if( num != numSuperpixel * numSegmentation ) + { + mexErrMsgTxt( "Error in generating multiple segmentations." 
); + } +} \ No newline at end of file diff --git a/multi-segmentation/mexMergeAdjacentRegions2.cpp b/multi-segmentation/mexMergeAdjacentRegions2.cpp new file mode 100644 index 0000000..d158464 --- /dev/null +++ b/multi-segmentation/mexMergeAdjacentRegions2.cpp @@ -0,0 +1,65 @@ +#include "mex.h" +#include "disjoint-set.h" +#include +using std::abs; + +void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] ) +{ + double *adjlist = mxGetPr( prhs[0] ); + double *pE = mxGetPr( prhs[1] ); + mwSize nadj = mxGetM( prhs[0] ); + mwSize dummy = mxGetN( prhs[0] ); + if( nadj != mxGetM(prhs[1]) || dummy != 2 ) + { + mexErrMsgTxt( "Error in merging adjacent regions: invalid input" ); + } + + double numSuperpixel = mxGetScalar( prhs[2] ); + double *threshold = mxGetPr( prhs[3] ); + int m = mxGetM( prhs[3] ); + int n = mxGetN( prhs[3] ); + int numSegmentation = m * n; + + mexPrintf( "nadj: %d\n", nadj ); + mexPrintf( "m: %d, n: %d\n", m, n ); + + mexPrintf( "numSuperpixel: %.1f, numSegmentation: %d\n", numSuperpixel, numSegmentation ); + + plhs[0] = mxCreateDoubleMatrix( numSuperpixel, numSegmentation, mxREAL ); + double *label = mxGetPr( plhs[0] ); + + int num = 0; + for( int s = 0; s < numSegmentation; ++s ) + { + double t = threshold[s]; + mexPrintf( "\t*** threshold: %.3f\n", t ); + universe *u = new universe( numSuperpixel ); + for( int ix = 0; ix < nadj; ++ix ) + { + int s1 = adjlist[ix] - 1; + int s2 = adjlist[ix + nadj] - 1; + + int a = u->find( s1 ); + int b = u->find( s2 ); + // mexPrintf( "\t\t*** pE: %.3f\n", pE[ix] ); + if( pE[ix] >= t && a != b ) + { + u->join(a, b); + } + } + + mexPrintf( "\tafter merging, there are %d regions.\n", u->num_sets() ); + + for( int ix = 0; ix < numSuperpixel; ++ix ) + { + label[num] = u->find( ix ); + ++num; + } + + delete u; + } + if( num != numSuperpixel * numSegmentation ) + { + mexErrMsgTxt( "Error in generating multiple segmentations." ); + } +} diff --git a/multi-segmentation/new-segment-graph.h b/multi-segmentation/new-segment-graph.h new file mode 100644 index 0000000..e5f13bc --- /dev/null +++ b/multi-segmentation/new-segment-graph.h @@ -0,0 +1,85 @@ +/* +Copyright (C) 2006 Pedro Felzenszwalb + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef SEGMENT_GRAPH +#define SEGMENT_GRAPH + +#include +#include +#include "disjoint-set.h" + +// threshold function +#define THRESHOLD(size, c) (c/size) + +typedef struct { + float w; + int a, b; +} edge; + +bool operator<(const edge &a, const edge &b) { + return a.w < b.w; +} + +/* +* Segment a graph +* +* Returns a disjoint-set forest representing the segmentation. +* +* num_vertices: number of vertices in graph. +* num_edges: number of edges in graph +* edges: array of edges. +* c: constant for treshold function. 
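+* size_array: initial size of each component (here, the pixel count of every
+*             superpixel), used to seed the disjoint-set sizes instead of 1.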
+*/ +universe *segment_graph(int num_vertices, int num_edges, edge *edges, + float c, int *size_array) { + // sort edges by weight + std::sort(edges, edges + num_edges); + + // make a disjoint-set forest + // universe *u = new universe(num_vertices); + // take the amout of pixels into consideration + universe *u = new universe(num_vertices, size_array); + + // init thresholds + float *threshold = new float[num_vertices]; + for (int i = 0; i < num_vertices; i++) + threshold[i] = THRESHOLD(u->size(i),c); + + // for each edge, in non-decreasing weight order... + for (int i = 0; i < num_edges; i++) { + edge *pedge = &edges[i]; + + // components conected by this edge + int a = u->find(pedge->a); + int b = u->find(pedge->b); + if (a != b) { + if ((pedge->w <= threshold[a]) && + (pedge->w <= threshold[b])) { + u->join(a, b); + a = u->find(a); + threshold[a] = pedge->w + THRESHOLD(u->size(a), c); + } + } + } + + // free up + delete threshold; + return u; +} + +#endif diff --git a/multi-segmentation/segment-graph.h b/multi-segmentation/segment-graph.h new file mode 100644 index 0000000..0768552 --- /dev/null +++ b/multi-segmentation/segment-graph.h @@ -0,0 +1,83 @@ +/* +Copyright (C) 2006 Pedro Felzenszwalb + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef SEGMENT_GRAPH +#define SEGMENT_GRAPH + +#include +#include +#include "disjoint-set.h" + +// threshold function +#define THRESHOLD(size, c) (c/size) + +typedef struct { + float w; + int a, b; +} edge; + +bool operator<(const edge &a, const edge &b) { + return a.w < b.w; +} + +/* + * Segment a graph + * + * Returns a disjoint-set forest representing the segmentation. + * + * num_vertices: number of vertices in graph. + * num_edges: number of edges in graph + * edges: array of edges. + * c: constant for treshold function. + */ +universe *segment_graph(int num_vertices, int num_edges, edge *edges, + float c) { + // sort edges by weight + std::sort(edges, edges + num_edges); + + // make a disjoint-set forest + universe *u = new universe(num_vertices); + + // init thresholds + float *threshold = new float[num_vertices]; + for (int i = 0; i < num_vertices; i++) + threshold[i] = THRESHOLD(1,c); + + // for each edge, in non-decreasing weight order... 
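+ // note: an edge merges its two components only if its weight is no larger than
+ // both components' current thresholds; after a merge the threshold becomes the
+ // (now largest) internal edge weight plus THRESHOLD(size, c) = c/size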
+ for (int i = 0; i < num_edges; i++) { + edge *pedge = &edges[i]; + + // components conected by this edge + int a = u->find(pedge->a); + int b = u->find(pedge->b); + if (a != b) { + if ((pedge->w <= threshold[a]) && + (pedge->w <= threshold[b])) { + u->join(a, b); + a = u->find(a); + threshold[a] = pedge->w + THRESHOLD(u->size(a), c); + } + } + } + + // free up + delete threshold; + return u; +} + +#endif diff --git a/multi-segmentation/trimapGenerateMultipleSegmentations.asv b/multi-segmentation/trimapGenerateMultipleSegmentations.asv new file mode 100644 index 0000000..be090a2 --- /dev/null +++ b/multi-segmentation/trimapGenerateMultipleSegmentations.asv @@ -0,0 +1,26 @@ +function labels = trimapGenerateMultipleSegmentations( image, imsegs ) + adjmat = imsegs.adjmat; + segimage = imsegs.segimage; + + image_lab = rgb2lab( image ); + bins = [8 16 16]; + Q = computeQuantMatrix( image_lab, bins ); + region_hist = computeRegionHist(Q, bins, segimage); + + num_region = max(segimage(:)); + region_dist = zeros(num_region, num_region); + + ind = find(adjmat); + for ix = 1 : length(ind) + [x y] = ind2sub([num_region, num_region], ind(ix)); + region_dist(x, y) = histDist(region_hist(x,:), region_hist(y,:)); + end + + t = 0.2:0.05:0.8; + labels = mexMergeAdjacentRegions(region_dist, t); + + for jx = 1 : size(labels, 2) + L = labels(:, jx); + temp_label = unique(L); + for ix = 1 : size(labels, 1) + \ No newline at end of file diff --git a/multi-segmentation/trimapGenerateMultipleSegmentations.m b/multi-segmentation/trimapGenerateMultipleSegmentations.m new file mode 100644 index 0000000..5f8d710 --- /dev/null +++ b/multi-segmentation/trimapGenerateMultipleSegmentations.m @@ -0,0 +1,30 @@ +function labels = trimapGenerateMultipleSegmentations( image, imsegs ) + adjmat = imsegs.adjmat; + segimage = imsegs.segimage; + + image_lab = rgb2lab( image ); + bins = [8 16 16]; + Q = computeQuantMatrix( image_lab, bins ); + region_hist = computeRegionHist(Q, bins, segimage); + + num_region = max(segimage(:)); + region_dist = zeros(num_region, num_region); + + ind = find(adjmat); + for ix = 1 : length(ind) + [x y] = ind2sub([num_region, num_region], ind(ix)); + region_dist(x, y) = histDist(region_hist(x,:), region_hist(y,:)); + end + + t = 0.2:0.05:0.8; + labels = mexMergeAdjacentRegions(region_dist, t); + + for jx = 1 : size(labels, 2) + L = labels(:, jx); + temp_label = unique(L); + for ix = 1 : length(temp_label) + idx = find( L == temp_label(ix) ); + labels(idx, jx) = ix; + end + end + \ No newline at end of file diff --git a/multi-segmentation/trimapGenerateMultipleSegmentations2.asv b/multi-segmentation/trimapGenerateMultipleSegmentations2.asv new file mode 100644 index 0000000..97f281c --- /dev/null +++ b/multi-segmentation/trimapGenerateMultipleSegmentations2.asv @@ -0,0 +1,40 @@ +function [labels adjlist = trimapGenerateMultipleSegmentations2( image, imsegs, edgeClassifier, ecal ) + adjmat = imsegs.adjmat; + segimage = imsegs.segimage; + + spFeat = getSuperpixelData(image, imsegs); + [edgeFeat adjlist] = getEdgeData( imsegs, spFeat ); +% spFeat = mcmcGetSuperpixelData( image, imsegs ); +% [edgeFeat adjlist] = mcmcGetEdgeData( imsegs, spFeat ); + pE=test_boosted_dt_mc(edgeClassifier,edgeFeat); + pE = 1 ./ (1+exp(ecal(1)*pE+ecal(2))); + + nSuperpixel = max(segimage(:)); + labels = mexMergeAdjacentRegions2( adjlist, pE, nSuperpixel, 0.2 ); + +% image_lab = rgb2lab( image ); +% bins = [8 16 16]; +% Q = computeQuantMatrix( image_lab, bins ); +% region_hist = computeRegionHist(Q, bins, segimage); +% +% 
num_region = max(segimage(:)); +% region_dist = zeros(num_region, num_region); +% +% ind = find(adjmat); +% for ix = 1 : length(ind) +% [x y] = ind2sub([num_region, num_region], ind(ix)); +% region_dist(x, y) = histDist(region_hist(x,:), region_hist(y,:)); +% end +% +% t = 0.2:0.05:0.8; +% labels = mexMergeAdjacentRegions(region_dist, t); + + for jx = 1 : size(labels, 2) + L = labels(:, jx); + temp_label = unique(L); + for ix = 1 : length(temp_label) + idx = find( L == temp_label(ix) ); + labels(idx, jx) = ix; + end + end + \ No newline at end of file diff --git a/multi-segmentation/trimapGenerateMultipleSegmentations2.m b/multi-segmentation/trimapGenerateMultipleSegmentations2.m new file mode 100644 index 0000000..7c8e843 --- /dev/null +++ b/multi-segmentation/trimapGenerateMultipleSegmentations2.m @@ -0,0 +1,41 @@ +function [labels adjlist pE] = trimapGenerateMultipleSegmentations2( image, imsegs, edgeClassifier, ecal, t, size_array ) + adjmat = imsegs.adjmat; + segimage = imsegs.segimage; + + spFeat = getSuperpixelData_ver2(image, imsegs); + [edgeFeat adjlist] = getEdgeData_ver2( imsegs, spFeat ); +% spFeat = mcmcGetSuperpixelData( image, imsegs ); +% [edgeFeat adjlist] = mcmcGetEdgeData( imsegs, spFeat ); + pE=test_boosted_dt_mc(edgeClassifier,edgeFeat); + pE = 1 ./ (1+exp(ecal(1)*pE+ecal(2))); + + nSuperpixel = max(segimage(:)); + % labels = mexMergeAdjacentRegions2( adjlist, pE, nSuperpixel, t ); + labels = mexMergeAdjRegs_Felzenszwalb( adjlist, pE, nSuperpixel, t, size_array ); + +% image_lab = rgb2lab( image ); +% bins = [8 16 16]; +% Q = computeQuantMatrix( image_lab, bins ); +% region_hist = computeRegionHist(Q, bins, segimage); +% +% num_region = max(segimage(:)); +% region_dist = zeros(num_region, num_region); +% +% ind = find(adjmat); +% for ix = 1 : length(ind) +% [x y] = ind2sub([num_region, num_region], ind(ix)); +% region_dist(x, y) = histDist(region_hist(x,:), region_hist(y,:)); +% end +% +% t = 0.2:0.05:0.8; +% labels = mexMergeAdjacentRegions(region_dist, t); + + for jx = 1 : size(labels, 2) + L = labels(:, jx); + temp_label = unique(L); + for ix = 1 : length(temp_label) + idx = find( L == temp_label(ix) ); + labels(idx, jx) = ix; + end + end + \ No newline at end of file diff --git a/randomforest-matlab/RF_Class_C/Compile_Check b/randomforest-matlab/RF_Class_C/Compile_Check new file mode 100644 index 0000000..0c26711 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/Compile_Check @@ -0,0 +1,24 @@ +# Note will work only on linux +# check for various not-visible errors by using valgrind +# this code just profiles the timings/memory leaks of the code. 
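+# (the callgrind invocation further down, when uncommented, writes callgrind.out.*
+#  files that can be inspected with kcachegrind for the timing view)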
+# checked on linux with valgrind and kcachegrind installed +# Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) +# run as: sh Compile_Check + + +rm callgrind.out.* +#g++ cokus.cpp reg_RF.cpp diabetes_C_wrapper.cpp -g -pg -funroll-loops -msse3 +rm twonorm_test -rf +make twonorm +#g++ twonorm_C_wrapper.cpp rfutils.o rfsub.o classRF.o cokus.o -g -pg -funroll-loops -msse3 + +#to check timings +#valgrind -v --error-limit=no --tool=callgrind --dump-instr=yes ./twonorm_test + +#to check mem-usage +valgrind -v --error-limit=no --tool=memcheck --track-origins=yes --leak-check=full --show-reachable=yes --num-callers=1000 ./twonorm_test + + +# when using timings, the below tool helps +# kcachegrind& + diff --git a/randomforest-matlab/RF_Class_C/MATLAB/RFTreeNode.m b/randomforest-matlab/RF_Class_C/MATLAB/RFTreeNode.m new file mode 100644 index 0000000..245854e --- /dev/null +++ b/randomforest-matlab/RF_Class_C/MATLAB/RFTreeNode.m @@ -0,0 +1,40 @@ +classdef RFTreeNode < handle + properties(SetAccess = public) + isTerminal + nodeClass + bestVar + bestSplit + leftChild + rightChild + end + + methods +% function node = RFTreeNode() +% node.isTerminal = false; +% node.nodeClass = 0; +% node.bestVar = 0; +% node.bestSplit = 0; +% end + + function node = RFTreeNode( isTerminal_, nodeClass_, bestVar_, bestSplit_ ) + node.isTerminal = isTerminal_; + node.nodeClass = nodeClass_; + node.bestVar = bestVar_; + node.bestSplit = bestSplit_; + end + +% function insertLeftChild( parentNode, newNode ) +% if ~isempty(parentNode.leftChild) +% error( 'There already exists a left child for the specified parent node.' ); +% end +% parentNode.leftChild = newNode; +% end +% +% function insertRightChild( parentNode, newNode ) +% if ~isempty(parentNode.rightChild) +% error( 'There already exists a right child for the specified parent node.' ); +% end +% parentNode.rightChild = newNode; +% end + end % end of methods of class RFTreeNode +end % end of classdef \ No newline at end of file diff --git a/randomforest-matlab/RF_Class_C/MATLAB/buildForest.m b/randomforest-matlab/RF_Class_C/MATLAB/buildForest.m new file mode 100644 index 0000000..4c40f3a --- /dev/null +++ b/randomforest-matlab/RF_Class_C/MATLAB/buildForest.m @@ -0,0 +1,114 @@ +function [rf rootIndex] = buildForest( rf_model ) +%(treemap, nodestatus, nodeclass, bestvar, xbestsplit, ndbigtree) + % extract all the fields + treemap = double(rf_model.treemap); + nodestatus = double(rf_model.nodestatus); + nodeclass = double(rf_model.nodeclass); + bestvar = double(rf_model.bestvar); + xbestsplit = double(rf_model.xbestsplit); + ndbigtree = double(rf_model.ndbigtree); + + [nrnodes ntree] = size(nodestatus); + nnodes = sum(ndbigtree(1:ntree)); + rf = cell(1, nnodes); + rootIndex = zeros(1, ntree); + maskIndex = 0 * nodestatus; + + % first create nodes + index = 1; + for n = 1 : ntree + rootIndex(n) = index; + for jx = 1 : nrnodes + if nodestatus(jx, n) ~= 0 + rf{index} = RFTreeNode( nodestatus(jx,n) == -1, nodeclass(jx, n),... 
+ bestvar(jx, n), xbestsplit(jx, n) ); + maskIndex(jx, n) = index; + index = index + 1; + end + end + end + + assert( nnodes == index - 1 ); + + % then create classification trees by linking the nodes + for n = 1 : ntree + for jx = 1 : nrnodes + if nodestatus(jx, n) ~= 0 + nodeIndex = maskIndex(jx, n); + treemat = treemap(:, 2*n-1 : 2*n); + treemat = treemat(:); + + if treemat(2*jx-1) > 0 + leftChildIndex = maskIndex( treemat(2*jx-1), n ); + % rf{nodeIndex}.insertLeftChild( rf{leftChildIndex} ); + rf{nodeIndex}.leftChild = leftChildIndex; + end + + if treemat(2*jx) > 0 + rightChildIndex = maskIndex( treemat(2*jx), n ); + % rf{nodeIndex}.insertRightChild( rf{rightChildIndex} ); + rf{nodeIndex}.rightChild = rightChildIndex; + end + end + end + end +end + +function buildTree(nodes, r, treemat) + +end + +% r = RFTreeNode; +% r.bestVar = 'r'; +% n1 = RFTreeNode; +% n1.bestVar = 'n1'; +% n2 = RFTreeNode; +% n2.bestVar = 'n2'; +% n3 = RFTreeNode; +% n3.bestVar = 'n3'; +% n3.isTerminal = true; +% n4 = RFTreeNode; +% n4.bestVar = 'n4'; +% n5 = RFTreeNode; +% n5.bestVar = 'n5'; +% n5.isTerminal = true; +% n6 = RFTreeNode; +% n6.bestVar = 'n6'; +% n6.isTerminal = true; +% n7 = RFTreeNode; +% n7.bestVar = 'n7'; +% n7.isTerminal = true; +% n8 = RFTreeNode; +% n8.bestVar = 'n8'; +% n8.isTerminal = true; +% +% r.insertLeftChild( n1 ); +% r.insertRightChild( n2 ); +% n1.insertLeftChild( n3 ); +% n1.insertRightChild( n4 ); +% n4.insertLeftChild( n7 ); +% n4.insertRightChild( n8 ); +% n2.insertLeftChild( n5 ); +% n2.insertRightChild( n6 ); +% +% % rf = cell(1, 8); +% % rf{1} = n1; +% % rf{2} = n2; +% % rf{3} = n3; +% % rf{4} = n4; +% % rf{5} = n5; +% % rf{6} = n6; +% % rf{7} = n7; +% % rf{8} = n8; +% % rf{9} = r; +% +% % rf = zeros(1, 8); +% rf(1) = r; +% rf(2) = n1; +% rf(3) = n2; +% rf(4) = n3; +% rf(5) = n4; +% rf(6) = n5; +% rf(7) = n6; +% rf(8) = n7; +% rf(9) = n8; diff --git a/randomforest-matlab/RF_Class_C/MATLAB/classRFPredict.m b/randomforest-matlab/RF_Class_C/MATLAB/classRFPredict.m new file mode 100644 index 0000000..ae8731a --- /dev/null +++ b/randomforest-matlab/RF_Class_C/MATLAB/classRFPredict.m @@ -0,0 +1,35 @@ +function [yhat prdt_per_tree] = classRFPredict( x, rf ) + [nsample ndim] = size(x); + ntree = rf.ntree; + prdt_per_tree = zeros(nsample, ntree); + + ntree = rf.ntree; + + for ix = 1 : nsample + for n = 1 : ntree + k = 1; + while rf.nodestatus{n}(k) ~= -1 + if x(ix, rf.bestvar{n}(k)) <= rf.bestsplit{n}(k) + k = rf.treemap{n}(k, 1); + else + k = rf.treemap{n}(k, 2); + end + end + prdt_per_tree(ix, n) = rf.nodeclass{n}(k); + % fprintf( 'ix: %d, n: %d\n', ix, n ); + end + end + + yhat = zeros(nsample, 1); + for ix = 1 : nsample + % for n = 1 : ntree + % when there are equal votes for the classes, it favors the + % first class + yhat(ix) = mode( prdt_per_tree(ix, :) ); + % end + end + + for ix = 1 : length(rf.new_labels) + yhat( yhat == rf.new_labels(ix) ) = rf.orig_labels(ix); + end +end \ No newline at end of file diff --git a/randomforest-matlab/RF_Class_C/MATLAB/compressClassModel.m b/randomforest-matlab/RF_Class_C/MATLAB/compressClassModel.m new file mode 100644 index 0000000..7622f96 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/MATLAB/compressClassModel.m @@ -0,0 +1,39 @@ +function m = compressClassModel( model ) + maxnnode = max(model.ndbigtree(:)); + ntree = model.ntree; + + m.ntree = ntree; + m.nclass = model.nclass; + m.orig_labels = model.orig_labels; + m.new_labels = model.new_labels; + m.mtry = model.mtry; + m.nrnodes = maxnnode; + m.classwt = model.classwt; + m.cutoff = 
model.cutoff; + + m.treemap = int32( zeros(maxnnode, ntree*2) ); + m.nodestatus = int32( zeros(maxnnode, ntree) ); + m.nodeclass = int32( zeros(maxnnode, ntree) ); + m.bestvar = int32( zeros(maxnnode, ntree) ); + m.xbestsplit = zeros(maxnnode, ntree); + m.ndbigtree = model.ndbigtree(1:ntree); + + for n = 1 : ntree + nnode = model.ndbigtree(n); + m.nodestatus(1:nnode, n) = model.nodestatus(1:nnode, n); + m.nodeclass(1:nnode, n) = model.nodeclass(1:nnode, n); + m.bestvar(1:nnode, n) = model.bestvar(1:nnode, n); + m.xbestsplit(1:nnode, n) = model.xbestsplit(1:nnode, n); + + treemat = model.treemap(:, 2*n-1 : 2*n); + m.treemap = reshape(treemat(1:2*maxnnode), [maxnnode 2]); +% for jx = 1 : nnode +% +% m.treemap(jx, 2*n-1) = treemat(2*jx-1); % left child +% m.treemap(jx, 2*n) = treemat(2*jx); % right child +% end +% jx = 1 : 1 : nnode; +% m.treemap(jx, 2*n-1) = treemat(2*jx-1); % left child +% m.treemap(jx, 2*n) = treemat(2*jx); % right child + end +end \ No newline at end of file diff --git a/randomforest-matlab/RF_Class_C/MATLAB/compressModel.m b/randomforest-matlab/RF_Class_C/MATLAB/compressModel.m new file mode 100644 index 0000000..75e3e7f --- /dev/null +++ b/randomforest-matlab/RF_Class_C/MATLAB/compressModel.m @@ -0,0 +1,38 @@ +function m = compressModel( rf_model ) + ntree = rf_model.ntree; + treemap = cell(1, ntree); + nodestatus = cell(1, ntree); + nodeclass = cell(1, ntree); + bestvar = cell(1, ntree); + bestsplit = cell(1, ntree); + treesize = rf_model.ndbigtree; + + for ix = 1 : ntree + nnode = treesize(ix); + nodestatus{ix} = rf_model.nodestatus(1:nnode, ix); + nodeclass{ix} = rf_model.nodeclass(1:nnode, ix); + bestvar{ix} = rf_model.bestvar(1:nnode, ix); + bestsplit{ix} = rf_model.xbestsplit(1:nnode, ix); + + temp_treemap = zeros(nnode, 2); + treemat = rf_model.treemap(:, 2*ix-1 : 2*ix); + treemat = treemat(:); + for jx = 1 : nnode + temp_treemap(jx, 1) = treemat(2*jx -1); + temp_treemap(jx, 2) = treemat(2*jx); + end + treemap{ix} = temp_treemap; + end + + m.treemap = treemap; + m.nodestatus = nodestatus; + m.nodeclass = nodeclass; + m.bestvar = bestvar; + m.bestsplit = bestsplit; + m.ntree = ntree; + + m.orig_labels = rf_model.orig_labels; + m.new_labels = rf_model.new_labels; + m.nclass = rf_model.nclass; + m.mtry = rf_model.mtry; +end \ No newline at end of file diff --git a/randomforest-matlab/RF_Class_C/MATLAB/dispTree.m b/randomforest-matlab/RF_Class_C/MATLAB/dispTree.m new file mode 100644 index 0000000..3f39872 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/MATLAB/dispTree.m @@ -0,0 +1,2 @@ +function dispTree( t ) +end \ No newline at end of file diff --git a/randomforest-matlab/RF_Class_C/MATLAB/mexBuildTree.cpp b/randomforest-matlab/RF_Class_C/MATLAB/mexBuildTree.cpp new file mode 100644 index 0000000..999f973 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/MATLAB/mexBuildTree.cpp @@ -0,0 +1,31 @@ +#include "mex.h" +#include "matrix.h" +#include +using std::stack; + +// tree = mexFunction( treemap, bestvar, bestsplit, nodestatus, nodeclass ); +void mexFunction( int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[] ) +{ + double *treemap, *bestvar, *bestsplit, *nodestatus, *nodeclass; + + treemap = static_cast( mxGetData(prhs[0]) ); + bestvar = static_cast( mxGetData(prhs[1]) ); + bestsplit = static_cast( mxGetData(prhs[2]) ); + nodestatus = static_cast( mxGetData(prhs[3]) ); + nodeclass = static_cast( mxGetData(prhs[4]) ); + + int nrnodes = mxGetM( prhs[1] ); + + int k = 0; + stack toVisitNode; + toVisitNode.push( k ); + + while( !toVisitNode.empty() ) + { 
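+ // note: the loop body below only pops node indices; no children are pushed and
+ // no output is produced, so tree construction is left incomplete here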
+ int ix = toVisitNode.top(); + toVisitNode.pop(); + + + } +} \ No newline at end of file diff --git a/randomforest-matlab/RF_Class_C/MATLAB/mexDispTree.cpp b/randomforest-matlab/RF_Class_C/MATLAB/mexDispTree.cpp new file mode 100644 index 0000000..c19183c --- /dev/null +++ b/randomforest-matlab/RF_Class_C/MATLAB/mexDispTree.cpp @@ -0,0 +1,47 @@ +#include "mex.h" +#include "matrix.h" +#include +using namespace std; + +void mexFunction( int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[] ) +{ + int r = 0; + char bestVar[100]; + mxArray *pLeftChild, *pRightChild; + + mxArray *pIsTerminal = mxGetProperty( prhs[0], r, "isTerminal" ); + mxLogical *isTerminal = mxGetLogicals( pIsTerminal ); + mxArray *pBestVar = mxGetProperty( prhs[0], r, "bestVar" ); + mxGetString( pBestVar, bestVar, 100 ); + mexPrintf( "%s, %s\n", bestVar, (isTerminal[0] ? "true" : "false") ); + + pRightChild = mxGetProperty( prhs[0], r, "rightChild" ); + pLeftChild = mxGetProperty( prhs[0], r, "leftChild" ); + + stack toVisitNode; + if( !mxIsEmpty(pRightChild) ) + toVisitNode.push( pRightChild ); + if( !mxIsEmpty(pLeftChild) ) + toVisitNode.push( pLeftChild ); + + while( !toVisitNode.empty() ) + { + mxArray *pNode = toVisitNode.top(); + toVisitNode.pop(); + + pIsTerminal = mxGetProperty( pNode, 0, "isTerminal" ); + isTerminal = mxGetLogicals( pIsTerminal ); + pBestVar = mxGetProperty( pNode, 0, "bestVar" ); + mxGetString( pBestVar, bestVar, 100 ); + mexPrintf( "%s, %s\n", bestVar, (isTerminal[0] ? "terminal" : "non-terminal") ); + + pRightChild = mxGetProperty( pNode, 0, "rightChild" ); + pLeftChild = mxGetProperty( pNode, 0, "leftChild" ); + + if( !mxIsEmpty(pRightChild) ) + toVisitNode.push( pRightChild ); + if( !mxIsEmpty(pLeftChild) ) + toVisitNode.push( pLeftChild ); + } +} \ No newline at end of file diff --git a/randomforest-matlab/RF_Class_C/MATLAB/mexDispTree.mexw64 b/randomforest-matlab/RF_Class_C/MATLAB/mexDispTree.mexw64 new file mode 100644 index 0000000..01ac46c Binary files /dev/null and b/randomforest-matlab/RF_Class_C/MATLAB/mexDispTree.mexw64 differ diff --git a/randomforest-matlab/RF_Class_C/Makefile b/randomforest-matlab/RF_Class_C/Makefile new file mode 100644 index 0000000..07bfd73 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/Makefile @@ -0,0 +1,78 @@ +# Makefile to compile mex/standalone version to Andy Liaw et al.'s C code (used in R package randomForest) +# Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) +# License: GPLv2 +# Version: 0.02 + + +# Makefile to generate mex or standalone. will work in cygwin (for windows) or linux +# +# make mex: generates matlab mex files which can be easily called up +# make diabetes: generates a standalone file to test on the pima indian +# diabetes dataset. 
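+# Example invocations (illustrative; the targets are defined further down):
+#   make mex       # builds the MATLAB mex interface (mexClassRF_predict)
+#   make twonorm   # builds the standalone twonorm_test executable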
+# + + +#source directory +SRC=src/ + +#temporary .o output directory +BUILD=tempbuild/ + +CC=g++ +FORTRAN=gfortran # or g77 whichever is present +CFLAGS= -fpic -O2 -funroll-loops -msse3#-g -Wall +FFLAGS=-O2 -fpic #-g +LDFORTRAN=#-gfortran +MEXFLAGS=-g +all: clean classTree cokus rfsub rfutils classRF twonorm mex +#all: regTree regrf rf rfsub rfutils classTree shared mex-setup + +mex: clean classTree2 cokus rfsub rfutils mex_classRF + +twonorm: clean cokus classTree2 rfsub rfutils + echo 'Generating twonorm executable' + $(CC) $(CFLAGS) -c $(SRC)classRF.cpp -o $(BUILD)classRF.o + $(CC) $(CFLAGS) $(SRC)twonorm_C_wrapper.cpp $(SRC)classRF.cpp $(BUILD)classTree.o $(BUILD)rfutils.o rfsub.o $(BUILD)cokus.o -o twonorm_test -lgfortran -lm + +mex_classRF: $(SRC)classRF.cpp $(SRC)mex_ClassificationRF_train.cpp $(SRC)mex_ClassificationRF_predict.cpp + echo 'Generating Mex' +# /usr/local/MATLAB/R2012a/bin/mex -c $(SRC)classRF.cpp -outdir $(BUILD)classRF.o -DMATLAB $(MEXFLAGS) +# /usr/local/MATLAB/R2012a/bin/mex $(SRC)mex_ClassificationRF_train.cpp $(SRC)classRF.cpp $(BUILD)classTree.o $(BUILD)rfutils.o rfsub.o $(BUILD)cokus.o -o mexClassRF_train -lgfortran -lm -DMATLAB $(MEXFLAGS) + /usr/local/MATLAB/R2012a/bin/mex $(SRC)mex_ClassificationRF_predict.cpp $(SRC)classRF.cpp $(BUILD)classTree2.o $(BUILD)rfutils.o rfsub.o $(BUILD)cokus.o -o mexClassRF_predict -lgfortran -lm -DMATLAB $(MEXFLAGS) + +cokus: $(SRC)cokus.cpp + echo 'Compiling Cokus (random number generator)' + $(CC) $(CFLAGS) -c $(SRC)cokus.cpp -o $(BUILD)cokus.o + +classRF: $(SRC)classRF.cpp + $(CC) $(CFLAGS) -c $(SRC)classRF.cpp -o $(BUILD)classRF.o +# $(CC) $(CFLAGS) classRF.o classTree.o rfutils.o rfsub.o cokus.o -o classRF $(LDFORTRAN) + +classTree: $(SRC)classTree.cpp + echo 'Compiling classTree.cpp' + $(CC) $(CFLAGS) -c $(SRC)classTree.cpp -o $(BUILD)classTree.o + +classTree2: $(SRC)classTree2.cpp + echo 'Compiling classTree.cpp' + $(CC) $(CFLAGS) -c $(SRC)classTree2.cpp -o $(BUILD)classTree2.o + + +rfsub: $(SRC)rfsub.f + echo 'Compiling rfsub.f (fortran subroutines)' + $(FORTRAN) $(FFLAGS) -c $(SRC)rfsub.f -o rfsub.o +#for compiling via a cross compiler for 64 bit +# x86_64-pc-mingw32-gfortran -c $(SRC)rfsub.f -o rfsub.o + +rfutils: $(SRC)rfutils.cpp + echo 'Compiling rfutils.cpp' + $(CC) $(CFLAGS) -c $(SRC)rfutils.cpp -o $(BUILD)rfutils.o + + +clean: + rm twonorm_test -rf + rm $(BUILD)*.o *.o -rf + rm *~ -rf + rm *.mexw32 twonorm_test -rf + rm *.mexa64 -rf + rm classRF -rf + rm *.exe -rf diff --git a/randomforest-matlab/RF_Class_C/Makefile.windows b/randomforest-matlab/RF_Class_C/Makefile.windows new file mode 100644 index 0000000..5797dfd --- /dev/null +++ b/randomforest-matlab/RF_Class_C/Makefile.windows @@ -0,0 +1,72 @@ +# Makefile to compile mex/standalone version to Andy Liaw et al.'s C code (used in R package randomForest) +# Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) +# License: GPLv2 +# Version: 0.02 + + +# Makefile to generate mex or standalone. will work in cygwin (for windows) or linux +# +# make mex: generates matlab mex files which can be easily called up +# make diabetes: generates a standalone file to test on the pima indian +# diabetes dataset. 
+# + + +#source directory +SRC=src/ + +#temporary .o output directory +BUILD=tempbuild/ + +CC=g++ +FORTRAN=g77#gfortran +CFLAGS= -fpic -O2 -funroll-loops -msse3#-g -Wall +FFLAGS=-O2 -fpic #-g +LDFORTRAN=#-gfortran +MEXFLAGS=-g +all: clean classTree cokus rfsub rfutils classRF twonorm mex +#all: regTree regrf rf rfsub rfutils classTree shared mex-setup + +mex: clean classTree cokus rfsub rfutils mex_classRF + +twonorm: clean cokus classTree rfsub rfutils + echo 'Generating twonorm executable' + $(CC) $(CFLAGS) -c $(SRC)classRF.cpp -o $(BUILD)classRF.o + $(CC) $(CFLAGS) $(SRC)twonorm_C_wrapper.cpp $(SRC)classRF.cpp $(BUILD)classTree.o $(BUILD)rfutils.o rfsub.o $(BUILD)cokus.o -o twonorm_test -lm + +mex_classRF: $(SRC)classRF.cpp $(SRC)mex_ClassificationRF_train.cpp $(SRC)mex_ClassificationRF_predict.cpp + echo 'Generating Mex' + mex -c $(SRC)classRF.cpp -o $(BUILD)classRF.o -DMATLAB $(MEXFLAGS) + mex $(SRC)mex_ClassificationRF_train.cpp $(BUILD)classRF.o $(BUILD)classTree.o $(BUILD)rfutils.o rfsub.o $(BUILD)cokus.o -o mexClassRF_train -lm -DMATLAB $(MEXFLAGS) + mex $(SRC)mex_ClassificationRF_predict.cpp $(BUILD)classRF.o $(BUILD)classTree.o $(BUILD)rfutils.o rfsub.o $(BUILD)cokus.o -o mexClassRF_predict -lm -DMATLAB $(MEXFLAGS) + +cokus: $(SRC)cokus.cpp + echo 'Compiling Cokus (random number generator)' + $(CC) $(CFLAGS) -c $(SRC)cokus.cpp -o $(BUILD)cokus.o + +classRF: $(SRC)classRF.cpp + $(CC) $(CFLAGS) -c $(SRC)classRF.cpp -o $(BUILD)classRF.o +# $(CC) $(CFLAGS) classRF.o classTree.o rfutils.o rfsub.o cokus.o -o classRF $(LDFORTRAN) + +classTree: $(SRC)classTree.cpp + echo 'Compiling classTree.cpp' + $(CC) $(CFLAGS) -c $(SRC)classTree.cpp -o $(BUILD)classTree.o + + +rfsub: $(SRC)rfsub.f + echo 'Compiling rfsub.f (fortran subroutines)' + $(FORTRAN) $(FFLAGS) -c $(SRC)rfsub.f -o rfsub.o + +rfutils: $(SRC)rfutils.cpp + echo 'Compiling rfutils.cpp' + $(CC) $(CFLAGS) -c $(SRC)rfutils.cpp -o $(BUILD)rfutils.o + + +clean: + rm twonorm_test -rf + rm *.o $(BUILD)*.o -rf + rm *~ -rf + rm *.mexw32 twonorm_test -rf + rm *.mexa64 -rf + rm classRF -rf + rm *.exe -rf \ No newline at end of file diff --git a/randomforest-matlab/RF_Class_C/README.txt b/randomforest-matlab/RF_Class_C/README.txt new file mode 100644 index 0000000..480135b --- /dev/null +++ b/randomforest-matlab/RF_Class_C/README.txt @@ -0,0 +1,80 @@ +mex/standalone interface to Andy Liaw et al.'s C code (used in R package randomForest) +Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) +License: GPLv2 +Version: 0.02 + +Added Binaries for Windows 32/64 bit +Commented out compile_windows.m, if you feel upto it, remove the comments and recompile + +CLASSIFICATION BASED RANDOMFOREST + + +****A tutorial for matlab now in tutorial_ClassRF.m**** + + +Ways to generate Mex's and Standalone files + +rfsub.o is compiled using fortran from rfsub.f. In case cywin or a fortran +compiler is not present just copy the appropriate (depending on OS) +rfsub.o from precompiled_rfsub directory to the current directory + + +___STANDALONE____ (not exactly standalone but an interface via C) +An example for a C file using the twonorm dataset for classification +is shown in src/twonorm_C_wrapper.cpp + +This is a standalone version that needs to set right parameters in CPP file. + +Compiling in windows: +Method 1: use cygwin and make: go to current directory and run 'make twonorm -f Makefile.windows' +in cygwin command prompt. Need to have gcc/g++ and g77 (in cygwin) + installed. 
Also the custom makefile differs from the linux version which has -lgfortran +whereas the windows version doesn't. Will generate twonorm_test.exe + +Method 2: use DevC++ (download from http://www.bloodshed.net/devcpp.html ). +Open the twonorm_C_devc.dev file which is a project file which has the sources +etc set. Just compile and run. Will generate twonorm_C_devcpp.exe + +Compiling in linux: +Method 1: use linux and make: go to this directory and run 'make diabetes' +in command prompt. Need to have gcc/g++ and fortran installed. Will generate diabetes_test. +run as ./diabetes_test + + +___MATLAB___ +generates Mex files that can be called in Matlab directly. + +Compiling in windows: +Use the compile_windows.m and run in windows. It will compile and generate +appropriate mex files. Need Visual C++ or some other compiler +(VC++ express edition also works). Won't work with Matlab's inbuilt compiler (lcc) + + +Compiling in linux: +Use the compile_linux.m and run in windows. It will compile and generate +appropriate mex files. + +Using the Mex interface: +There are 2 functions classRF_train and classRF_predict as given below. +See the sample file test_ClassRF_extensively.m + + +%function Y_hat = classRF_predict(X,model) + %requires 2 arguments + %X: data matrix + %model: generated via classRF_train function + +%function model = classRF_train(X,Y,ntree,mtry, extra_options) + %requires 2 arguments and the rest 2 are optional + %X: data matrix + %Y: target values + %ntree (optional): number of trees (default is 500) + %mtry (default is max(floor(D/3),1) D=number of features in X) + %there are about 14 odd options for extra_options. Refer to tutorial_ClassRF.m to examine them + +Version History: + v0.02 (May-15-09):Updated so that classification package now has about 95% of the total options + that the R-package gives. Woohoo. Tracing of what happening behind screen works better. + v0.01 (Mar-22-09): very basic interface for mex/standalone to Liaw et al's + randomForest Package supports only ntree and mtry changing for time being. + \ No newline at end of file diff --git a/randomforest-matlab/RF_Class_C/Version_History.txt b/randomforest-matlab/RF_Class_C/Version_History.txt new file mode 100644 index 0000000..e04d357 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/Version_History.txt @@ -0,0 +1,34 @@ +CHANGES + +Added Binaries for Windows 32/64 bit +Commented out compile_windows.m, if you feel upto it, remove the comments and recompile + +svn-v8? +Added almost 95% of the total options provided by the R-package to classification. +Added tutorial for classification based RF in tutorial_ClassRF.m +Moving now to version 0.02 + +svn-v4 + +Added a `cruft' conditional compile for win64 (-DWIN64) target which involves exporting (extern) +fortran and c function names with another _ at the start (there was one at the end already) + +CROSS-compiling target for win64 shown in the makefile target for rfsub + +Reasons for crosscompiling lies with cygwin not supporting generation of 32 bit binaries. + +right now the rfsub.o is directly taken from the precompiled_rfsub directory for +windows systems, that is the compiled_windows.m directly uses the precompiled +rfsub.o to generate based on the current windows version (tested on winxp 64 and 32). +Its hard to set up the required software (gfortran/g77) on cygwin +(which also are available only to generate 32 bit binaries). + +for windows based rfsub.o. 
crosscompiler from mingw64 was used on linux from +http://sourceforge.net/project/showfiles.php?group_id=202880&package_id=245516&release_id=546049 + +for linux, its simpler to set up gfortran and gcc so will depend on recompiling +everytime mex is recompiled. Checked on 32 and 64 bit linux. + + +svn-v2 +initial commit - mapped to v0.01preview version \ No newline at end of file diff --git a/randomforest-matlab/RF_Class_C/classRF_predict.m b/randomforest-matlab/RF_Class_C/classRF_predict.m new file mode 100644 index 0000000..d45c259 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/classRF_predict.m @@ -0,0 +1,62 @@ +%************************************************************** +%* mex interface to Andy Liaw et al.'s C code (used in R package randomForest) +%* Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) +%* License: GPLv2 +%* Version: 0.02 +% +% Calls Classification Random Forest +% A wrapper matlab file that calls the mex file +% This does prediction given the data and the model file +% Options depicted in predict function in http://cran.r-project.org/web/packages/randomForest/randomForest.pdf +%************************************************************** +%function [Y_hat votes] = classRF_predict(X,model, extra_options) +% requires 2 arguments +% X: data matrix +% model: generated via classRF_train function +% extra_options.predict_all = predict_all if set will send all the prediction. +% +% +% Returns +% Y_hat - prediction for the data +% votes - unnormalized weights for the model +% prediction_per_tree - per tree prediction. the returned object . +% If predict.all=TRUE, then the individual component of the returned object is a character +% matrix where each column contains the predicted class by a tree in the forest. +% +% +% Not yet implemented +% proximity + +function [Y_new, votes, prediction_per_tree] = classRF_predict(X,model, extra_options) + + if nargin<2 + error('need atleast 2 parameters,X matrix and model'); + end + + if exist('extra_options','var') + if isfield(extra_options,'predict_all') + predict_all = extra_options.predict_all; + end + end + + if ~exist('predict_all','var'); predict_all=0;end + + + + [Y_hat,prediction_per_tree,votes] = mexClassRF_predict(X',model.nrnodes,model.ntree,model.xbestsplit,model.classwt,model.cutoff,model.treemap,model.nodestatus,model.nodeclass,model.bestvar,model.ndbigtree,model.nclass, predict_all); + %keyboard + votes = votes'; + + clear mexClassRF_predict + + Y_new = double(Y_hat); + new_labels = model.new_labels; + orig_labels = model.orig_labels; + + for i=1:length(orig_labels) + Y_new(find(Y_hat==new_labels(i)))=Inf; + Y_new(isinf(Y_new))=orig_labels(i); + end + + 1; + \ No newline at end of file diff --git a/randomforest-matlab/RF_Class_C/classRF_train.m b/randomforest-matlab/RF_Class_C/classRF_train.m new file mode 100644 index 0000000..a2e5218 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/classRF_train.m @@ -0,0 +1,383 @@ +%************************************************************** +%* mex interface to Andy Liaw et al.'s C code (used in R package randomForest) +%* Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) +%* License: GPLv2 +%* Version: 0.02 +% +% Calls Classification Random Forest +% A wrapper matlab file that calls the mex file +% This does training given the data and labels +% Documentation copied from R-packages pdf +% http://cran.r-project.org/web/packages/randomForest/randomForest.pdf +% Tutorial on getting this working in tutorial_ClassRF.m 
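+% Illustrative call (values are examples only; ntree and mtry fall back to the
+% defaults described below when omitted, and Xtest is just a placeholder for
+% new data passed to classRF_predict):
+%   model = classRF_train(X, Y, 500, floor(sqrt(size(X,2))));
+%   Y_hat = classRF_predict(Xtest, model);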
+%************************************************************** +% function model = classRF_train(X,Y,ntree,mtry, extra_options) +% +%___Options +% requires 2 arguments and the rest 3 are optional +% X: data matrix +% Y: target values +% ntree (optional): number of trees (default is 500). also if set to 0 +% will default to 500 +% mtry (default is floor(sqrt(size(X,2))) D=number of features in X). also if set to 0 +% will default to 500 +% +% +% Note: TRUE = 1 and FALSE = 0 below +% extra_options represent a structure containing various misc. options to +% control the RF +% extra_options.replace = 0 or 1 (default is 1) sampling with or without +% replacement +% extra_options.classwt = priors of classes. Here the function first gets +% the labels in ascending order and assumes the +% priors are given in the same order. So if the class +% labels are [-1 1 2] and classwt is [0.1 2 3] then +% there is a 1-1 correspondence. (ascending order of +% class labels). Once this is set the freq of labels in +% train data also affects. +% extra_options.cutoff (Classification only) = A vector of length equal to number of classes. The ?winning? +% class for an observation is the one with the maximum ratio of proportion +% of votes to cutoff. Default is 1/k where k is the number of classes (i.e., majority +% vote wins). +% extra_options.strata = (not yet stable in code) variable that is used for stratified +% sampling. I don't yet know how this works. Disabled +% by default +% extra_options.sampsize = Size(s) of sample to draw. For classification, +% if sampsize is a vector of the length the number of strata, then sampling is stratified by strata, +% and the elements of sampsize indicate the numbers to be +% drawn from the strata. +% extra_options.nodesize = Minimum size of terminal nodes. Setting this number larger causes smaller trees +% to be grown (and thus take less time). Note that the default values are different +% for classification (1) and regression (5). +% extra_options.importance = Should importance of predictors be assessed? +% extra_options.localImp = Should casewise importance measure be computed? (Setting this to TRUE will +% override importance.) +% extra_options.proximity = Should proximity measure among the rows be calculated? +% extra_options.oob_prox = Should proximity be calculated only on 'out-of-bag' data? +% extra_options.do_trace = If set to TRUE, give a more verbose output as randomForest is run. If set to +% some integer, then running output is printed for every +% do_trace trees. +% extra_options.keep_inbag Should an n by ntree matrix be returned that keeps track of which samples are +% 'in-bag' in which trees (but not how many times, if sampling with replacement) +% +% Options eliminated +% corr_bias which happens only for regression ommitted +% norm_votes - always set to return total votes for each class. +% +%___Returns model which has +% importance = a matrix with nclass + 2 (for classification) or two (for regression) columns. +% For classification, the first nclass columns are the class-specific measures +% computed as mean decrease in accuracy. The nclass + 1st column is the +% mean decrease in accuracy over all classes. The last column is the mean decrease +% in Gini index. For Regression, the first column is the mean decrease in +% accuracy and the second the mean decrease in MSE. If importance=FALSE, +% the last measure is still returned as a vector. +% importanceSD = The ?standard errors? of the permutation-based importance measure. 
For classification, +% a p by nclass + 1 matrix corresponding to the first nclass + 1 +% columns of the importance matrix. For regression, a length p vector. +% localImp = a p by n matrix containing the casewise importance measures, the [i,j] element +% of which is the importance of i-th variable on the j-th case. NULL if +% localImp=FALSE. +% ntree = number of trees grown. +% mtry = number of predictors sampled for spliting at each node. +% votes (classification only) a matrix with one row for each input data point and one +% column for each class, giving the fraction or number of ?votes? from the random +% forest. +% oob_times number of times cases are 'out-of-bag' (and thus used in computing OOB error +% estimate) +% proximity if proximity=TRUE when randomForest is called, a matrix of proximity +% measures among the input (based on the frequency that pairs of data points are +% in the same terminal nodes). +% errtr = first column is OOB Err rate, second is for class 1 and so on + +function model=classRF_train(X,Y,ntree,mtry, extra_options) + DEFAULTS_ON =0; + %DEBUG_ON=0; + + TRUE=1; + FALSE=0; + + orig_labels = sort(unique(Y)); + Y_new = Y; + new_labels = 1:length(orig_labels); + + for i=1:length(orig_labels) + Y_new(find(Y==orig_labels(i)))=Inf; + Y_new(isinf(Y_new))=new_labels(i); + end + + Y = Y_new; + + if exist('extra_options','var') + if isfield(extra_options,'DEBUG_ON'); DEBUG_ON = extra_options.DEBUG_ON; end + if isfield(extra_options,'replace'); replace = extra_options.replace; end + if isfield(extra_options,'classwt'); classwt = extra_options.classwt; end + if isfield(extra_options,'cutoff'); cutoff = extra_options.cutoff; end + if isfield(extra_options,'strata'); strata = extra_options.strata; end + if isfield(extra_options,'sampsize'); sampsize = extra_options.sampsize; end + if isfield(extra_options,'nodesize'); nodesize = extra_options.nodesize; end + if isfield(extra_options,'importance'); importance = extra_options.importance; end + if isfield(extra_options,'localImp'); localImp = extra_options.localImp; end + if isfield(extra_options,'nPerm'); nPerm = extra_options.nPerm; end + if isfield(extra_options,'proximity'); proximity = extra_options.proximity; end + if isfield(extra_options,'oob_prox'); oob_prox = extra_options.oob_prox; end + %if isfield(extra_options,'norm_votes'); norm_votes = extra_options.norm_votes; end + if isfield(extra_options,'do_trace'); do_trace = extra_options.do_trace; end + %if isfield(extra_options,'corr_bias'); corr_bias = extra_options.corr_bias; end + if isfield(extra_options,'keep_inbag'); keep_inbag = extra_options.keep_inbag; end + end + keep_forest=1; %always save the trees :) + + %set defaults if not already set + if ~exist('DEBUG_ON','var') DEBUG_ON=FALSE; end + if ~exist('replace','var'); replace = TRUE; end + %if ~exist('classwt','var'); classwt = []; end %will handle these three later + %if ~exist('cutoff','var'); cutoff = 1; end + %if ~exist('strata','var'); strata = 1; end + if ~exist('sampsize','var'); + if (replace) + sampsize = size(X,1); + else + sampsize = ceil(0.632*size(X,1)); + end; + end + if ~exist('nodesize','var'); nodesize = 1; end %classification=1, regression=5 + if ~exist('importance','var'); importance = FALSE; end + if ~exist('localImp','var'); localImp = FALSE; end + if ~exist('nPerm','var'); nPerm = 1; end + %if ~exist('proximity','var'); proximity = 1; end %will handle these two later + %if ~exist('oob_prox','var'); oob_prox = 1; end + %if ~exist('norm_votes','var'); norm_votes = TRUE; end + if 
~exist('do_trace','var'); do_trace = FALSE; end + %if ~exist('corr_bias','var'); corr_bias = FALSE; end + if ~exist('keep_inbag','var'); keep_inbag = FALSE; end + + + if ~exist('ntree','var') | ntree<=0 + ntree=500; + DEFAULTS_ON=1; + end + if ~exist('mtry','var') | mtry<=0 | mtry>size(X,2) + mtry =floor(sqrt(size(X,2))); + end + + addclass =isempty(Y); + + if (~addclass && length(unique(Y))<2) + error('need atleast two classes for classification'); + end + [N D] = size(X); + + if N==0; error(' data (X) has 0 rows');end + + if (mtry <1 || mtry > D) + DEFAULTS_ON=1; + end + + mtry = max(1,min(D,round(mtry))); + + if DEFAULTS_ON + fprintf('\tSetting to defaults %d trees and mtry=%d\n',ntree,mtry); + end + + if ~isempty(Y) + if length(Y)~=N, + error('Y size is not the same as X size'); + end + addclass = FALSE; + else + if ~addclass, + addclass=TRUE; + end + error('have to fill stuff here') + end + + if ~isempty(find(isnan(X))); error('NaNs in X'); end + if ~isempty(find(isnan(Y))); error('NaNs in Y'); end + + %now handle categories. Problem is that categories in R are more + %enhanced. In this i ask the user to specify the column/features to + %consider as categories, 1 if all the values are real values else + %specify the number of categories here + if exist ('extra_options','var') && isfield(extra_options,'categories') + ncat = extra_options.categories; + else + ncat = ones(1,D); + end + + maxcat = max(ncat); + if maxcat>32 + error('Can not handle categorical predictors with more than 32 categories'); + end + + %classRF - line 88 in randomForest.default.R + nclass = length(unique(Y)); + if ~exist('cutoff','var') + cutoff = ones(1,nclass)* (1/nclass); + else + if sum(cutoff)>1 || sum(cutoff)<0 || length(find(cutoff<=0))>0 || length(cutoff)~=nclass + error('Incorrect cutoff specified'); + end + end + if ~exist('classwt','var') + classwt = ones(1,nclass); + ipi=0; + else + if length(classwt)~=nclass + error('Length of classwt not equal to the number of classes') + end + if ~isempty(find(classwt<=0)) + error('classwt must be positive'); + end + ipi=1; + end + + if ~exist('proximity','var') + proximity = addclass; + oob_prox = proximity; + end + + if ~exist('oob_prox','var') + oob_prox = proximity; + end + + %i handle the below in the mex file +% if proximity +% prox = zeros(N,N); +% proxts = 1; +% else +% prox = 1; +% proxts = 1; +% end + + %i handle the below in the mex file + if localImp + importance = TRUE; +% impmat = zeors(D,N); + else +% impmat = 1; + end + + if importance + if (nPerm<1) + nPerm = int32(1); + else + nPerm = int32(nPerm); + end + + %classRF +% impout = zeros(D,nclass+2); +% impSD = zeros(D,nclass+1); + else +% impout = zeros(D,1); +% impSD = 1; + end + + %i handle the below in the mex file + %somewhere near line 157 in randomForest.default.R + if addclass +% nsample = 2*n; + else +% nsample = n; + end + + Stratify = (length(sampsize)>1); + if (~Stratify && sampsize>N) + error('Sampsize too large') + end + + if Stratify + if ~exist('strata','var') + strata = Y; + end + nsum = sum(sampsize); + if ( ~isempty(find(sampsize<=0)) || nsum==0) + error('Bad sampsize specification'); + end + else + nsum = sampsize; + end + %i handle the below in the mex file + %nrnodes = 2*floor(nsum/nodesize)+1; + %xtest = 1; + %ytest = 1; + %ntest = 1; + %labelts = FALSE; + %nt = ntree; + + + + + %[ldau,rdau,nodestatus,nrnodes,upper,avnode,mbest,ndtree]= + %keyboard + + + + if Stratify + strata = int32(strata); + else + strata = int32(1); + end + + Options = int32([addclass, importance, localImp, 
 + + maxcat = max(ncat); + if maxcat>32 + error('Cannot handle categorical predictors with more than 32 categories'); + end + + %classRF - line 88 in randomForest.default.R + nclass = length(unique(Y)); + if ~exist('cutoff','var') + cutoff = ones(1,nclass)* (1/nclass); + else + if sum(cutoff)>1 || sum(cutoff)<0 || length(find(cutoff<=0))>0 || length(cutoff)~=nclass + error('Incorrect cutoff specified'); + end + end + if ~exist('classwt','var') + classwt = ones(1,nclass); + ipi=0; + else + if length(classwt)~=nclass + error('Length of classwt not equal to the number of classes') + end + if ~isempty(find(classwt<=0)) + error('classwt must be positive'); + end + ipi=1; + end + + if ~exist('proximity','var') + proximity = addclass; + oob_prox = proximity; + end + + if ~exist('oob_prox','var') + oob_prox = proximity; + end + + %I handle the below in the mex file + % if proximity + % prox = zeros(N,N); + % proxts = 1; + % else + % prox = 1; + % proxts = 1; + % end + + %I handle the below in the mex file + if localImp + importance = TRUE; + % impmat = zeros(D,N); + else + % impmat = 1; + end + + if importance + if (nPerm<1) + nPerm = int32(1); + else + nPerm = int32(nPerm); + end + + %classRF + % impout = zeros(D,nclass+2); + % impSD = zeros(D,nclass+1); + else + % impout = zeros(D,1); + % impSD = 1; + end + + %I handle the below in the mex file + %somewhere near line 157 in randomForest.default.R + if addclass + % nsample = 2*n; + else + % nsample = n; + end + + Stratify = (length(sampsize)>1); + if (~Stratify && sampsize>N) + error('Sampsize too large') + end + + if Stratify + if ~exist('strata','var') + strata = Y; + end + nsum = sum(sampsize); + if ( ~isempty(find(sampsize<=0)) || nsum==0) + error('Bad sampsize specification'); + end + else + nsum = sampsize; + end + %I handle the below in the mex file + %nrnodes = 2*floor(nsum/nodesize)+1; + %xtest = 1; + %ytest = 1; + %ntest = 1; + %labelts = FALSE; + %nt = ntree; + + + + + %[ldau,rdau,nodestatus,nrnodes,upper,avnode,mbest,ndtree]= + %keyboard + + + + if Stratify + strata = int32(strata); + else + strata = int32(1); + end + + Options = int32([addclass, importance, localImp, proximity, oob_prox, do_trace, keep_forest, replace, Stratify, keep_inbag]); + + + if DEBUG_ON + %print the parameters that I am sending in + fprintf('size(x) %d\n',size(X)); + fprintf('size(y) %d\n',size(Y)); + fprintf('nclass %d\n',nclass); + fprintf('size(ncat) %d\n',size(ncat)); + fprintf('maxcat %d\n',maxcat); + fprintf('size(sampsize) %d\n',size(sampsize)); + fprintf('sampsize[0] %d\n',sampsize(1)); + fprintf('Stratify %d\n',Stratify); + fprintf('Proximity %d\n',proximity); + fprintf('oob_prox %d\n',oob_prox); + fprintf('strata %d\n',strata); + fprintf('ntree %d\n',ntree); + fprintf('mtry %d\n',mtry); + fprintf('ipi %d\n',ipi); + fprintf('classwt %f\n',classwt); + fprintf('cutoff %f\n',cutoff); + fprintf('nodesize %f\n',nodesize); + end + + + [nrnodes,ntree,xbestsplit,classwt,cutoff,treemap,nodestatus,nodeclass,bestvar,ndbigtree,mtry ... + outcl, counttr, prox, impmat, impout, impSD, errtr, inbag] ... + = mexClassRF_train(X',int32(Y_new),length(unique(Y)),ntree,mtry,int32(ncat), ... + int32(maxcat), int32(sampsize), strata, Options, int32(ipi), ... + classwt, cutoff, int32(nodesize),int32(nsum)); + model.nrnodes=nrnodes; + model.ntree=ntree; + model.xbestsplit=xbestsplit; + model.classwt=classwt; + model.cutoff=cutoff; + model.treemap=treemap; + model.nodestatus=nodestatus; + model.nodeclass=nodeclass; + model.bestvar = bestvar; + model.ndbigtree = ndbigtree; + model.mtry = mtry; + model.orig_labels=orig_labels; + model.new_labels=new_labels; + model.nclass = length(unique(Y)); + model.outcl = outcl; + model.counttr = counttr; + if proximity + model.proximity = prox; + else + model.proximity = []; + end + model.localImp = impmat; + model.importance = impout; + model.importanceSD = impSD; + model.errtr = errtr'; + model.inbag = inbag; + model.votes = counttr'; + model.oob_times = sum(counttr)'; + clear mexClassRF_train + %keyboard + 1; + diff --git a/randomforest-matlab/RF_Class_C/compile_linux.m b/randomforest-matlab/RF_Class_C/compile_linux.m new file mode 100644 index 0000000..702b947 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/compile_linux.m @@ -0,0 +1,16 @@ +% ******************************************************************** +% * mex File compiling code for Random Forest (for linux) +% * mex interface to Andy Liaw et al.'s C code (used in R package randomForest) +% * Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) +% * License: GPLv2 +% * Version: 0.02 +% ********************************************************************/ +function compile_linux + + system('rm *.mexglx *.mexa64;'); + + system('make clean;make mex;'); + + %the fortran code makes it hard to NOT use the Makefile + + diff --git a/randomforest-matlab/RF_Class_C/compile_windows.m b/randomforest-matlab/RF_Class_C/compile_windows.m new file mode 100644 index 0000000..b72f527 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/compile_windows.m @@ -0,0 +1,25 @@ +% ******************************************************************** +% * mex File compiling code for Random Forest (for windows) +% * mex interface to Andy Liaw et al.'s C code (used in R package randomForest) +% * Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) +% * License: GPLv2 +% * Version: 0.02 +% ********************************************************************/ + + function compile_windows + % system('del *.mexw32;del *.mexw64;'); + + fprintf('I am going to use the precompiled fortran file\n'); + fprintf('If it does not work then use cygwin+g77 (or gfortran) to recompile rfsub.f\n'); + + if
strcmp(computer,'PCWIN64') + % mex -DMATLAB -DWIN64 -output mexClassRF_train src/classRF.cpp src/classTree2.cpp src/cokus.cpp precompiled_rfsub/win64/rfsub.o src/mex_ClassificationRF_train.cpp src/rfutils.cpp + mex -DMATLAB -DWIN64 -output mexClassRF_predict src/classRF.cpp src/classTree.cpp src/cokus.cpp precompiled_rfsub/win64/rfsub.o src/mex_ClassificationRF_predict.cpp src/rfutils.cpp + elseif strcmp(computer,'PCWIN') + mex -DMATLAB -output mexClassRF_train src/classRF.cpp src/classTree.cpp src/cokus.cpp precompiled_rfsub/win32/rfsub.o src/mex_ClassificationRF_train.cpp src/rfutils.cpp + mex -DMATLAB -output mexClassRF_predict src/classRF.cpp src/classTree.cpp src/cokus.cpp precompiled_rfsub/win32/rfsub.o src/mex_ClassificationRF_predict.cpp src/rfutils.cpp + else + error('Wrong script to run on this Comp architecture. I cannot detect any windows system') + end + fprintf('Mex`s compiled correctly\n') + fprintf('As this package has precompiled mex files ignore this file i.e. compile_windows.m\n') \ No newline at end of file diff --git a/randomforest-matlab/RF_Class_C/data/X_twonorm.txt b/randomforest-matlab/RF_Class_C/data/X_twonorm.txt new file mode 100644 index 0000000..8776262 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/data/X_twonorm.txt @@ -0,0 +1,300 @@ + 7.3950000e-01 7.4720000e-01 -2.9880000e-01 7.8790000e-01 1.0296000e+00 7.1490000e-01 4.9380000e-01 1.0399000e+00 5.4860000e-01 2.1075000e+00 1.0512000e+00 2.7520000e-01 2.1920000e-01 4.0900000e-01 5.7230000e-01 -1.1091000e+00 -1.2670000e-01 7.5170000e-01 1.6644000e+00 2.3640000e-01 + -2.8740000e-01 1.6702000e+00 -2.7310000e-01 2.7243000e+00 6.9140000e-01 1.7339000e+00 1.4485000e+00 1.0482000e+00 3.4500000e-01 2.2350000e-01 6.7620000e-01 1.9467000e+00 -1.8823000e+00 -2.7600000e-02 1.1070000e-01 1.7459000e+00 1.8887000e+00 -1.3930000e-01 1.9630000e-01 -8.3300000e-02 + 1.6035000e+00 -4.0100000e-02 1.8280000e-01 1.4500000e-01 5.0210000e-01 2.0701000e+00 -7.0980000e-01 3.3960000e-01 6.1800000e-02 1.5882000e+00 6.2840000e-01 8.5750000e-01 2.8528000e+00 1.0051000e+00 5.3910000e-01 2.5880000e-01 -8.0950000e-01 -5.0400000e-02 6.4830000e-01 -1.1413000e+00 + 1.4777000e+00 -3.0000000e-04 1.3220000e-01 2.6400000e-01 1.7445000e+00 -5.4250000e-01 4.0940000e-01 2.0146000e+00 3.9710000e-01 6.9600000e-01 1.0170000e-01 6.5170000e-01 -4.7860000e-01 5.4450000e-01 -5.4360000e-01 5.3350000e-01 8.0030000e-01 1.2900000e+00 -4.1000000e-03 3.4500000e-01 + -2.7536000e+00 -1.5990000e+00 1.3153000e+00 1.2690000e+00 6.0170000e-01 -3.7610000e-01 2.2910000e-01 -9.5700000e-02 -3.3340000e-01 -5.7030000e-01 -1.3716000e+00 -1.9717000e+00 -4.0440000e-01 -5.6110000e-01 -1.0035000e+00 -1.2461000e+00 -3.7000000e-01 4.3400000e-02 -3.1030000e+00 9.0510000e-01 + 1.7786000e+00 1.3037000e+00 -4.3320000e-01 1.3189000e+00 1.3530000e+00 4.6030000e-01 1.7060000e-01 7.9720000e-01 1.7948000e+00 5.7360000e-01 1.1022000e+00 1.7870000e+00 7.6890000e-01 -5.1950000e-01 -1.9103000e+00 -4.2710000e-01 4.6080000e-01 3.2266000e+00 1.6442000e+00 8.3390000e-01 + 2.7935000e+00 2.9900000e-02 -1.0752000e+00 -1.2082000e+00 -5.7630000e-01 1.4091000e+00 -9.3790000e-01 -1.0959000e+00 6.8800000e-02 1.9812000e+00 8.0010000e-01 -4.8280000e-01 -8.5320000e-01 -6.7700000e-01 3.3080000e-01 -1.4210000e+00 -7.6230000e-01 -4.1820000e-01 -2.1160000e-01 -1.8874000e+00 + 1.1509000e+00 -6.4440000e-01 -8.6760000e-01 -2.9340000e-01 1.6661000e+00 4.8080000e-01 -1.2502000e+00 5.7000000e-01 2.0056000e+00 -1.4690000e-01 -5.6170000e-01 8.1840000e-01 8.5780000e-01 1.2562000e+00 -8.8530000e-01 4.4680000e-01 
9.3350000e-01 -1.2960000e-01 8.8250000e-01 -4.0750000e-01 + 2.3710000e+00 1.1185000e+00 -3.6880000e-01 5.6630000e-01 2.8460000e-01 1.1395000e+00 3.1910000e-01 1.2060000e-01 -5.2430000e-01 1.6213000e+00 1.9758000e+00 6.3810000e-01 7.9060000e-01 2.6460000e-01 2.2194000e+00 -1.1126000e+00 -1.9820000e-01 -1.1150000e+00 7.5070000e-01 1.2630000e+00 + -2.7440000e-01 -2.7129000e+00 -1.5521000e+00 2.0220000e-01 -3.9840000e-01 4.9790000e-01 -1.5212000e+00 -7.5810000e-01 -4.9290000e-01 4.2800000e-01 -1.9410000e-01 1.9112000e+00 1.0760000e-01 -9.1540000e-01 7.7510000e-01 1.0320000e-01 -1.3300000e+00 -1.3605000e+00 -3.2870000e-01 2.8909000e+00 + -1.2789000e+00 4.4170000e-01 1.4872000e+00 -2.5710000e-01 7.1140000e-01 -1.3900000e-02 -1.6420000e+00 1.9683000e+00 -1.2510000e+00 4.4480000e-01 7.6800000e-01 -8.6400000e-02 1.0156000e+00 -1.1979000e+00 -9.3200000e-01 -9.6600000e-01 1.1464000e+00 3.6520000e-01 -1.4753000e+00 -2.1833000e+00 + -5.5850000e-01 4.8890000e-01 7.8880000e-01 7.9290000e-01 -3.3010000e-01 -3.4950000e-01 1.6456000e+00 1.3019000e+00 -3.1000000e-01 1.5200000e-02 8.3370000e-01 1.7834000e+00 1.7487000e+00 6.8200000e-01 1.5116000e+00 -6.7810000e-01 3.2690000e-01 2.0194000e+00 1.7243000e+00 5.1500000e-01 + 1.1291000e+00 5.5820000e-01 -6.6580000e-01 -1.8270000e-01 -9.8850000e-01 -2.4866000e+00 -1.4295000e+00 2.1770000e-01 2.1041000e+00 9.9710000e-01 9.3660000e-01 6.9080000e-01 3.0450000e-01 1.2469000e+00 -1.1473000e+00 1.2010000e-01 1.1730000e-01 -1.8029000e+00 4.6210000e-01 -1.5028000e+00 + -6.3760000e-01 -7.8000000e-01 -7.8820000e-01 -3.4500000e-02 -1.1657000e+00 5.1050000e-01 -1.4798000e+00 1.1600000e-01 7.2940000e-01 -3.5300000e-02 3.0980000e-01 -7.0520000e-01 2.0400000e-01 9.1090000e-01 -8.3830000e-01 -2.7400000e-01 -1.0852000e+00 1.1379000e+00 1.0032000e+00 -3.0870000e-01 + 5.1550000e-01 -1.4400000e-01 -1.9900000e-02 -7.7400000e-01 -9.9320000e-01 -1.1674000e+00 6.0880000e-01 5.8660000e-01 -9.2030000e-01 -1.7367000e+00 -2.3130000e-01 7.4180000e-01 -4.0320000e-01 -2.8149000e+00 2.5970000e-01 6.2110000e-01 -1.1289000e+00 -1.3431000e+00 -2.8872000e+00 -3.7210000e-01 + 1.0090000e+00 5.1080000e-01 8.8530000e-01 -1.6136000e+00 1.1112000e+00 -2.1600000e-01 -7.3560000e-01 -5.7000000e-01 4.4860000e-01 -1.3582000e+00 -1.4607000e+00 -1.7266000e+00 -8.1900000e-02 5.3380000e-01 -5.0930000e-01 -1.3210000e+00 8.9610000e-01 -1.0631000e+00 7.1000000e-01 -6.1960000e-01 + -1.0960000e-01 3.7610000e-01 7.5570000e-01 9.9100000e-02 -4.4300000e-02 -8.3730000e-01 3.4470000e-01 -9.1780000e-01 9.3770000e-01 1.0242000e+00 6.2510000e-01 1.5680000e-01 -8.2690000e-01 -1.7243000e+00 2.2440000e-01 -8.7550000e-01 -8.8670000e-01 -2.1382000e+00 -2.0255000e+00 1.1363000e+00 + -8.8990000e-01 1.1888000e+00 4.7550000e-01 1.9046000e+00 -2.0000000e-01 1.6689000e+00 8.1390000e-01 1.3831000e+00 2.2062000e+00 1.3921000e+00 2.1240000e-01 1.3413000e+00 2.6990000e+00 7.3360000e-01 -1.6344000e+00 -2.4750000e-01 -6.6910000e-01 -1.5336000e+00 1.6536000e+00 1.1554000e+00 + -9.7220000e-01 -1.0470000e-01 -2.2760000e-01 4.4040000e-01 5.7830000e-01 6.8300000e-02 -1.5660000e+00 -9.5170000e-01 -1.6846000e+00 1.5180000e-01 -5.7630000e-01 -3.4310000e-01 2.0670000e-01 8.6790000e-01 -1.2441000e+00 -1.6745000e+00 1.2700000e-02 3.9800000e-02 1.4254000e+00 -1.7934000e+00 + -2.7950000e-01 1.2516000e+00 -1.2764000e+00 9.0740000e-01 1.8345000e+00 -7.4560000e-01 1.7020000e-01 1.9179000e+00 7.9960000e-01 4.4260000e-01 2.3615000e+00 1.0979000e+00 3.1042000e+00 2.4560000e-01 1.1890000e-01 -1.3371000e+00 -3.8420000e-01 8.7720000e-01 -2.0100000e-02 2.0196000e+00 
+ -1.0651000e+00 -2.1043000e+00 -1.3847000e+00 -9.6830000e-01 -2.7110000e-01 3.6080000e-01 -9.0900000e-01 -1.1286000e+00 -8.8740000e-01 1.0500000e-02 8.2120000e-01 -2.0420000e+00 -8.7810000e-01 -6.3730000e-01 -8.6840000e-01 -5.8990000e-01 -1.4218000e+00 1.0005000e+00 -4.3950000e-01 1.4490000e-01 + 6.2110000e-01 2.0747000e+00 -1.1010000e-01 4.3630000e-01 -5.8570000e-01 1.5724000e+00 1.4076000e+00 -2.1000000e-02 1.1878000e+00 8.1880000e-01 1.3670000e-01 1.1558000e+00 1.2580000e+00 9.6260000e-01 1.7789000e+00 -1.8864000e+00 1.7505000e+00 8.6020000e-01 1.0604000e+00 -3.3470000e-01 + -8.9910000e-01 8.2560000e-01 -4.8740000e-01 -1.9460000e-01 1.0143000e+00 3.1110000e-01 -1.0736000e+00 4.0000000e-02 -5.8380000e-01 2.1340000e-01 7.9590000e-01 -6.9210000e-01 -1.3844000e+00 -6.3310000e-01 -7.2040000e-01 -1.3000000e-02 -3.9130000e-01 1.2390000e-01 -1.0166000e+00 8.0340000e-01 + -2.3140000e-01 2.2658000e+00 1.7833000e+00 1.1388000e+00 2.9380000e-01 -1.3775000e+00 1.2770000e-01 -7.2810000e-01 1.4158000e+00 5.2560000e-01 9.2810000e-01 3.8160000e-01 1.2131000e+00 -3.5300000e-01 9.9480000e-01 -5.6000000e-01 -7.1700000e-02 7.2880000e-01 7.8710000e-01 -9.2380000e-01 + 9.5550000e-01 3.1486000e+00 1.2565000e+00 -2.7800000e-02 -1.2444000e+00 -6.3420000e-01 2.3546000e+00 7.7200000e-01 2.3130000e-01 5.1970000e-01 -4.8610000e-01 1.6818000e+00 8.6070000e-01 2.5670000e-01 2.7254000e+00 -1.2651000e+00 2.6070000e-01 1.1726000e+00 9.1600000e-01 1.9370000e-01 + 4.8150000e-01 -4.9520000e-01 1.4425000e+00 1.3450000e+00 -5.3740000e-01 6.8720000e-01 1.0019000e+00 4.8550000e-01 -3.6710000e-01 -1.5390000e-01 3.7330000e-01 -4.7910000e-01 4.4210000e-01 1.7111000e+00 4.7220000e-01 3.6560000e-01 -1.6560000e-01 1.2163000e+00 -1.0600000e+00 3.5160000e-01 + 6.8540000e-01 -1.5501000e+00 1.0298000e+00 -2.9908000e+00 -2.1494000e+00 -2.3427000e+00 -4.9760000e-01 -6.3640000e-01 -3.6690000e-01 -4.8020000e-01 4.8530000e-01 -9.1110000e-01 3.1150000e-01 1.3352000e+00 -1.5171000e+00 -7.4930000e-01 4.2320000e-01 -8.0200000e-02 1.4000000e-01 -4.2120000e-01 + -9.5530000e-01 8.1170000e-01 -8.3160000e-01 1.0588000e+00 -4.7900000e-02 7.3180000e-01 4.7000000e-03 -1.7553000e+00 -6.1240000e-01 8.6790000e-01 -1.1039000e+00 1.7810000e-01 9.4500000e-02 -2.8114000e+00 -1.9413000e+00 -6.2890000e-01 -7.8690000e-01 3.4050000e-01 -1.8652000e+00 2.1500000e-01 + -3.1850000e-01 -6.8260000e-01 1.1000000e+00 -1.4400000e-01 1.2479000e+00 -1.0525000e+00 2.3097000e+00 -8.8410000e-01 -4.9300000e-02 1.4661000e+00 9.6250000e-01 2.0846000e+00 2.5372000e+00 -4.5310000e-01 3.4660000e-01 9.5200000e-02 -1.0602000e+00 -1.1422000e+00 5.0480000e-01 1.4537000e+00 + -8.1750000e-01 -3.0030000e-01 -9.7960000e-01 -2.3833000e+00 -1.3339000e+00 -8.5230000e-01 1.8170000e-01 -4.4100000e-02 -1.5706000e+00 1.0500000e-01 9.0730000e-01 -1.6698000e+00 -9.2870000e-01 -1.0412000e+00 -1.0461000e+00 -2.6614000e+00 -3.2054000e+00 -8.4490000e-01 -2.4459000e+00 2.3000000e-02 + -8.7930000e-01 8.5660000e-01 -6.2410000e-01 -1.4048000e+00 -8.3350000e-01 -1.5653000e+00 1.3616000e+00 3.4930000e-01 -4.2280000e-01 7.8200000e-02 2.2750000e-01 -9.5140000e-01 -6.8690000e-01 -7.9730000e-01 -1.2572000e+00 -3.9610000e-01 -9.3010000e-01 3.4030000e-01 1.9840000e-01 9.0000000e-01 + -6.9650000e-01 5.5290000e-01 5.3400000e-01 -2.8890000e+00 -2.4500000e-01 -1.6065000e+00 1.7430000e-01 9.4000000e-02 -2.1503000e+00 -2.5000000e-01 -3.0540000e-01 -1.4470000e-01 -7.0500000e-02 -7.5900000e-01 8.3600000e-01 -2.3024000e+00 -9.7770000e-01 -1.1800000e-02 -2.0225000e+00 -1.4963000e+00 + 1.3416000e+00 -5.4240000e-01 
2.7150000e-01 8.7800000e-01 -1.5719000e+00 1.6285000e+00 -7.4030000e-01 -1.2900000e-02 1.1169000e+00 -1.3820000e-01 1.7984000e+00 5.9630000e-01 -1.6800000e+00 6.5680000e-01 4.8780000e-01 -4.9900000e-01 9.6540000e-01 1.8351000e+00 1.9797000e+00 1.0612000e+00 + 8.3680000e-01 2.4929000e+00 1.2023000e+00 4.9370000e-01 9.5960000e-01 -1.0390000e+00 3.9330000e-01 -2.7200000e-02 1.4097000e+00 1.9662000e+00 -8.6340000e-01 2.7346000e+00 -1.7664000e+00 4.9180000e-01 2.9150000e-01 -4.1200000e-01 1.0487000e+00 1.4928000e+00 3.3180000e-01 1.9413000e+00 + 5.1670000e-01 2.7160000e-01 2.0830000e-01 1.5435000e+00 1.1818000e+00 1.7279000e+00 1.5830000e-01 -1.1691000e+00 -6.0500000e-02 -1.7520000e-01 5.9290000e-01 -6.5870000e-01 1.4950000e-01 -2.1700000e-01 1.8590000e-01 2.5215000e+00 -1.3316000e+00 2.0570000e-01 1.4821000e+00 7.9230000e-01 + 7.2340000e-01 -7.1560000e-01 -1.3704000e+00 -1.6000000e-03 7.1510000e-01 -1.2330000e-01 5.7950000e-01 1.6310000e-01 -6.8700000e-01 -3.1999000e+00 2.9600000e-02 -1.1700000e-01 -2.2180000e-01 -3.7276000e+00 -1.6749000e+00 3.1190000e-01 9.2940000e-01 -4.8170000e-01 -2.5257000e+00 1.7227000e+00 + -1.7785000e+00 -1.3095000e+00 2.2477000e+00 4.6890000e-01 -7.1260000e-01 2.0425000e+00 -4.4640000e-01 4.1860000e-01 2.5440000e-01 -2.3247000e+00 -1.2710000e-01 -1.7308000e+00 -1.0269000e+00 -2.9470000e-01 1.0507000e+00 1.7750000e-01 -1.9313000e+00 -3.9630000e-01 -1.4185000e+00 -2.8240000e-01 + 1.0240000e+00 -1.6900000e-01 -5.8400000e-01 -4.1280000e-01 1.9397000e+00 -1.1478000e+00 7.2530000e-01 1.2957000e+00 -7.0920000e-01 3.8240000e-01 2.9760000e-01 1.2696000e+00 -5.9400000e-01 1.5370000e-01 -3.1900000e-01 -4.3800000e-02 4.6590000e-01 -5.5900000e-01 6.3770000e-01 1.3481000e+00 + -1.0528000e+00 -1.2574000e+00 1.6970000e+00 -1.0805000e+00 1.2370000e-01 2.2380000e-01 -1.5012000e+00 1.0948000e+00 9.7720000e-01 -1.9226000e+00 7.7890000e-01 -1.4520000e-01 -1.8546000e+00 -1.3946000e+00 -1.1568000e+00 -2.4666000e+00 -2.4286000e+00 9.2790000e-01 -8.2660000e-01 5.7920000e-01 + 3.8300000e-02 -5.4000000e-02 1.3140000e-01 1.5891000e+00 1.8973000e+00 8.0300000e-02 -1.3011000e+00 -8.7520000e-01 1.7575000e+00 1.0740000e+00 5.8880000e-01 -1.1976000e+00 1.6663000e+00 1.5023000e+00 -3.5610000e-01 1.9305000e+00 5.3840000e-01 5.7570000e-01 2.3933000e+00 1.1048000e+00 + 1.3504000e+00 4.7100000e-02 4.6330000e-01 1.9315000e+00 -4.1800000e-02 3.6660000e-01 -2.1000000e-02 -1.1759000e+00 2.0302000e+00 1.0856000e+00 8.5740000e-01 1.0516000e+00 -4.2450000e-01 1.9879000e+00 2.8514000e+00 1.8954000e+00 -1.0162000e+00 1.0321000e+00 5.8520000e-01 1.3132000e+00 + 6.9820000e-01 8.2300000e-02 7.0230000e-01 5.8570000e-01 -7.2200000e-02 -9.7500000e-02 1.6530000e+00 -5.4180000e-01 2.4236000e+00 5.7060000e-01 2.6102000e+00 -2.0160000e-01 1.1399000e+00 1.3734000e+00 1.4383000e+00 -1.4690000e-01 4.4530000e-01 1.5509000e+00 6.2940000e-01 -1.1895000e+00 + -1.3613000e+00 -6.1840000e-01 -8.6960000e-01 -1.7244000e+00 4.6700000e-02 -5.0850000e-01 -9.7350000e-01 1.2083000e+00 -8.3380000e-01 -2.1158000e+00 -5.1300000e-02 -4.0280000e-01 -8.4280000e-01 -1.2182000e+00 -6.1920000e-01 4.4600000e-01 -1.2764000e+00 -2.3264000e+00 4.2800000e-01 -1.5129000e+00 + 1.7230000e+00 1.4362000e+00 -4.3790000e-01 4.0630000e-01 6.5400000e-01 1.3370000e+00 1.4907000e+00 1.9437000e+00 1.0110000e-01 -2.0600000e-01 -1.1307000e+00 -2.6690000e-01 -7.8000000e-01 1.7003000e+00 8.3170000e-01 -2.5320000e-01 -1.4260000e-01 -3.8410000e-01 1.0136000e+00 9.6460000e-01 + -1.1660000e-01 6.9610000e-01 1.1955000e+00 -1.6540000e-01 -1.4818000e+00 2.5640000e-01 
2.8640000e-01 -7.8000000e-03 -5.6950000e-01 8.2150000e-01 6.4000000e-03 6.8000000e-02 -1.4728000e+00 -8.7240000e-01 -8.6400000e-02 -3.8390000e-01 -1.5107000e+00 8.4530000e-01 -1.3244000e+00 -4.0600000e-01 + -6.1130000e-01 1.5341000e+00 4.8200000e-01 1.0515000e+00 1.8824000e+00 1.2031000e+00 -2.8980000e-01 -5.4490000e-01 1.9834000e+00 -4.0750000e-01 1.0398000e+00 2.0738000e+00 -5.5140000e-01 2.2044000e+00 1.3320000e+00 -1.1252000e+00 -1.2854000e+00 -7.3270000e-01 2.3127000e+00 1.2658000e+00 + -8.9420000e-01 6.4890000e-01 1.3440000e+00 1.4245000e+00 9.1800000e-01 2.0277000e+00 -6.2950000e-01 5.8910000e-01 2.0699000e+00 4.8100000e-01 5.4230000e-01 1.5180000e-01 -3.3710000e-01 1.5154000e+00 1.4299000e+00 -2.6570000e+00 -6.0570000e-01 -6.6160000e-01 7.0340000e-01 6.1750000e-01 + 4.7750000e-01 -7.3170000e-01 1.1764000e+00 -2.1957000e+00 -3.9650000e-01 -2.7500000e-01 -8.9080000e-01 -1.2005000e+00 -9.4950000e-01 8.1030000e-01 4.2000000e-01 -6.5100000e-02 1.4700000e-01 -6.9930000e-01 -3.1870000e-01 -7.4740000e-01 -1.8280000e+00 -1.7052000e+00 8.6100000e-02 -6.5490000e-01 + -2.2673000e+00 2.1020000e-01 9.7280000e-01 5.1670000e-01 4.8220000e-01 2.4910000e-01 2.0200000e-01 -5.5910000e-01 -6.3710000e-01 7.4310000e-01 -1.0573000e+00 1.3690000e-01 1.0769000e+00 -1.2849000e+00 -4.2910000e-01 1.4504000e+00 -3.9230000e-01 1.3884000e+00 2.8230000e-01 -1.5730000e-01 + -8.7010000e-01 -5.7100000e-02 -1.4830000e-01 -8.9540000e-01 -4.0910000e-01 8.6390000e-01 -1.9290000e-01 -8.0560000e-01 -1.8800000e-01 -1.4524000e+00 -1.5018000e+00 7.5770000e-01 -3.9620000e-01 -8.9510000e-01 -6.4960000e-01 1.2525000e+00 -1.1414000e+00 -2.6960000e-01 -1.4917000e+00 -1.7056000e+00 + 3.5600000e-02 -1.2610000e-01 -5.3400000e-01 4.9250000e-01 -3.7150000e-01 -1.7600000e-02 6.6030000e-01 1.8823000e+00 8.3000000e-03 8.7100000e-02 -1.1309000e+00 2.1220000e-01 2.1662000e+00 2.7483000e+00 1.6320000e-01 4.4370000e-01 1.3175000e+00 1.3807000e+00 1.5100000e+00 1.0366000e+00 + 1.5990000e-01 8.3100000e-01 1.2731000e+00 -1.7640000e-01 -7.1420000e-01 -2.5760000e-01 -1.9810000e-01 9.2440000e-01 1.1890000e+00 1.6045000e+00 9.1310000e-01 4.9860000e-01 8.3490000e-01 -1.8800000e-02 1.1344000e+00 -1.5710000e-01 -7.6050000e-01 -8.2140000e-01 1.8005000e+00 1.6814000e+00 + -1.9446000e+00 -1.8593000e+00 -1.8460000e+00 -2.2770000e-01 -4.5800000e-02 -1.1878000e+00 6.7140000e-01 -1.8871000e+00 -7.9510000e-01 -3.1260000e-01 -1.3042000e+00 -4.5010000e-01 -9.6110000e-01 2.7130000e-01 -2.3012000e+00 4.1550000e-01 -1.2573000e+00 -6.8500000e-02 -1.5690000e-01 1.1315000e+00 + -1.2711000e+00 -1.2979000e+00 -8.7400000e-01 -1.6857000e+00 -1.2816000e+00 -1.2688000e+00 -1.0400000e-01 -3.0090000e-01 -9.3910000e-01 -2.1727000e+00 1.0442000e+00 -9.4200000e-01 -9.6750000e-01 -8.8570000e-01 1.6800000e-01 -8.1260000e-01 -2.1502000e+00 8.6980000e-01 -1.2448000e+00 -7.0380000e-01 + -7.3040000e-01 4.7300000e-02 1.0055000e+00 -3.1090000e-01 1.8997000e+00 -1.9260000e-01 1.6003000e+00 -6.7410000e-01 -4.7710000e-01 -2.5380000e-01 4.9660000e-01 1.7175000e+00 2.0349000e+00 -7.8600000e-01 -3.6830000e-01 1.4405000e+00 -4.5980000e-01 -4.1730000e-01 8.9130000e-01 1.2210000e+00 + 2.1082000e+00 1.4006000e+00 2.0813000e+00 -5.7900000e-01 1.5980000e+00 4.3490000e-01 6.3600000e-02 3.3900000e-01 -1.9300000e-01 -8.1000000e-03 1.6095000e+00 1.7965000e+00 -2.8058000e+00 2.3358000e+00 -7.2720000e-01 -7.4950000e-01 7.4430000e-01 -8.0350000e-01 1.2800000e+00 1.8138000e+00 + -1.2340000e+00 -2.9650000e-01 2.1720000e-01 -6.9970000e-01 -6.2310000e-01 1.2264000e+00 -2.2770000e-01 -2.1137000e+00 
1.2070000e-01 7.1200000e-01 -8.2240000e-01 6.7030000e-01 -1.3168000e+00 6.6490000e-01 1.0276000e+00 8.1700000e-02 -1.1494000e+00 4.3140000e-01 8.2100000e-01 2.0430000e-01 + -1.0431000e+00 -2.6733000e+00 -1.4400000e+00 -3.8400000e-02 5.5850000e-01 2.2590000e-01 3.9900000e-02 -2.8010000e-01 5.1580000e-01 -1.0600000e-02 -1.7610000e-01 1.3490000e-01 -2.2972000e+00 -7.6640000e-01 -5.9050000e-01 -4.1720000e-01 -1.8949000e+00 -4.2420000e-01 -9.9070000e-01 1.2528000e+00 + 1.7638000e+00 1.6364000e+00 2.9170000e-01 -5.3260000e-01 -2.7860000e-01 -1.1910000e-01 3.8800000e+00 -2.4450000e-01 2.6602000e+00 -4.0000000e-02 7.3820000e-01 -1.9333000e+00 3.5250000e-01 3.9900000e-01 -7.2690000e-01 -4.9310000e-01 1.1796000e+00 1.0540000e+00 6.6820000e-01 1.4124000e+00 + -2.8250000e-01 -1.1387000e+00 -2.6810000e-01 -1.4449000e+00 3.9840000e-01 -3.4720000e-01 1.0372000e+00 -7.2040000e-01 -2.6123000e+00 7.3470000e-01 -1.7337000e+00 5.0120000e-01 1.6300000e+00 -2.0836000e+00 -1.7851000e+00 -1.4748000e+00 1.9490000e-01 -2.1260000e-01 -2.0744000e+00 -5.9830000e-01 + 7.8200000e-02 -2.5650000e-01 1.7161000e+00 -2.0054000e+00 1.4270000e-01 -5.1400000e-02 -1.4774000e+00 5.9600000e-01 -6.3650000e-01 -7.5060000e-01 5.8200000e-02 -1.4867000e+00 -1.7353000e+00 1.8450000e-01 3.9970000e-01 -3.7220000e-01 -7.2130000e-01 1.7460000e-01 -4.3290000e-01 3.2150000e-01 + -1.9310000e-01 -5.5740000e-01 -1.2770000e+00 1.4740000e-01 -4.8080000e-01 -1.5354000e+00 1.4350000e-01 4.4990000e-01 -9.6320000e-01 -4.9310000e-01 -2.5470000e-01 -6.0030000e-01 5.8770000e-01 3.2870000e-01 1.6230000e+00 3.2990000e-01 8.2260000e-01 -7.8360000e-01 -5.3840000e-01 -1.2251000e+00 + 1.0388000e+00 1.3870000e+00 -8.2210000e-01 2.8730000e-01 -4.5110000e-01 8.3320000e-01 3.1342000e+00 1.4810000e-01 -5.8290000e-01 1.9739000e+00 2.4087000e+00 1.3910000e+00 -4.2280000e-01 -1.1482000e+00 5.8930000e-01 8.5180000e-01 2.0850000e-01 -1.1510000e-01 3.8780000e-01 -1.3430000e-01 + -3.6500000e-01 -2.2913000e+00 -8.5380000e-01 -7.2830000e-01 -4.7560000e-01 -1.6723000e+00 -7.1600000e-02 2.0995000e+00 -1.8434000e+00 -2.1665000e+00 -1.6444000e+00 -1.7462000e+00 -4.9870000e-01 -4.1520000e-01 8.1090000e-01 -7.0540000e-01 -1.2305000e+00 -9.7520000e-01 -1.0904000e+00 -9.0100000e-01 + -7.6210000e-01 5.5710000e-01 -9.1710000e-01 -8.8000000e-01 -1.7548000e+00 7.3000000e-01 -2.0478000e+00 -8.5250000e-01 -2.6930000e-01 2.7068000e+00 -3.3210000e-01 1.2192000e+00 1.4359000e+00 -3.5300000e-01 2.7280000e-01 4.1010000e-01 -5.8900000e-02 -1.0709000e+00 -7.2370000e-01 -1.4051000e+00 + 2.3966000e+00 1.7523000e+00 2.4730000e-01 7.1690000e-01 5.7330000e-01 1.4247000e+00 1.4293000e+00 3.8010000e-01 4.0500000e-01 8.6370000e-01 4.3590000e-01 3.4910000e-01 1.1368000e+00 1.6162000e+00 1.7195000e+00 5.7200000e-02 8.3000000e-02 2.4088000e+00 9.3840000e-01 2.3382000e+00 + 7.6310000e-01 2.8280000e-01 2.1186000e+00 5.2100000e-01 2.8820000e-01 2.0796000e+00 -6.9310000e-01 1.0830000e-01 7.5510000e-01 1.4840000e+00 2.3320000e-01 -1.1797000e+00 1.6182000e+00 1.0155000e+00 7.7790000e-01 1.0306000e+00 5.2600000e-01 1.0666000e+00 1.6396000e+00 2.6930000e-01 + -1.5716000e+00 9.8800000e-01 1.6520000e+00 3.5390000e-01 -2.8100000e-02 1.1380000e-01 -1.4182000e+00 -1.2210000e+00 -1.5531000e+00 1.1131000e+00 -2.8470000e-01 -1.0000000e-01 -1.0390000e-01 -9.7210000e-01 -4.8170000e-01 -1.4525000e+00 -2.8180000e-01 5.5980000e-01 2.0900000e-01 -5.5820000e-01 + -4.0300000e-02 -1.6524000e+00 -1.3576000e+00 -3.9980000e-01 -1.0799000e+00 1.2070000e-01 1.9617000e+00 -1.3645000e+00 -7.1620000e-01 -1.0578000e+00 -1.0700000e+00 
-6.4710000e-01 5.5820000e-01 -1.0630000e+00 -3.6464000e+00 -3.8890000e-01 -1.0254000e+00 -5.5380000e-01 5.9060000e-01 -1.2799000e+00 + 4.0660000e-01 1.6670000e-01 -1.5685000e+00 4.2710000e-01 6.7640000e-01 1.7160000e-01 2.0670000e-01 -1.8960000e-01 1.8320000e+00 -1.1130000e-01 1.2870000e-01 1.5735000e+00 -1.8840000e-01 8.3480000e-01 -1.8100000e-01 1.8119000e+00 1.9914000e+00 7.8420000e-01 1.7605000e+00 -2.8190000e-01 + -9.3400000e-01 -6.4500000e-01 -7.2570000e-01 -7.1000000e-02 -1.3105000e+00 -5.1820000e-01 -1.1860000e-01 3.8980000e-01 3.3160000e-01 -1.0424000e+00 -1.3710000e+00 -1.1495000e+00 -6.7850000e-01 -6.8300000e-01 -4.2250000e-01 -1.5304000e+00 1.2332000e+00 -9.8870000e-01 -1.4065000e+00 2.6660000e-01 + 6.7420000e-01 1.7705000e+00 -8.5460000e-01 4.7130000e-01 4.5570000e-01 8.7350000e-01 -5.8060000e-01 9.0870000e-01 5.3100000e-02 1.0050000e+00 1.5381000e+00 1.1448000e+00 2.3549000e+00 1.1177000e+00 -1.6696000e+00 -1.4260000e-01 3.0580000e-01 4.0930000e-01 7.1410000e-01 1.3290000e+00 + 3.8690000e-01 1.9760000e-01 2.9120000e-01 6.9620000e-01 7.0640000e-01 9.7930000e-01 -2.8380000e-01 -1.7070000e+00 5.4010000e-01 4.1350000e-01 2.1140000e-01 3.7420000e-01 4.9540000e-01 2.2253000e+00 -1.0289000e+00 -1.1498000e+00 1.0360000e+00 1.8720000e-01 -2.3196000e+00 3.2000000e-03 + 7.8990000e-01 -2.4330000e-01 -2.5640000e-01 5.1400000e-01 2.0420000e+00 1.9498000e+00 3.7920000e-01 -5.2710000e-01 1.3326000e+00 1.1768000e+00 -5.1780000e-01 8.0430000e-01 9.9540000e-01 2.1840000e+00 2.2059000e+00 -1.0475000e+00 9.8140000e-01 -1.3227000e+00 -1.8700000e-01 -6.8600000e-01 + -2.0490000e-01 -6.5630000e-01 -1.5563000e+00 6.3380000e-01 -2.0794000e+00 -7.6040000e-01 1.1650000e-01 -4.5710000e-01 -9.0960000e-01 -6.8450000e-01 6.9600000e-02 4.7330000e-01 9.6910000e-01 -8.0590000e-01 -7.3080000e-01 1.0283000e+00 5.6830000e-01 -2.4098000e+00 -1.6730000e+00 5.3170000e-01 + 9.2460000e-01 4.0830000e-01 6.6100000e-02 1.3312000e+00 -4.9660000e-01 8.1600000e-02 -1.6737000e+00 -1.0773000e+00 7.6830000e-01 9.4040000e-01 -9.4570000e-01 7.5220000e-01 -7.8610000e-01 -8.0420000e-01 6.5990000e-01 1.0829000e+00 -1.0896000e+00 -1.3940000e-01 1.0983000e+00 1.6424000e+00 + -3.5260000e-01 -4.7600000e-02 1.8445000e+00 -1.1960000e-01 1.7233000e+00 -5.0950000e-01 -9.2560000e-01 1.1870000e+00 2.5640000e-01 1.9055000e+00 1.7030000e-01 -4.9900000e-02 5.7950000e-01 1.0369000e+00 8.3950000e-01 9.3360000e-01 1.9331000e+00 1.6919000e+00 -9.9600000e-01 1.5320000e+00 + -2.0522000e+00 -7.6150000e-01 1.1291000e+00 5.2360000e-01 -1.1797000e+00 3.8130000e-01 -1.1317000e+00 1.7701000e+00 -7.1890000e-01 -1.4066000e+00 4.3900000e-02 -9.2050000e-01 -7.6020000e-01 6.2620000e-01 4.1720000e-01 -1.2936000e+00 1.8770000e-01 -7.9840000e-01 -2.2588000e+00 -2.6930000e-01 + 2.0284000e+00 3.5190000e-01 -1.4070000e-01 1.1478000e+00 -4.2790000e-01 -3.6800000e-01 -1.7156000e+00 9.0210000e-01 8.5800000e-02 -7.8500000e-02 1.0256000e+00 1.0845000e+00 1.6670000e-01 5.7640000e-01 -1.1117000e+00 4.6450000e-01 3.7520000e-01 -1.1642000e+00 5.2230000e-01 -1.1196000e+00 + -6.3420000e-01 8.9680000e-01 -1.0681000e+00 -7.3820000e-01 3.2600000e-02 7.7010000e-01 1.1436000e+00 -1.2530000e+00 4.4850000e-01 6.4990000e-01 -1.2130000e-01 6.9110000e-01 -3.1070000e-01 1.0500000e-02 -1.1702000e+00 -1.0035000e+00 9.6400000e-02 -2.1570000e+00 -1.6090000e-01 -9.5290000e-01 + 7.8350000e-01 -7.0700000e-02 -2.6160000e-01 -1.4029000e+00 1.0642000e+00 -1.4526000e+00 7.2640000e-01 -1.7681000e+00 1.2280000e-01 -1.1781000e+00 -1.2000000e-02 9.6720000e-01 -1.2840000e-01 -3.2500000e-01 -2.0161000e+00 
7.5110000e-01 5.3420000e-01 7.4000000e-01 -2.5234000e+00 4.0540000e-01 + -5.9690000e-01 5.2400000e-02 -4.3630000e-01 -1.8350000e-01 3.1500000e-01 4.3210000e-01 -1.1529000e+00 -1.3173000e+00 3.4670000e-01 -2.5921000e+00 4.0000000e-04 -1.3567000e+00 -1.0471000e+00 -3.0115000e+00 -1.5920000e-01 -1.1607000e+00 -7.4810000e-01 -2.1920000e-01 -2.6092000e+00 -2.1260000e-01 + 1.6971000e+00 -5.3820000e-01 4.6090000e-01 -4.9520000e-01 1.2847000e+00 -1.1886000e+00 1.1339000e+00 -9.4860000e-01 1.3676000e+00 1.6010000e+00 1.2129000e+00 2.1300000e-01 7.5590000e-01 1.3912000e+00 2.7840000e-01 -1.3001000e+00 -4.1630000e-01 1.1609000e+00 -4.6010000e-01 1.6870000e-01 + 1.6270000e-01 -2.4409000e+00 4.6760000e-01 7.3940000e-01 1.2475000e+00 1.3317000e+00 -1.3270000e-01 3.6340000e-01 -1.9670000e-01 6.2260000e-01 2.8130000e-01 1.3852000e+00 1.1048000e+00 3.4220000e-01 2.0140000e-01 1.0447000e+00 1.0729000e+00 4.3470000e-01 6.3550000e-01 1.1283000e+00 + 7.6030000e-01 -1.2782000e+00 -1.5045000e+00 -9.8500000e-01 -1.9287000e+00 1.8810000e-01 -2.3990000e-01 -1.1140000e-01 1.2610000e-01 -9.5010000e-01 -1.2136000e+00 4.4360000e-01 1.8060000e-01 -3.0280000e-01 1.1273000e+00 9.0280000e-01 -1.1120000e-01 -1.9571000e+00 6.2540000e-01 -1.6539000e+00 + 3.1850000e-01 2.6025000e+00 2.7410000e-01 8.2040000e-01 -1.8550000e-01 1.0209000e+00 -3.0140000e-01 9.2660000e-01 1.5220000e-01 6.8590000e-01 2.4412000e+00 2.0290000e-01 -6.6000000e-02 -6.2220000e-01 1.6767000e+00 -8.1150000e-01 -7.2110000e-01 1.0720000e-01 1.1153000e+00 4.6930000e-01 + 5.7330000e-01 -2.2389000e+00 1.8444000e+00 1.4339000e+00 8.8590000e-01 2.0268000e+00 8.8850000e-01 6.3880000e-01 2.5511000e+00 1.7580000e-01 5.8680000e-01 8.6220000e-01 1.0182000e+00 1.5870000e+00 1.5698000e+00 1.0450000e-01 7.6330000e-01 4.5210000e-01 9.7090000e-01 -4.7870000e-01 + -1.1250000e-01 -1.2454000e+00 1.0943000e+00 -7.5530000e-01 -1.9764000e+00 -8.1360000e-01 -1.1427000e+00 -8.6150000e-01 -7.9590000e-01 -2.1260000e-01 -2.9149000e+00 -5.4560000e-01 1.2674000e+00 -3.4270000e-01 -2.8390000e-01 -1.0327000e+00 -1.1070000e-01 -3.1900000e-01 -8.8200000e-01 -1.8020000e-01 + -1.5635000e+00 -1.7240000e+00 -3.6550000e-01 -2.9790000e-01 5.6930000e-01 6.4900000e-01 -1.7093000e+00 -9.1850000e-01 9.1450000e-01 -2.2641000e+00 1.3201000e+00 -3.1100000e-02 5.6570000e-01 8.9840000e-01 -5.6260000e-01 -2.9240000e-01 -5.3430000e-01 4.1000000e-01 -1.2256000e+00 -1.1697000e+00 + -2.0280000e-01 -8.1070000e-01 -3.6470000e-01 -1.7533000e+00 -2.0782000e+00 -8.5530000e-01 -9.1290000e-01 -1.0944000e+00 4.9200000e-01 -1.3709000e+00 -1.9408000e+00 -2.1933000e+00 2.1880000e-01 2.3330000e-01 -3.1006000e+00 -9.4300000e-02 4.5000000e-03 -1.6518000e+00 -6.0900000e-01 -3.5500000e-01 + -1.5851000e+00 9.1150000e-01 -2.3900000e-01 1.2580000e+00 6.3400000e-02 -4.3310000e-01 6.6580000e-01 1.3971000e+00 -8.0760000e-01 2.1783000e+00 7.7830000e-01 6.2500000e-01 1.9412000e+00 -3.4210000e-01 -3.2500000e-02 1.7051000e+00 5.0380000e-01 7.4960000e-01 3.2530000e-01 2.3624000e+00 + -4.0780000e-01 -6.7410000e-01 -5.1810000e-01 -8.3890000e-01 -7.5950000e-01 1.7673000e+00 1.1598000e+00 -1.8433000e+00 -1.6239000e+00 5.6790000e-01 1.0690000e+00 -2.3997000e+00 -6.0520000e-01 -5.5460000e-01 5.0010000e-01 2.8040000e-01 -2.9280000e-01 -1.2450000e+00 8.4260000e-01 4.6290000e-01 + -4.5200000e-01 1.2300000e-01 9.4430000e-01 6.1910000e-01 7.9000000e-03 -5.3570000e-01 -1.3033000e+00 9.5090000e-01 -7.1160000e-01 1.3830000e-01 -1.3668000e+00 -6.6540000e-01 -4.9910000e-01 -2.3289000e+00 -6.8940000e-01 -2.4609000e+00 -9.5600000e-02 -1.1473000e+00 
1.4200000e-02 -1.7434000e+00 + 5.5320000e-01 -6.0090000e-01 7.7350000e-01 -3.6410000e-01 6.3990000e-01 1.4492000e+00 1.2343000e+00 5.4320000e-01 -3.9510000e-01 2.2580000e-01 -1.3131000e+00 -1.1389000e+00 8.4050000e-01 -1.0149000e+00 1.8819000e+00 -1.6091000e+00 1.8956000e+00 2.7520000e-01 -5.7570000e-01 -1.3961000e+00 + -1.2993000e+00 -9.8110000e-01 8.4560000e-01 1.0183000e+00 -4.8630000e-01 4.2720000e-01 -8.8920000e-01 -4.4240000e-01 -3.9610000e-01 -4.2700000e-01 1.7758000e+00 -5.5840000e-01 -1.9061000e+00 -7.4500000e-01 7.0900000e-02 -5.1590000e-01 -5.6850000e-01 -6.9330000e-01 6.4500000e-02 7.7300000e-02 + -1.2854000e+00 -4.6230000e-01 -2.6150000e-01 -1.3700000e-02 -1.2803000e+00 -1.3350000e+00 5.3720000e-01 -1.8144000e+00 -3.0040000e-01 1.0082000e+00 9.9300000e-02 -5.4340000e-01 -3.7300000e-02 -2.1850000e+00 1.0875000e+00 -4.6900000e-01 -6.8520000e-01 -1.6519000e+00 -1.1140000e-01 6.9660000e-01 + 2.4390000e-01 -8.8110000e-01 -1.7792000e+00 1.3865000e+00 1.0958000e+00 1.1775000e+00 2.2558000e+00 2.4610000e-01 -1.5774000e+00 -9.0900000e-01 1.6650000e-01 -7.6340000e-01 4.8710000e-01 2.3973000e+00 1.2934000e+00 3.8830000e-01 5.5850000e-01 -1.7730000e-01 1.6607000e+00 3.2750000e-01 + 1.7832000e+00 -2.4740000e-01 -6.4080000e-01 5.7000000e-01 8.5100000e-02 -1.3131000e+00 -9.3910000e-01 -6.5880000e-01 -2.0638000e+00 -2.9800000e-01 -6.6570000e-01 3.9860000e-01 -9.1740000e-01 5.3710000e-01 -5.4510000e-01 1.0162000e+00 -1.9060000e-01 -6.9990000e-01 -4.8600000e-01 -1.6161000e+00 + 1.6546000e+00 1.2540000e-01 6.4740000e-01 6.2270000e-01 -4.4390000e-01 -1.2010000e-01 1.3640000e-01 -7.2440000e-01 2.1409000e+00 -7.8600000e-02 1.2275000e+00 2.5983000e+00 8.2800000e-01 2.0230000e-01 1.2328000e+00 9.7940000e-01 5.2050000e-01 -2.9057000e+00 1.3347000e+00 1.0348000e+00 + -1.8850000e-01 -1.5176000e+00 2.5631000e+00 5.5380000e-01 9.5270000e-01 1.3236000e+00 1.1782000e+00 -6.0930000e-01 -1.6267000e+00 -1.1970000e+00 -2.0763000e+00 -1.2384000e+00 1.9330000e-01 -1.1349000e+00 4.9590000e-01 -6.6070000e-01 5.1940000e-01 1.2085000e+00 6.1550000e-01 -1.2430000e+00 + 1.9530000e-01 -8.0670000e-01 -4.9660000e-01 -2.6730000e-01 -1.0690000e-01 -6.8610000e-01 -1.2249000e+00 1.3939000e+00 -1.5360000e+00 3.7450000e-01 -7.9140000e-01 -2.6760000e+00 -6.3120000e-01 2.1800000e-02 -7.5550000e-01 -7.5530000e-01 -1.1286000e+00 -1.5005000e+00 -9.2660000e-01 -2.3561000e+00 + -2.0735000e+00 8.6460000e-01 -1.4256000e+00 -5.2220000e-01 1.4010000e-01 -4.6690000e-01 -9.2380000e-01 1.3500000e+00 -4.1590000e-01 5.7810000e-01 -6.7180000e-01 2.8120000e-01 -1.2900000e-02 -2.5805000e+00 -2.8550000e-01 -8.6010000e-01 -1.0359000e+00 2.2650000e-01 -1.6062000e+00 -2.7588000e+00 + -6.8200000e-01 -1.6979000e+00 -5.1100000e-01 -2.3716000e+00 -1.3480000e+00 8.1850000e-01 -2.1660000e-01 -7.1000000e-01 -6.8760000e-01 -6.2470000e-01 -1.2434000e+00 -1.8939000e+00 9.1080000e-01 -2.0510000e-01 -2.3485000e+00 -1.1176000e+00 -2.4592000e+00 -2.5330000e-01 -4.4860000e-01 -9.0330000e-01 + -1.2207000e+00 -4.5450000e-01 -7.9250000e-01 -1.3289000e+00 7.8100000e-01 -7.8800000e-01 -5.3110000e-01 -3.3500000e-01 -3.1200000e-01 3.6900000e-02 3.9870000e-01 -2.2019000e+00 -2.0680000e-01 -9.7350000e-01 -9.5810000e-01 -1.4658000e+00 -6.6840000e-01 -1.1627000e+00 -2.9370000e-01 -1.6310000e+00 + -6.8560000e-01 -1.6618000e+00 6.8700000e-01 1.6630000e-01 3.1510000e-01 -5.1760000e-01 2.2650000e-01 -1.2300000e+00 -9.8500000e-01 -6.5770000e-01 -7.0560000e-01 5.7850000e-01 3.3410000e-01 4.4320000e-01 8.2710000e-01 9.4800000e-02 -3.3190000e-01 -8.0850000e-01 -2.3001000e+00 
-6.3810000e-01 + 1.2704000e+00 2.0786000e+00 -3.2510000e-01 1.9478000e+00 7.1770000e-01 8.3880000e-01 1.2350000e-01 3.0330000e-01 1.2603000e+00 -7.8100000e-02 1.4173000e+00 6.3550000e-01 8.2450000e-01 2.4560000e-01 -1.8940000e-01 1.1541000e+00 -1.4241000e+00 1.3419000e+00 1.7660000e-01 1.6431000e+00 + 1.5803000e+00 1.8003000e+00 -1.5302000e+00 -1.3592000e+00 -8.8300000e-01 -1.9436000e+00 1.1797000e+00 -7.9100000e-02 -9.8960000e-01 -4.7640000e-01 -6.9300000e-02 -1.0794000e+00 -2.1600000e+00 -1.3980000e+00 -9.5550000e-01 6.1750000e-01 -1.9461000e+00 -8.9470000e-01 1.2354000e+00 -8.5700000e-01 + -1.0976000e+00 -1.1082000e+00 4.8700000e-01 -1.1824000e+00 -1.5066000e+00 2.2499000e+00 7.1410000e-01 -1.2682000e+00 -5.2930000e-01 -7.0840000e-01 -3.7870000e-01 6.5800000e-02 -2.4497000e+00 2.1630000e-01 -1.1362000e+00 7.2210000e-01 -2.4660000e-01 -4.2170000e-01 -1.6040000e+00 3.1760000e-01 + 1.5462000e+00 -1.4610000e-01 1.0788000e+00 -8.3720000e-01 5.1300000e-02 1.3800000e-02 -4.8920000e-01 3.7800000e-02 -1.2970000e-01 -3.4600000e-01 -1.6169000e+00 2.8820000e-01 -1.8584000e+00 -4.6500000e-02 2.3670000e-01 4.2970000e-01 -5.7220000e-01 -6.2980000e-01 6.5900000e-01 1.6647000e+00 + -2.4440000e-01 3.6440000e-01 -6.8350000e-01 -6.4450000e-01 -5.0610000e-01 -1.1910000e-01 -7.9280000e-01 -1.9496000e+00 -2.2033000e+00 -7.5530000e-01 9.0060000e-01 -1.2934000e+00 -9.2280000e-01 -1.1766000e+00 7.1970000e-01 -2.0130000e-01 -2.3823000e+00 -5.7630000e-01 3.8250000e-01 -2.2600000e-02 + -7.2130000e-01 6.7610000e-01 -2.1110000e+00 -1.5409000e+00 4.0920000e-01 -1.4604000e+00 -7.0760000e-01 -2.1810000e-01 -9.5350000e-01 2.2460000e-01 -6.3160000e-01 -1.2145000e+00 -1.3806000e+00 1.5800000e-02 -8.9220000e-01 1.4699000e+00 -1.5949000e+00 -6.8960000e-01 -6.8940000e-01 -1.2322000e+00 + -1.8390000e+00 -1.4969000e+00 -1.9574000e+00 8.5440000e-01 -2.6010000e-01 2.8720000e-01 -1.1869000e+00 -4.8490000e-01 -6.6900000e-01 2.0700000e-02 6.5250000e-01 -1.7274000e+00 6.0700000e-01 1.7901000e+00 -9.0120000e-01 9.0050000e-01 -5.5190000e-01 -8.7860000e-01 -1.7403000e+00 -2.3107000e+00 + 7.1180000e-01 2.2140000e-01 8.6600000e-01 1.3470000e-01 -5.4970000e-01 -2.2140000e-01 -1.2529000e+00 6.0870000e-01 1.6400000e-01 -7.5200000e-02 5.6260000e-01 -1.3480000e-01 5.4910000e-01 4.0140000e-01 4.2280000e-01 -4.1500000e-01 5.8800000e-01 1.5207000e+00 -5.7920000e-01 -2.1560000e-01 + 8.2790000e-01 8.1980000e-01 9.6150000e-01 -4.6920000e-01 1.6340000e-01 -3.3340000e-01 9.1740000e-01 7.0540000e-01 3.0404000e+00 2.8910000e-01 1.7286000e+00 4.3390000e-01 -1.1910000e+00 -6.3800000e-01 -2.0800000e-02 -9.8500000e-02 -2.2932000e+00 5.2520000e-01 1.4131000e+00 6.7230000e-01 + 7.5440000e-01 2.4900000e-01 6.9090000e-01 -1.3266000e+00 -1.4972000e+00 -1.1151000e+00 8.2390000e-01 1.5520000e-01 -7.4300000e-01 -2.2723000e+00 -8.7910000e-01 -2.6474000e+00 8.6870000e-01 -8.2930000e-01 -2.9826000e+00 5.6690000e-01 -2.6510000e-01 6.3800000e-01 -1.4540000e+00 -3.2460000e-01 + -9.5080000e-01 -3.6110000e-01 -1.5582000e+00 1.5480000e-01 1.1667000e+00 -2.1333000e+00 -2.0162000e+00 -1.3090000e+00 -1.3114000e+00 -1.1439000e+00 1.1227000e+00 -3.3300000e-02 -1.4108000e+00 1.2500000e-02 1.1142000e+00 -1.6240000e+00 1.8648000e+00 -2.0949000e+00 -1.7736000e+00 -9.4160000e-01 + 3.5900000e-02 1.3040000e+00 -3.3380000e-01 2.7790000e-01 -8.1790000e-01 1.9666000e+00 5.5950000e-01 4.2370000e-01 6.0420000e-01 1.8395000e+00 3.1940000e-01 -7.5790000e-01 9.8230000e-01 1.3887000e+00 -7.7150000e-01 6.1480000e-01 1.5988000e+00 9.0230000e-01 6.6730000e-01 2.1948000e+00 + -1.3271000e+00 
-1.6638000e+00 2.2910000e-01 7.2900000e-02 -1.5380000e-01 -1.4660000e-01 -1.1490000e-01 -7.8520000e-01 -2.5725000e+00 -1.1810000e+00 -4.2210000e-01 7.8630000e-01 -1.3658000e+00 -1.6063000e+00 -1.9072000e+00 -2.0410000e+00 -4.0340000e-01 -1.0518000e+00 1.3353000e+00 -5.6560000e-01 + 1.1355000e+00 -4.2850000e-01 8.7580000e-01 -2.2095000e+00 -1.1340000e+00 -1.9530000e-01 8.6680000e-01 9.1250000e-01 -1.5257000e+00 -9.7690000e-01 -1.2644000e+00 -4.4790000e-01 -6.8510000e-01 3.9570000e-01 -5.4460000e-01 -1.9674000e+00 -9.7090000e-01 -9.4820000e-01 -7.0300000e-01 -1.5114000e+00 + 7.5380000e-01 1.5750000e-01 -9.7390000e-01 1.1740000e+00 -2.4620000e-01 2.3840000e-01 1.7860000e+00 1.3045000e+00 -8.6370000e-01 7.5300000e-01 1.7111000e+00 2.4034000e+00 1.0421000e+00 3.6940000e-01 5.6710000e-01 1.8170000e-01 1.1130000e-01 1.2171000e+00 -1.6223000e+00 -3.0620000e-01 + -1.9018000e+00 -1.4011000e+00 -1.0062000e+00 -6.8650000e-01 6.8030000e-01 2.4300000e-02 3.0440000e-01 -1.1007000e+00 -1.8750000e+00 -6.7510000e-01 4.8860000e-01 -1.3176000e+00 -8.6050000e-01 -5.9310000e-01 1.5179000e+00 -6.8570000e-01 -5.2610000e-01 -1.2000000e-02 -9.9800000e-02 -1.1286000e+00 + 1.2469000e+00 -1.2412000e+00 8.0300000e-01 2.0953000e+00 8.2670000e-01 1.8181000e+00 1.0550000e+00 2.8700000e-01 -8.1130000e-01 1.1685000e+00 9.9070000e-01 2.5373000e+00 2.0108000e+00 1.0558000e+00 -7.4160000e-01 4.8510000e-01 -6.8070000e-01 8.5550000e-01 -1.1700000e+00 1.2989000e+00 + 2.3510000e-01 1.5361000e+00 -7.9100000e-02 1.4431000e+00 9.5220000e-01 -7.1500000e-02 -1.2363000e+00 -2.2810000e-01 1.1310000e+00 -3.8960000e-01 2.0324000e+00 3.6140000e-01 -1.3540000e-01 2.0901000e+00 4.0260000e-01 -6.9000000e-02 8.1520000e-01 1.4062000e+00 -1.0185000e+00 -1.1095000e+00 + -1.4051000e+00 -1.4257000e+00 -2.0783000e+00 1.7220000e-01 -1.2480000e+00 -9.7420000e-01 1.4650000e-01 -5.9290000e-01 -1.3495000e+00 -2.1030000e-01 -8.8430000e-01 1.2196000e+00 3.0720000e-01 1.1492000e+00 -2.0410000e-01 -1.6395000e+00 3.4860000e-01 -2.8380000e-01 -1.2180000e-01 1.2290000e-01 + 1.8762000e+00 -1.5918000e+00 -1.2412000e+00 7.5100000e-02 -1.4840000e-01 -1.4910000e-01 1.6252000e+00 -2.0314000e+00 8.5690000e-01 -8.5030000e-01 -6.2770000e-01 2.7800000e-02 -1.5947000e+00 -2.0942000e+00 -7.2300000e-01 -5.5390000e-01 8.4870000e-01 8.1000000e-03 6.6270000e-01 -5.8970000e-01 + -8.1730000e-01 -1.0404000e+00 6.6490000e-01 6.3090000e-01 9.4960000e-01 -8.0660000e-01 -1.0149000e+00 -6.2320000e-01 -8.5510000e-01 1.4627000e+00 -3.3890000e-01 -7.6080000e-01 2.0441000e+00 -2.1466000e+00 -9.0220000e-01 -1.5630000e+00 -1.4760000e-01 -1.6454000e+00 4.9550000e-01 3.1280000e-01 + 9.1950000e-01 8.9780000e-01 -2.8648000e+00 1.1860000e-01 -1.8300000e+00 7.4000000e-03 -9.7560000e-01 1.2350000e-01 1.7340000e-01 -6.8460000e-01 1.0154000e+00 7.5730000e-01 2.0000000e-01 -3.8790000e-01 4.2200000e-02 -8.5380000e-01 3.1323000e+00 -5.6430000e-01 -3.3260000e-01 -3.0600000e-01 + 1.5285000e+00 -2.4860000e-01 2.6610000e-01 1.0450000e+00 4.4950000e-01 9.9860000e-01 1.3104000e+00 2.5722000e+00 2.8018000e+00 8.8730000e-01 1.2457000e+00 1.3438000e+00 1.3840000e+00 -4.3000000e-01 1.7260000e+00 6.5960000e-01 -4.5910000e-01 -1.0538000e+00 1.2964000e+00 -9.2770000e-01 + 1.1060000e-01 -1.4599000e+00 -2.8690000e-01 -1.4233000e+00 3.3620000e-01 8.2690000e-01 1.6441000e+00 -1.1717000e+00 -1.0878000e+00 -1.4847000e+00 3.2000000e-02 -5.0670000e-01 -3.1480000e-01 -3.6100000e-02 -1.4117000e+00 1.3360000e-01 5.8870000e-01 -1.4827000e+00 -3.4400000e-01 -8.2200000e-02 + 1.3947000e+00 2.4245000e+00 -1.6480000e-01 
-2.6940000e-01 1.1577000e+00 1.1414000e+00 4.9380000e-01 1.2125000e+00 -1.0495000e+00 2.8140000e-01 -2.6600000e-02 2.2769000e+00 -2.6260000e-01 -1.1799000e+00 -1.3950000e-01 3.8670000e-01 -5.0270000e-01 2.0672000e+00 1.9160000e-01 -1.0643000e+00 + -1.7216000e+00 6.6130000e-01 1.0977000e+00 -6.8700000e-02 1.1340000e-01 3.1644000e+00 3.0630000e-01 3.6830000e-01 7.5550000e-01 -7.1380000e-01 1.3218000e+00 1.7389000e+00 1.5300000e-01 -2.9210000e-01 2.3080000e-01 1.5807000e+00 -9.5410000e-01 -6.9000000e-01 2.4420000e-01 -1.1520000e+00 + 9.8210000e-01 -2.0590000e-01 -2.7980000e-01 3.6900000e-02 -5.0680000e-01 1.4162000e+00 1.3023000e+00 7.2580000e-01 5.7430000e-01 1.0862000e+00 5.9150000e-01 -1.2536000e+00 6.8620000e-01 4.3710000e-01 5.7970000e-01 -5.4100000e-02 5.9520000e-01 2.8650000e-01 -4.6710000e-01 1.1249000e+00 + -1.6483000e+00 1.6013000e+00 1.2480000e-01 1.8060000e-01 -1.4841000e+00 2.0797000e+00 5.9150000e-01 4.4540000e-01 7.8310000e-01 8.9360000e-01 7.3600000e-02 8.3510000e-01 1.1058000e+00 9.8990000e-01 1.2794000e+00 -2.4380000e-01 1.6018000e+00 4.4310000e-01 -1.1100000e-01 2.9160000e-01 + -3.6160000e-01 2.4310000e-01 -1.5628000e+00 -6.7070000e-01 -1.8040000e-01 -2.1312000e+00 -9.9150000e-01 -1.5926000e+00 -2.1875000e+00 -5.6370000e-01 4.9190000e-01 -1.4000000e-03 2.2460000e-01 7.5750000e-01 -2.5271000e+00 2.9310000e-01 -3.2950000e-01 5.9830000e-01 4.9140000e-01 -3.2690000e-01 + 2.0004000e+00 2.1120000e-01 -9.1630000e-01 -4.5390000e-01 1.2557000e+00 9.0750000e-01 2.7220000e-01 5.0250000e-01 9.8930000e-01 -6.5350000e-01 1.1602000e+00 -8.4440000e-01 9.7370000e-01 3.0200000e-01 -1.4556000e+00 -1.2100000e+00 -1.2413000e+00 1.0286000e+00 6.4050000e-01 -2.0054000e+00 + -3.0470000e-01 2.2469000e+00 5.8700000e-02 -9.7180000e-01 -6.1000000e-02 8.9140000e-01 -8.0900000e-02 6.1470000e-01 9.5160000e-01 1.0541000e+00 1.9670000e-01 1.6440000e-01 2.1940000e-01 5.6000000e-02 3.0680000e-01 1.0166000e+00 1.0118000e+00 -1.2356000e+00 8.2380000e-01 -1.1438000e+00 + -1.2737000e+00 1.3905000e+00 -4.4110000e-01 4.5170000e-01 -1.4507000e+00 -6.4420000e-01 -4.7170000e-01 7.0530000e-01 1.3503000e+00 -4.1560000e-01 2.3840000e-01 1.0078000e+00 8.8600000e-01 -1.5232000e+00 6.7560000e-01 9.8330000e-01 1.5308000e+00 2.5890000e-01 1.0030000e-01 -7.1510000e-01 + 1.1956000e+00 -7.3430000e-01 -5.7330000e-01 1.2165000e+00 -4.6460000e-01 2.5450000e-01 1.3699000e+00 -2.5860000e-01 1.9923000e+00 1.9260000e+00 4.6110000e-01 -9.6580000e-01 7.7540000e-01 -2.5219000e+00 -5.6670000e-01 -8.8640000e-01 8.5030000e-01 1.2504000e+00 -3.4100000e-02 9.8210000e-01 + 5.1630000e-01 8.3040000e-01 9.0790000e-01 -1.2082000e+00 -1.0564000e+00 1.7598000e+00 2.2086000e+00 6.5620000e-01 -1.7637000e+00 2.6837000e+00 -1.4040000e-01 1.5669000e+00 2.2620000e-01 5.2200000e-02 9.4370000e-01 1.1399000e+00 8.2820000e-01 4.4680000e-01 -1.2828000e+00 -5.0400000e-02 + -6.9000000e-03 1.6558000e+00 1.0331000e+00 -4.4430000e-01 -4.2030000e-01 2.2181000e+00 4.2320000e-01 -4.0330000e-01 -3.4000000e-02 1.1088000e+00 4.3070000e-01 6.0730000e-01 -1.3070000e-01 -1.1334000e+00 -6.7950000e-01 1.0942000e+00 8.1880000e-01 6.5530000e-01 1.9354000e+00 2.6130000e-01 + -7.3980000e-01 -1.2630000e+00 4.8970000e-01 -2.0668000e+00 1.4820000e-01 -4.7560000e-01 -1.0868000e+00 6.5830000e-01 -6.8260000e-01 -2.4142000e+00 2.0030000e-01 -4.6810000e-01 1.1620000e-01 -1.7285000e+00 1.0580000e-01 -5.3250000e-01 1.1133000e+00 -6.7840000e-01 5.3080000e-01 1.9710000e+00 + -5.8150000e-01 -1.6900000e-02 4.4200000e-02 -2.5300000e-02 -2.3290000e+00 -1.1653000e+00 -1.4438000e+00 3.2090000e-01 
-1.1638000e+00 -1.6031000e+00 -3.3250000e-01 -4.4440000e-01 -1.4689000e+00 -1.3438000e+00 6.4460000e-01 -7.9150000e-01 -2.7915000e+00 4.4870000e-01 -1.5973000e+00 -3.5440000e-01 + 1.2101000e+00 -3.0650000e-01 3.1020000e-01 2.0553000e+00 1.8963000e+00 4.8660000e-01 2.3735000e+00 6.9830000e-01 1.4680000e-01 1.1438000e+00 -4.2620000e-01 6.9890000e-01 -3.5550000e-01 4.5600000e-01 1.2941000e+00 -1.5530000e+00 -2.5330000e-01 -6.0760000e-01 1.4906000e+00 1.6071000e+00 + 2.7470000e-01 1.5649000e+00 1.4998000e+00 7.8800000e-01 2.5700000e-01 7.4120000e-01 -3.7920000e-01 1.7640000e-01 1.8081000e+00 1.4017000e+00 4.6730000e-01 -3.5900000e-01 1.7975000e+00 5.2430000e-01 9.8210000e-01 -9.5790000e-01 3.7630000e-01 1.1776000e+00 -1.9890000e-01 2.1110000e-01 + -8.7000000e-01 -2.1893000e+00 -1.5262000e+00 -2.0360000e-01 6.4770000e-01 1.5230000e-01 1.3560000e-01 -1.1619000e+00 -8.9020000e-01 -3.9730000e-01 -8.6100000e-01 8.0730000e-01 -1.3833000e+00 7.9400000e-02 -7.8450000e-01 9.2570000e-01 -1.5947000e+00 2.5160000e-01 -1.1707000e+00 1.6155000e+00 + -1.2865000e+00 -2.1127000e+00 1.3190000e-01 -2.8990000e-01 -2.0100000e-01 -1.5022000e+00 -8.0330000e-01 3.8540000e-01 -8.9210000e-01 -7.9950000e-01 -1.7579000e+00 -1.5156000e+00 -2.1244000e+00 -5.4140000e-01 -8.4670000e-01 -6.5460000e-01 3.3930000e-01 -4.7020000e-01 9.4420000e-01 6.7800000e-01 + 2.1060000e-01 -8.2040000e-01 -1.2555000e+00 -2.0990000e-01 -8.0560000e-01 -1.2941000e+00 -1.9011000e+00 -1.9776000e+00 -9.5800000e-01 -1.0209000e+00 4.4580000e-01 2.4570000e-01 -4.4800000e-02 4.2240000e-01 5.7670000e-01 -2.6700000e-01 -3.1580000e-01 -9.8330000e-01 -2.9440000e-01 -1.0407000e+00 + -1.3695000e+00 -1.4299000e+00 3.3260000e-01 -2.4830000e-01 -6.1730000e-01 -2.9616000e+00 -1.2802000e+00 2.7000000e-03 -4.3460000e-01 -5.0610000e-01 -3.5810000e-01 -8.2100000e-02 -4.8630000e-01 -1.3360000e-01 -1.4631000e+00 7.8190000e-01 4.7620000e-01 -1.1977000e+00 -1.4070000e-01 2.3590000e-01 + 3.9070000e-01 -4.2070000e-01 4.7910000e-01 -1.4730000e-01 -1.1728000e+00 -2.8750000e-01 1.1849000e+00 -2.9143000e+00 4.3860000e-01 -2.2728000e+00 -7.9230000e-01 4.1080000e-01 -2.6610000e-01 1.6657000e+00 -1.8472000e+00 8.3950000e-01 -1.0746000e+00 2.2430000e-01 -1.7580000e+00 -1.6989000e+00 + 9.6400000e-02 -4.5740000e-01 -8.4210000e-01 -3.5100000e-01 1.5460000e-01 -7.0610000e-01 -9.4540000e-01 1.1591000e+00 -1.3913000e+00 -9.8900000e-02 -3.9160000e-01 -1.0892000e+00 -3.4050000e-01 -6.4970000e-01 -6.6630000e-01 -1.2942000e+00 -7.7400000e-02 -3.6430000e-01 6.7800000e-02 -1.6576000e+00 + -6.9730000e-01 1.6833000e+00 8.3650000e-01 -1.8741000e+00 4.2000000e-02 -1.4188000e+00 -1.2081000e+00 -2.4090000e-01 -6.7340000e-01 -1.0306000e+00 -2.1229000e+00 -7.4220000e-01 -1.6321000e+00 -9.4830000e-01 -6.8140000e-01 -7.0800000e-01 -4.2980000e-01 -8.5470000e-01 -5.0980000e-01 1.6540000e-01 + -1.3910000e-01 1.2224000e+00 4.2530000e-01 1.1227000e+00 7.5390000e-01 -7.8400000e-01 1.8795000e+00 2.0660000e-01 1.6298000e+00 -5.8550000e-01 2.0535000e+00 3.5980000e-01 4.6730000e-01 6.3350000e-01 3.6670000e-01 8.2600000e-02 9.2900000e-02 1.8545000e+00 -2.7970000e-01 -2.0210000e-01 + 9.6940000e-01 -1.0386000e+00 -2.0505000e+00 -1.5698000e+00 -2.6508000e+00 5.9590000e-01 2.0310000e-01 -1.3779000e+00 1.2904000e+00 -1.5724000e+00 7.4600000e-01 -9.5000000e-01 -1.6255000e+00 -7.1780000e-01 -2.5490000e-01 -1.1638000e+00 -1.0459000e+00 -9.3420000e-01 -4.1060000e-01 -1.7640000e-01 + -7.6200000e-02 -4.3310000e-01 -9.0520000e-01 5.3580000e-01 -7.7870000e-01 1.5600000e-01 -2.1444000e+00 -1.0270000e-01 -6.3130000e-01 
3.4580000e-01 -1.5850000e-01 8.1690000e-01 -9.2480000e-01 5.3330000e-01 -2.1948000e+00 -1.5426000e+00 -2.6218000e+00 -1.6234000e+00 -2.6900000e-02 -5.5320000e-01 + -2.8660000e-01 -2.3753000e+00 -7.9440000e-01 -4.6860000e-01 -9.0040000e-01 1.0888000e+00 -3.1010000e-01 2.0810000e-01 -1.0900000e+00 1.6797000e+00 -9.5700000e-01 1.9390000e+00 -1.2791000e+00 -9.5900000e-01 4.5110000e-01 -8.0490000e-01 -1.4390000e-01 -1.5081000e+00 -1.2586000e+00 -4.3830000e-01 + 2.5920000e-01 2.2810000e-01 1.7995000e+00 -2.0960000e+00 -1.8862000e+00 3.0120000e-01 4.4480000e-01 -1.5256000e+00 -1.5254000e+00 -1.7148000e+00 -6.8650000e-01 -9.7360000e-01 2.1300000e-02 -2.1345000e+00 -4.5100000e-01 1.9650000e-01 -1.2036000e+00 -1.1437000e+00 -3.8900000e-01 -1.7892000e+00 + 2.2415000e+00 1.7057000e+00 1.0542000e+00 7.8000000e-03 9.1650000e-01 -7.5490000e-01 -9.4560000e-01 -7.1260000e-01 -1.2618000e+00 8.0010000e-01 8.8940000e-01 -4.8670000e-01 7.5210000e-01 2.7750000e-01 1.6132000e+00 -9.3320000e-01 2.4609000e+00 1.0383000e+00 2.1510000e-01 1.6876000e+00 + -1.7600000e-02 4.2960000e-01 6.6380000e-01 1.5091000e+00 -4.7100000e-01 -6.8350000e-01 -1.4780000e-01 1.2145000e+00 -5.4120000e-01 1.1215000e+00 4.2630000e-01 -9.8920000e-01 -4.4530000e-01 1.0255000e+00 3.0450000e-01 -5.2920000e-01 1.5244000e+00 -5.3990000e-01 -1.3231000e+00 6.3610000e-01 + -1.1833000e+00 -1.9610000e-01 -1.5010000e+00 -8.4870000e-01 -2.3930000e-01 -6.8320000e-01 -3.0490000e+00 1.6463000e+00 -1.0689000e+00 -4.1760000e-01 -5.2300000e-01 1.2562000e+00 -8.5450000e-01 1.4474000e+00 9.5340000e-01 -1.0744000e+00 -4.6970000e-01 -7.8510000e-01 -7.5790000e-01 -2.0500000e-01 + -1.7904000e+00 9.7440000e-01 -2.2388000e+00 4.5000000e-03 1.8734000e+00 -7.2160000e-01 1.2080000e+00 8.3970000e-01 -6.8990000e-01 -8.8040000e-01 8.0810000e-01 -9.7320000e-01 8.6630000e-01 -1.0256000e+00 -2.4700000e-01 1.5335000e+00 9.4530000e-01 5.8800000e-02 -1.8670000e-01 -6.2000000e-01 + 1.0188000e+00 -7.6920000e-01 1.0150000e-01 -5.9130000e-01 -2.0881000e+00 2.1321000e+00 1.2260000e+00 5.8030000e-01 -1.8264000e+00 -9.5080000e-01 7.4750000e-01 -1.1536000e+00 -1.8359000e+00 1.6880000e-01 2.7800000e-02 1.8660000e-01 -1.4503000e+00 -1.4928000e+00 -7.0700000e-02 -4.4250000e-01 + 6.7680000e-01 -6.4430000e-01 1.0361000e+00 1.1047000e+00 1.2795000e+00 -1.4559000e+00 -4.4990000e-01 1.3011000e+00 5.9430000e-01 1.2865000e+00 8.6470000e-01 1.4929000e+00 2.2973000e+00 1.3624000e+00 2.8923000e+00 -2.1280000e-01 1.3995000e+00 1.1525000e+00 2.4922000e+00 8.6560000e-01 + 1.2960000e-01 4.2180000e-01 3.9600000e-02 1.4140000e+00 -1.3336000e+00 1.1600000e-01 -2.4760000e-01 -1.3180000e+00 4.6520000e-01 -8.8020000e-01 -2.9576000e+00 2.8770000e-01 3.1570000e-01 -7.6900000e-02 -6.8000000e-03 -2.3110000e-01 5.8430000e-01 -1.9173000e+00 -1.8259000e+00 -8.3480000e-01 + -3.0500000e-01 -1.2518000e+00 3.2000000e-02 -9.7030000e-01 -8.8000000e-03 -3.8890000e-01 -3.1250000e-01 -4.6340000e-01 8.9720000e-01 2.7010000e-01 -8.7010000e-01 -5.5650000e-01 -5.1280000e-01 -4.0700000e-02 -2.2270000e-01 3.6910000e-01 -9.1840000e-01 -1.9220000e+00 3.5910000e-01 -1.9884000e+00 + -1.5728000e+00 -1.7637000e+00 1.5310000e-01 -7.0700000e-01 -7.5810000e-01 5.6980000e-01 1.0950000e+00 2.0400000e-01 -6.8240000e-01 -4.8470000e-01 -7.5880000e-01 5.1160000e-01 -2.1590000e+00 -1.2397000e+00 1.7013000e+00 -6.2740000e-01 -1.4938000e+00 -8.0790000e-01 -6.2310000e-01 -1.5289000e+00 + -4.7110000e-01 -1.3910000e+00 9.3460000e-01 -6.0660000e-01 1.3402000e+00 8.5370000e-01 9.4680000e-01 1.0540000e+00 1.2500000e+00 1.6498000e+00 8.2270000e-01 
3.2150000e-01 1.2885000e+00 -7.6490000e-01 5.9880000e-01 2.6310000e-01 -5.9240000e-01 9.3600000e-01 7.2340000e-01 2.4930000e-01 + 7.2530000e-01 -2.2600000e-02 -1.0528000e+00 7.3740000e-01 1.8792000e+00 1.8776000e+00 -1.6410000e-01 2.4547000e+00 -2.9070000e-01 5.4550000e-01 -1.1898000e+00 3.7580000e-01 -3.9170000e-01 -1.2103000e+00 2.4202000e+00 1.9341000e+00 -7.2990000e-01 6.5760000e-01 1.1379000e+00 3.4180000e-01 + 1.9112000e+00 4.3220000e-01 4.5530000e-01 1.1875000e+00 1.0161000e+00 -1.2234000e+00 2.8450000e-01 5.9740000e-01 1.3690000e+00 -8.2200000e-02 -8.2480000e-01 1.8609000e+00 8.1270000e-01 3.2530000e-01 -4.6400000e-01 -1.1690000e+00 9.6860000e-01 1.3852000e+00 1.2929000e+00 -2.9250000e-01 + -2.2536000e+00 -6.7920000e-01 -4.9800000e-01 4.0760000e-01 -4.1970000e-01 7.4000000e-02 -6.8930000e-01 9.8260000e-01 1.1957000e+00 -1.1959000e+00 8.0360000e-01 -3.6950000e-01 -1.9740000e-01 -8.2540000e-01 -1.4248000e+00 8.1200000e-02 -1.2324000e+00 9.7450000e-01 -2.1115000e+00 -4.4500000e-01 + -7.6120000e-01 -3.0580000e-01 -6.6870000e-01 6.5330000e-01 -1.6000000e-01 2.5650000e-01 -6.4830000e-01 1.2788000e+00 8.2400000e-02 5.2620000e-01 3.6880000e-01 8.7980000e-01 -4.9150000e-01 1.2253000e+00 -1.6027000e+00 3.0395000e+00 -5.6320000e-01 1.3490000e-01 -1.1337000e+00 1.5710000e-01 + 2.2799000e+00 2.9220000e-01 7.8180000e-01 -7.9000000e-02 3.0640000e-01 -1.1389000e+00 1.3281000e+00 9.3460000e-01 1.3518000e+00 -4.5590000e-01 1.1249000e+00 1.9938000e+00 6.4150000e-01 -5.6310000e-01 -9.9740000e-01 4.3100000e-02 4.6360000e-01 6.8310000e-01 6.9160000e-01 -2.1500000e-02 + 3.2910000e-01 -4.7130000e-01 -1.9650000e-01 -3.6610000e-01 -8.4930000e-01 4.0910000e-01 8.6400000e-02 1.6102000e+00 9.4500000e-02 -1.2462000e+00 -1.9600000e-02 -1.3130000e-01 7.2690000e-01 1.1186000e+00 -6.8890000e-01 5.8760000e-01 1.8518000e+00 2.9400000e-02 2.3400000e-01 -9.5200000e-01 + 6.6860000e-01 7.9700000e-01 1.5550000e-01 1.9000000e-02 2.0255000e+00 1.7467000e+00 2.6290000e-01 5.9050000e-01 -1.1243000e+00 -8.7720000e-01 6.2160000e-01 1.2740000e-01 1.1143000e+00 2.4524000e+00 -1.0506000e+00 1.1520000e-01 1.0880000e-01 -1.1055000e+00 5.3670000e-01 -3.9810000e-01 + -1.5549000e+00 -2.1717000e+00 -1.2407000e+00 -5.3760000e-01 1.5565000e+00 3.1000000e-03 8.1230000e-01 -3.7110000e-01 -6.5200000e-01 -1.7621000e+00 4.4800000e-01 -5.1540000e-01 -1.8864000e+00 -3.4715000e+00 9.9700000e-01 -1.5883000e+00 -5.6310000e-01 -3.1260000e-01 -1.4791000e+00 -5.2890000e-01 + 7.8120000e-01 2.1240000e-01 -1.3241000e+00 -1.0017000e+00 1.6306000e+00 -1.3230000e-01 1.6064000e+00 5.5950000e-01 9.4790000e-01 1.2055000e+00 8.7270000e-01 1.2750000e-01 1.0668000e+00 7.5050000e-01 3.5990000e-01 9.5500000e-02 1.1118000e+00 2.3420000e-01 1.3746000e+00 4.6370000e-01 + 2.3960000e-01 -4.8530000e-01 -5.4450000e-01 3.9620000e-01 1.3707000e+00 -4.3100000e-02 -2.0709000e+00 -1.3011000e+00 -1.3044000e+00 -2.1736000e+00 -1.9410000e-01 -1.0470000e-01 5.8840000e-01 -9.8740000e-01 -1.3530000e+00 -1.3708000e+00 -6.2110000e-01 -3.7150000e-01 1.0848000e+00 -7.6600000e-01 + 8.0560000e-01 -1.2970000e-01 -8.6920000e-01 2.3940000e-01 -3.0890000e-01 4.4730000e-01 4.6680000e-01 -2.3170000e-01 1.9058000e+00 -4.1500000e-02 -1.2430000e-01 6.3620000e-01 -3.2890000e-01 -6.5700000e-01 2.5920000e-01 -1.5637000e+00 -1.1309000e+00 2.4788000e+00 5.1520000e-01 1.2602000e+00 + 4.8390000e-01 2.1507000e+00 2.8721000e+00 3.7530000e-01 5.0540000e-01 4.7150000e-01 -5.6700000e-02 1.5844000e+00 2.2800000e-02 7.8600000e-02 3.5580000e-01 1.8653000e+00 -4.9440000e-01 1.6059000e+00 -1.9930000e-01 
6.9520000e-01 3.0490000e-01 1.6912000e+00 8.7510000e-01 7.6280000e-01 + 3.9630000e-01 7.0210000e-01 3.6770000e-01 -1.5153000e+00 1.3230000e-01 4.9830000e-01 -2.6966000e+00 1.4177000e+00 -1.2402000e+00 -9.6700000e-01 3.9880000e-01 2.4430000e-01 -2.0830000e-01 4.9320000e-01 -5.6290000e-01 2.4690000e-01 8.5060000e-01 9.5200000e-02 1.2290000e-01 -2.4540000e-01 + -1.9158000e+00 1.4860000e-01 -4.3700000e-02 -3.2000000e-01 5.4640000e-01 2.5340000e-01 -1.8690000e-01 -3.3660000e-01 -8.8500000e-01 -1.3866000e+00 -5.5030000e-01 -3.6280000e-01 -1.1561000e+00 9.6600000e-02 -8.0830000e-01 -8.4770000e-01 -6.7980000e-01 -1.4129000e+00 -2.4383000e+00 1.2900000e+00 + -1.3860000e-01 7.6450000e-01 -1.1678000e+00 5.4310000e-01 3.0170000e-01 9.1080000e-01 2.6110000e+00 9.7430000e-01 7.5390000e-01 -7.9650000e-01 -9.5000000e-03 -5.7910000e-01 8.3060000e-01 -7.3500000e-02 7.3210000e-01 5.7420000e-01 -1.7204000e+00 2.1827000e+00 6.5800000e-01 -1.0916000e+00 + 3.5110000e-01 1.1351000e+00 4.2180000e-01 1.9037000e+00 6.6840000e-01 5.8960000e-01 1.9351000e+00 8.0670000e-01 1.4195000e+00 2.1494000e+00 -2.0845000e+00 7.2600000e-02 2.5323000e+00 -1.4000000e-01 1.2470000e-01 -4.0100000e-01 -1.0600000e+00 1.0779000e+00 1.0101000e+00 7.0390000e-01 + 9.6080000e-01 2.4770000e-01 -1.0468000e+00 8.4110000e-01 -2.4970000e-01 -1.0802000e+00 -1.2160000e+00 3.3740000e-01 -2.2503000e+00 1.4146000e+00 5.8280000e-01 -6.8230000e-01 -1.5364000e+00 9.6250000e-01 -1.2728000e+00 2.2640000e-01 6.1600000e-02 4.1200000e-01 -2.0741000e+00 -4.1630000e-01 + -1.3496000e+00 -2.0208000e+00 -1.9040000e+00 1.8756000e+00 3.1260000e-01 9.7120000e-01 3.7110000e-01 -3.8460000e-01 6.2170000e-01 -2.1961000e+00 -2.2196000e+00 3.4700000e-02 9.4700000e-02 -1.3520000e-01 -1.6703000e+00 6.3730000e-01 -1.1593000e+00 -4.6960000e-01 -4.8410000e-01 -8.8940000e-01 + -3.7410000e-01 -1.5431000e+00 8.4470000e-01 8.8450000e-01 1.3406000e+00 -1.7746000e+00 -1.0953000e+00 -5.2770000e-01 -1.8721000e+00 8.0760000e-01 -1.9998000e+00 9.8000000e-01 -1.2089000e+00 -1.5130000e-01 1.2943000e+00 8.4270000e-01 4.1620000e-01 1.2860000e-01 1.3727000e+00 1.0901000e+00 + 1.6405000e+00 9.4540000e-01 -6.2410000e-01 4.7340000e-01 3.8530000e-01 -1.1151000e+00 -6.0780000e-01 1.4713000e+00 2.1171000e+00 1.9773000e+00 1.0499000e+00 8.4090000e-01 2.6875000e+00 3.1324000e+00 1.4809000e+00 1.0376000e+00 1.3990000e-01 -2.0560000e-01 -3.5200000e-02 4.8920000e-01 + 1.5723000e+00 -7.6540000e-01 -1.5868000e+00 2.4548000e+00 1.5337000e+00 6.7860000e-01 4.3900000e-01 -7.6390000e-01 4.1150000e-01 -3.6760000e-01 4.3970000e-01 1.7862000e+00 3.7120000e-01 1.1470000e-01 9.5790000e-01 -1.2040000e-01 2.5690000e-01 -2.9806000e+00 6.4440000e-01 1.0035000e+00 + 1.2390000e+00 5.4560000e-01 -1.3816000e+00 9.1960000e-01 -5.9500000e-02 4.8780000e-01 1.6289000e+00 8.1740000e-01 -1.2450000e+00 2.3000000e-03 1.6880000e-01 7.5040000e-01 2.0910000e-01 4.2420000e-01 1.7353000e+00 -4.8240000e-01 -4.5070000e-01 -5.6400000e-02 8.2490000e-01 -4.2200000e-02 + 2.7052000e+00 -8.0300000e-02 7.3800000e-01 7.9210000e-01 3.0300000e-02 5.3940000e-01 5.8300000e-02 2.7940000e-01 4.5300000e-02 7.5470000e-01 -9.3710000e-01 2.1311000e+00 -6.7630000e-01 3.7470000e-01 -1.3782000e+00 8.5120000e-01 -1.0122000e+00 1.4410000e-01 1.4405000e+00 8.8800000e-02 + -4.7080000e-01 6.9890000e-01 -7.4180000e-01 1.7340000e-01 2.5000000e-03 -4.5810000e-01 5.0590000e-01 -2.1101000e+00 -9.5420000e-01 -4.1080000e-01 -2.0336000e+00 -2.1837000e+00 5.7490000e-01 -1.1319000e+00 -1.1229000e+00 -2.0228000e+00 -1.9029000e+00 -1.8454000e+00 1.8434000e+00 
-1.8277000e+00 + -5.6700000e-02 1.4276000e+00 1.5167000e+00 5.9020000e-01 1.4460000e+00 1.3041000e+00 1.3262000e+00 -2.1820000e-01 1.4116000e+00 3.3280000e-01 1.8442000e+00 5.5040000e-01 8.9590000e-01 -1.6665000e+00 2.8301000e+00 5.3240000e-01 1.0205000e+00 3.0210000e-01 4.2650000e-01 9.6230000e-01 + -1.9599000e+00 -5.8640000e-01 1.6050000e+00 -3.8090000e-01 7.9670000e-01 2.0720000e-01 -6.2450000e-01 7.1060000e-01 -7.9050000e-01 -5.9700000e-01 -5.8950000e-01 -5.7900000e-01 9.6700000e-02 1.2544000e+00 2.4290000e-01 -1.3832000e+00 -1.5728000e+00 -1.9140000e-01 -2.0537000e+00 -1.2685000e+00 + 1.4720000e-01 4.0440000e-01 2.8480000e-01 7.7420000e-01 7.2660000e-01 9.6140000e-01 -1.4350000e-01 6.0740000e-01 8.0640000e-01 -1.0462000e+00 -1.3866000e+00 -3.0500000e-01 2.7540000e-01 7.0080000e-01 -4.8380000e-01 5.7600000e-01 -1.9059000e+00 1.9000000e-02 -4.0870000e-01 3.5000000e-03 + -2.4266000e+00 -6.8000000e-01 -7.1200000e-01 -6.4680000e-01 -9.8250000e-01 4.6600000e-02 -1.3331000e+00 1.3012000e+00 -1.3729000e+00 -1.1270000e-01 -1.2204000e+00 -1.1975000e+00 -1.5479000e+00 -1.0829000e+00 8.7320000e-01 -1.6523000e+00 -3.5620000e-01 -1.1912000e+00 -1.4812000e+00 -1.7300000e-01 + 6.2440000e-01 -2.4462000e+00 -7.9320000e-01 4.3410000e-01 -1.0744000e+00 -5.1510000e-01 -1.0478000e+00 -5.6000000e-02 -7.4340000e-01 -7.6300000e-02 -1.4745000e+00 -1.8490000e-01 1.8920000e-01 6.4690000e-01 -5.0270000e-01 -1.7925000e+00 -1.4240000e-01 -9.7810000e-01 1.8140000e+00 4.4330000e-01 + -4.6700000e-01 -8.7530000e-01 3.5340000e-01 -2.9700000e-01 -1.7364000e+00 -2.0284000e+00 1.4320000e+00 -2.5973000e+00 3.1930000e-01 -1.4759000e+00 9.2480000e-01 6.9980000e-01 1.3093000e+00 3.6240000e-01 -1.9906000e+00 -4.4340000e-01 -1.1410000e-01 -2.3150000e-01 -2.9490000e-01 -3.2080000e-01 + -1.4049000e+00 1.7643000e+00 9.9200000e-02 -1.6263000e+00 -1.3305000e+00 9.4600000e-02 -6.6930000e-01 -5.2830000e-01 -1.2613000e+00 -7.6250000e-01 -2.0290000e-01 -1.1218000e+00 -2.1250000e-01 -1.7338000e+00 -5.1100000e-01 -9.7030000e-01 7.8600000e-01 -6.6720000e-01 1.3920000e-01 -1.1802000e+00 + -9.3760000e-01 -4.5270000e-01 -9.1380000e-01 5.7520000e-01 -5.0240000e-01 1.1726000e+00 1.0517000e+00 -3.0190000e-01 5.1400000e-01 -7.5120000e-01 1.0741000e+00 -2.4410000e-01 -8.0950000e-01 2.8508000e+00 3.5620000e-01 1.0467000e+00 6.9800000e-02 2.7400000e-02 1.8320000e-01 1.8248000e+00 + 5.2350000e-01 -2.9020000e-01 3.3300000e-02 1.2900000e-01 -3.0920000e-01 1.1652000e+00 -5.3800000e-02 2.0816000e+00 6.4640000e-01 4.9410000e-01 -1.5422000e+00 -3.4130000e-01 5.0000000e-03 2.5723000e+00 9.3860000e-01 4.0860000e-01 -4.1480000e-01 -2.2078000e+00 2.2613000e+00 -8.6410000e-01 + -5.4100000e-02 9.6340000e-01 4.1550000e-01 -2.0310000e-01 1.0271000e+00 -1.3861000e+00 -1.4281000e+00 -5.8190000e-01 6.3070000e-01 1.7450000e-01 3.6270000e-01 -2.1240000e-01 -1.3501000e+00 -1.5891000e+00 -1.3001000e+00 2.4930000e-01 2.9490000e-01 -8.4480000e-01 1.1243000e+00 -9.3380000e-01 + 3.8900000e-02 -1.0325000e+00 -3.3200000e-01 -5.0700000e-01 -1.2282000e+00 1.1030000e-01 8.3010000e-01 6.3200000e-02 -6.5430000e-01 1.1608000e+00 -1.5420000e-01 9.6900000e-01 8.9830000e-01 1.6373000e+00 2.7400000e-01 -9.8510000e-01 1.1767000e+00 8.1390000e-01 3.3650000e-01 4.3480000e-01 + 4.5010000e-01 1.5339000e+00 3.1959000e+00 1.1690000e-01 -5.6640000e-01 1.4496000e+00 -8.5700000e-02 -5.5520000e-01 2.5990000e-01 1.4479000e+00 8.6340000e-01 -2.6510000e-01 -9.0100000e-02 1.2660000e-01 -6.9000000e-03 1.4940000e-01 2.4395000e+00 7.7480000e-01 2.4048000e+00 2.0100000e-01 + 1.0463000e+00 9.5340000e-01 
6.9520000e-01 -2.5308000e+00 5.4400000e-01 1.7227000e+00 8.8430000e-01 -1.4736000e+00 -6.0470000e-01 -4.6000000e-03 1.0417000e+00 8.9800000e-01 -5.9350000e-01 3.7700000e-01 7.6690000e-01 1.1137000e+00 1.2030000e+00 1.8255000e+00 3.0860000e-01 6.5280000e-01 + -2.1833000e+00 6.9270000e-01 -1.5200000e+00 -8.0200000e-02 3.4230000e-01 1.4150000e-01 -2.0526000e+00 6.5100000e-01 -6.3810000e-01 -7.1280000e-01 -2.9390000e+00 8.5280000e-01 1.4686000e+00 -2.6690000e-01 5.8500000e-02 -1.1980000e+00 -1.8160000e-01 2.2460000e-01 -9.5760000e-01 5.7270000e-01 + -5.8750000e-01 4.2990000e-01 6.7150000e-01 1.5668000e+00 2.4382000e+00 -1.7260000e-01 -1.6615000e+00 4.9220000e-01 -2.3045000e+00 5.6540000e-01 -1.3961000e+00 -7.0270000e-01 1.0132000e+00 -1.8964000e+00 1.2821000e+00 -1.1365000e+00 1.0840000e-01 -3.0200000e-02 -1.5483000e+00 2.0345000e+00 + 1.9780000e-01 4.1660000e-01 2.9300000e-01 5.7040000e-01 6.9730000e-01 -3.3360000e-01 -1.7240000e-01 1.4100000e+00 1.4771000e+00 7.0780000e-01 2.4837000e+00 1.2673000e+00 3.7817000e+00 2.5549000e+00 2.3260000e-01 2.8220000e-01 2.7310000e-01 1.1403000e+00 2.4573000e+00 1.1555000e+00 + -7.3170000e-01 -7.6010000e-01 -1.6613000e+00 -1.3164000e+00 -1.2845000e+00 -9.3910000e-01 1.1917000e+00 -1.7329000e+00 -4.9430000e-01 -4.8440000e-01 -1.3512000e+00 2.9080000e-01 -1.9060000e-01 1.1298000e+00 -6.7280000e-01 -1.0447000e+00 -2.2930000e-01 -1.6385000e+00 1.5830000e-01 -3.9870000e-01 + -3.0200000e-02 1.9016000e+00 1.3894000e+00 1.6540000e-01 4.7280000e-01 5.2920000e-01 8.6490000e-01 1.1431000e+00 3.8350000e-01 -6.4310000e-01 -9.7400000e-01 8.6950000e-01 1.2303000e+00 5.6270000e-01 2.3330000e-01 -2.4240000e-01 1.1993000e+00 2.7610000e-01 1.4824000e+00 -1.7620000e-01 + 1.0158000e+00 1.4100000e-01 1.4813000e+00 -3.5200000e-02 3.8520000e-01 -1.5660000e-01 1.3288000e+00 -9.6720000e-01 7.9200000e-02 8.3600000e-02 5.5630000e-01 -1.2398000e+00 4.7100000e-02 1.9731000e+00 1.0753000e+00 1.0241000e+00 1.0104000e+00 1.9600000e-02 -1.3600000e-01 -1.2960000e-01 + -1.8597000e+00 1.1271000e+00 -8.5000000e-03 -1.3271000e+00 6.1160000e-01 -4.7250000e-01 -2.0709000e+00 -1.8677000e+00 -8.3650000e-01 -1.6594000e+00 -1.1757000e+00 -2.9400000e-02 -4.3100000e-02 1.1160000e-01 1.8532000e+00 -3.6530000e-01 -7.4600000e-01 9.7770000e-01 3.0220000e-01 -1.5370000e+00 + -3.0470000e-01 4.6310000e-01 -2.4887000e+00 -1.2987000e+00 1.8250000e-01 -1.9775000e+00 -6.6130000e-01 -2.0535000e+00 1.0883000e+00 -9.7610000e-01 1.0141000e+00 -1.7370000e-01 -1.4756000e+00 -4.6430000e-01 1.3961000e+00 -1.4498000e+00 3.1260000e-01 -6.8360000e-01 2.1540000e-01 -2.1530000e-01 + 3.3900000e-01 -7.3840000e-01 6.8910000e-01 1.2110000e-01 -2.5561000e+00 -7.9980000e-01 -1.3320000e-01 -1.5730000e-01 4.6810000e-01 5.4400000e-01 1.6608000e+00 -1.0066000e+00 -5.0410000e-01 -1.7793000e+00 -9.0200000e-01 -2.2550000e-01 3.0760000e-01 -9.0080000e-01 -1.0540000e-01 -1.9278000e+00 + 1.9081000e+00 -1.5401000e+00 -1.5067000e+00 -1.1110000e+00 3.0580000e-01 1.1185000e+00 -9.9700000e-02 4.3320000e-01 -1.7674000e+00 5.0610000e-01 -7.4510000e-01 1.3780000e-01 -1.2877000e+00 -2.0700000e-02 1.1390000e-01 8.0020000e-01 6.2200000e-01 -8.7050000e-01 -2.8601000e+00 -4.1890000e-01 + 7.1020000e-01 6.8560000e-01 -5.7300000e-02 -1.7160000e-01 -1.8820000e-01 9.4040000e-01 1.6775000e+00 1.0713000e+00 1.4696000e+00 -1.0876000e+00 3.3480000e-01 2.2330000e-01 5.4410000e-01 2.6340000e+00 -4.2540000e-01 2.3315000e+00 5.3720000e-01 1.6801000e+00 1.9290000e-01 1.5444000e+00 + 1.5302000e+00 -2.8104000e+00 -1.0958000e+00 -8.1370000e-01 9.0940000e-01 -1.0410000e+00 
-1.9930000e-01 -6.8690000e-01 1.1339000e+00 -2.5899000e+00 -1.5604000e+00 1.3877000e+00 2.0690000e-01 -1.2627000e+00 1.3378000e+00 4.4200000e-02 1.7157000e+00 -3.6140000e-01 -1.1000000e+00 5.2080000e-01 + 1.2473000e+00 -2.9530000e-01 3.8710000e-01 2.7616000e+00 1.4100000e+00 1.8115000e+00 9.4910000e-01 1.2688000e+00 -8.2560000e-01 -1.6000000e-02 -1.3003000e+00 -7.3280000e-01 1.1035000e+00 4.7700000e-01 1.1652000e+00 -5.9200000e-01 5.2190000e-01 -8.3220000e-01 9.3070000e-01 8.3880000e-01 + -2.6830000e-01 -5.9370000e-01 1.0144000e+00 4.3730000e-01 6.2440000e-01 1.6429000e+00 8.5950000e-01 9.3010000e-01 7.1020000e-01 2.3407000e+00 -7.8650000e-01 4.8030000e-01 -1.0250000e+00 9.5800000e-01 2.1915000e+00 4.7470000e-01 5.6800000e-01 1.4449000e+00 -3.9070000e-01 8.6160000e-01 + -1.3920000e-01 -1.1844000e+00 3.1620000e-01 -3.4800000e-01 -8.7830000e-01 -5.4640000e-01 -2.5670000e-01 -6.0200000e-01 7.7260000e-01 6.7240000e-01 -3.3700000e-01 5.8000000e-03 -1.9879000e+00 -1.2830000e-01 -3.1390000e-01 1.3670000e-01 -1.8705000e+00 -2.9629000e+00 1.2210000e-01 1.8367000e+00 + 2.1272000e+00 -1.9000000e-02 6.4590000e-01 -1.5642000e+00 5.3010000e-01 5.4100000e-01 5.4610000e-01 -9.9800000e-02 6.0260000e-01 -6.6610000e-01 -1.1265000e+00 2.2640000e-01 -1.0225000e+00 -2.4421000e+00 -4.6960000e-01 2.3020000e-01 -2.1998000e+00 -1.9257000e+00 -3.3200000e-02 -1.6720000e+00 + 1.2084000e+00 -1.2915000e+00 2.6580000e-01 7.5320000e-01 -9.1510000e-01 5.1430000e-01 -3.4700000e-02 2.0163000e+00 1.7228000e+00 1.4183000e+00 8.9000000e-01 -5.2470000e-01 6.7990000e-01 4.0800000e-01 -1.1360000e-01 -4.8160000e-01 -2.3340000e-01 2.4570000e+00 -5.5820000e-01 9.6930000e-01 + 6.2850000e-01 -3.9110000e-01 -9.3960000e-01 -1.9599000e+00 -3.6300000e-01 -3.9340000e-01 -7.6490000e-01 -7.1040000e-01 5.8520000e-01 -1.8671000e+00 -1.6700000e-02 -2.3313000e+00 1.3493000e+00 5.9750000e-01 -7.9570000e-01 1.5610000e-01 5.5100000e-02 -7.1110000e-01 -4.1400000e-02 -2.0301000e+00 + 5.3660000e-01 8.9750000e-01 -1.9267000e+00 -3.3420000e-01 -2.6287000e+00 -1.6700000e-02 -6.0310000e-01 -1.9898000e+00 -3.9560000e-01 5.7070000e-01 -5.3370000e-01 -3.0630000e-01 -2.6200000e-02 -6.8200000e-01 5.8830000e-01 1.0276000e+00 -4.1910000e-01 7.6790000e-01 -8.6120000e-01 -8.5280000e-01 + -4.5080000e-01 5.8960000e-01 6.8500000e-02 6.1600000e-02 8.0600000e-02 2.1536000e+00 9.8920000e-01 1.4956000e+00 5.7520000e-01 -8.0090000e-01 5.0490000e-01 -9.8130000e-01 3.6600000e-02 -1.1257000e+00 1.0863000e+00 5.6630000e-01 2.8320000e-01 -9.2430000e-01 1.3170000e-01 8.0990000e-01 + -9.0330000e-01 1.2577000e+00 2.0103000e+00 1.1660000e-01 2.6570000e-01 -1.0817000e+00 2.2926000e+00 1.7489000e+00 9.9510000e-01 1.8716000e+00 7.7880000e-01 -3.9900000e-02 -3.2460000e-01 1.2409000e+00 1.7186000e+00 2.3620000e-01 1.6283000e+00 1.3560000e-01 -2.7420000e-01 1.1855000e+00 + -9.0890000e-01 -8.1200000e-01 -6.9820000e-01 1.0965000e+00 -2.0797000e+00 -3.2550000e-01 -2.7565000e+00 2.6910000e-01 4.5940000e-01 -8.6150000e-01 -2.2439000e+00 -1.7260000e-01 -4.4400000e-02 -1.0313000e+00 -1.0301000e+00 -2.5152000e+00 -4.9630000e-01 -4.5910000e-01 1.5479000e+00 6.0740000e-01 + 1.5887000e+00 -5.4110000e-01 6.8650000e-01 2.3530000e-01 -2.7310000e-01 1.0900000e-02 -4.0220000e-01 -6.8420000e-01 -2.3430000e-01 -1.0966000e+00 -8.0790000e-01 -4.1920000e-01 -4.3890000e-01 1.1235000e+00 -1.8984000e+00 4.4760000e-01 1.1621000e+00 -6.9540000e-01 -1.5352000e+00 -2.6790000e-01 + -1.5424000e+00 -7.1800000e-02 -1.6064000e+00 -3.7190000e-01 -1.5576000e+00 -1.1180000e-01 -3.5300000e-01 2.7910000e-01 -2.6510000e-01 
-7.9350000e-01 2.2360000e-01 -1.9514000e+00 -1.0401000e+00 -4.5610000e-01 2.2725000e+00 -1.0614000e+00 -5.4420000e-01 3.0360000e-01 -4.9680000e-01 -4.6970000e-01 + 1.7680000e-01 1.8405000e+00 -1.1280000e+00 2.1000000e-02 3.0000000e-02 -2.9270000e-01 -1.2870000e+00 -2.7830000e-01 -3.3790000e-01 -9.7920000e-01 1.6052000e+00 1.7300000e-02 6.5920000e-01 -2.7190000e-01 -7.7720000e-01 -5.4790000e-01 8.5910000e-01 3.6180000e-01 9.9840000e-01 -1.0070000e-01 + 2.1371000e+00 3.3500000e-01 -2.3772000e+00 -3.4340000e-01 9.3400000e-01 1.0124000e+00 1.0165000e+00 4.1210000e-01 7.7540000e-01 1.3882000e+00 1.4737000e+00 -7.8700000e-01 1.4352000e+00 -3.7910000e-01 1.1163000e+00 8.1080000e-01 9.4690000e-01 -2.5520000e-01 -2.7130000e-01 1.8020000e+00 + -4.3500000e-01 -6.6860000e-01 1.8200000e-01 2.4350000e-01 -1.2727000e+00 -1.2485000e+00 6.5100000e-02 -7.0940000e-01 7.4880000e-01 7.3360000e-01 7.1500000e-02 6.5510000e-01 5.5140000e-01 -2.3656000e+00 2.9880000e-01 -4.7960000e-01 -2.3787000e+00 -1.0175000e+00 2.8910000e-01 -5.9750000e-01 + -4.2800000e-02 -1.1397000e+00 2.6920000e-01 2.1500000e-02 1.6640000e-01 2.8550000e-01 -1.0475000e+00 9.1640000e-01 -1.8552000e+00 1.4556000e+00 1.7510000e-01 -5.0110000e-01 -9.5040000e-01 1.5920000e-01 8.4210000e-01 3.5000000e-02 -1.3719000e+00 2.9040000e-01 -6.3400000e-02 4.9600000e-01 + 4.3010000e-01 7.0050000e-01 1.0080000e-01 1.5410000e-01 -1.1424000e+00 -1.4260000e-01 3.7390000e-01 -2.2640000e-01 9.8000000e-02 -6.3880000e-01 7.8020000e-01 -5.7520000e-01 4.9330000e-01 1.7979000e+00 4.9140000e-01 2.0604000e+00 6.6210000e-01 -1.0674000e+00 -8.5990000e-01 2.1020000e-01 + -1.9434000e+00 -7.0510000e-01 -1.3162000e+00 -7.9010000e-01 1.2540000e-01 -1.4696000e+00 1.5880000e+00 -1.8934000e+00 -5.0170000e-01 -9.2990000e-01 -9.5500000e-02 -1.1655000e+00 -3.4600000e-02 -8.7680000e-01 -8.2630000e-01 -4.6380000e-01 8.8040000e-01 2.1857000e+00 2.8600000e-01 4.2800000e-01 + 1.1640000e+00 -8.8370000e-01 3.9270000e-01 5.1530000e-01 1.0715000e+00 1.3735000e+00 1.9539000e+00 3.0340000e-01 1.0727000e+00 -1.4020000e-01 1.2020000e+00 5.1000000e-03 -1.3201000e+00 -1.9640000e-01 -2.5130000e-01 -6.7080000e-01 -4.0740000e-01 7.7510000e-01 -3.2360000e-01 -3.7090000e-01 + 1.0004000e+00 -2.0703000e+00 -1.9797000e+00 -6.4530000e-01 -5.5730000e-01 5.6880000e-01 4.5000000e-03 -1.1730000e-01 -6.1520000e-01 1.5260000e-01 -8.6840000e-01 1.6659000e+00 -3.1550000e-01 -1.4234000e+00 -2.2770000e-01 -7.1700000e-01 -9.4750000e-01 -5.3600000e-01 -1.0940000e-01 1.4528000e+00 + -1.2509000e+00 -1.6650000e-01 -1.7740000e-01 2.7710000e-01 2.6800000e-02 1.1720000e-01 -6.4480000e-01 -7.9350000e-01 4.5010000e-01 -1.8720000e-01 -1.8491000e+00 -1.0018000e+00 5.0640000e-01 5.3070000e-01 -1.8060000e-01 6.8610000e-01 -1.3920000e+00 1.0650000e+00 -9.6000000e-01 -1.4756000e+00 + -4.1140000e-01 -1.3921000e+00 8.6570000e-01 9.2810000e-01 1.0760000e-01 -5.0470000e-01 1.4941000e+00 5.5030000e-01 -9.0340000e-01 1.0171000e+00 -6.5690000e-01 1.0721000e+00 9.2060000e-01 8.0220000e-01 -1.4290000e+00 -5.3200000e-02 3.1900000e-02 7.3140000e-01 -5.4000000e-01 -7.6860000e-01 + 5.5650000e-01 5.0300000e-01 1.3022000e+00 1.3320000e+00 3.7450000e-01 -4.2000000e-02 -5.3030000e-01 -6.6100000e-02 6.2700000e-01 1.5990000e-01 3.3870000e-01 2.5540000e-01 2.2723000e+00 4.5910000e-01 3.9340000e-01 8.6400000e-02 -1.4560000e-01 -4.6000000e-01 1.3568000e+00 -1.2391000e+00 + -4.4690000e-01 -1.0676000e+00 -2.1640000e-01 -8.8280000e-01 7.0210000e-01 -1.0786000e+00 6.7880000e-01 5.5020000e-01 -1.3216000e+00 -2.0278000e+00 -1.2410000e+00 -3.4704000e+00 
-1.0690000e-01 1.1991000e+00 -1.5100000e+00 2.1946000e+00 6.2210000e-01 -1.0206000e+00 -6.0640000e-01 -1.2731000e+00 + -2.1337000e+00 1.1503000e+00 5.5260000e-01 1.8387000e+00 3.9700000e-01 -1.0161000e+00 7.2090000e-01 -1.2223000e+00 6.0020000e-01 6.6400000e-02 6.1060000e-01 -5.6760000e-01 -1.0460000e-01 -8.3000000e-02 1.2322000e+00 -3.7920000e-01 9.1760000e-01 6.0770000e-01 1.0270000e-01 3.7530000e-01 + 6.7580000e-01 -1.1461000e+00 -8.9370000e-01 1.8750000e-01 9.6980000e-01 4.1880000e-01 4.5710000e-01 -3.5850000e-01 -1.5710000e-01 -8.3570000e-01 3.1990000e-01 2.1370000e-01 -3.5700000e-02 -2.1005000e+00 -1.0708000e+00 5.9600000e-02 -5.5310000e-01 3.9800000e-01 -1.4159000e+00 -2.4470000e-01 + 2.5850000e-01 -2.1134000e+00 4.8720000e-01 -1.4252000e+00 4.7770000e-01 -2.4113000e+00 -8.1810000e-01 1.3305000e+00 2.5800000e-02 -1.9015000e+00 1.3562000e+00 1.8649000e+00 -1.4959000e+00 -4.3400000e-02 2.1200000e-02 -3.3590000e-01 -1.5910000e+00 -5.1900000e-02 -1.7488000e+00 -3.5940000e-01 + -5.0250000e-01 -6.2840000e-01 9.7990000e-01 -2.9830000e-01 1.4608000e+00 -1.3368000e+00 -9.5920000e-01 -9.8440000e-01 -6.8630000e-01 -2.4900000e-01 2.2440000e-01 -1.3500000e-01 1.0744000e+00 9.7070000e-01 8.0000000e-02 -1.8874000e+00 -4.5170000e-01 -3.0810000e-01 6.5200000e-02 -1.0731000e+00 + 1.2162000e+00 -7.7680000e-01 1.4820000e-01 -6.0620000e-01 -4.4170000e-01 2.7730000e-01 4.9140000e-01 -5.2180000e-01 -2.1087000e+00 -3.6150000e-01 -2.7537000e+00 -7.8050000e-01 1.7990000e-01 -9.7020000e-01 4.4490000e-01 -1.1424000e+00 1.4236000e+00 -7.0260000e-01 -1.3291000e+00 -1.9056000e+00 + 4.9080000e-01 -2.6902000e+00 1.0505000e+00 -1.5810000e+00 -2.1428000e+00 1.0230000e-01 4.7520000e-01 5.4820000e-01 7.7410000e-01 -1.3778000e+00 4.8900000e-02 -5.8600000e-02 -1.9053000e+00 -6.7200000e-01 8.2170000e-01 -1.2798000e+00 -1.1090000e+00 1.9700000e-02 -1.5399000e+00 -7.8350000e-01 + 3.7210000e-01 9.4040000e-01 -7.8620000e-01 -4.7300000e-01 5.3340000e-01 2.9220000e-01 1.5181000e+00 1.1491000e+00 1.0918000e+00 2.3270000e-01 1.0883000e+00 -6.1600000e-01 -1.2420000e-01 -1.5400000e-01 -8.0380000e-01 -9.8610000e-01 2.1100000e-01 4.4880000e-01 4.6600000e-01 -1.4426000e+00 + -6.0500000e-02 -2.5060000e-01 -8.8940000e-01 9.6830000e-01 1.1921000e+00 5.8380000e-01 7.1890000e-01 8.8150000e-01 8.6860000e-01 -8.2830000e-01 7.4020000e-01 -8.1620000e-01 1.7639000e+00 -6.5190000e-01 3.4220000e-01 1.4082000e+00 2.8949000e+00 -1.4325000e+00 -8.1960000e-01 -4.5680000e-01 + 1.6792000e+00 -2.3850000e-01 7.5340000e-01 8.2090000e-01 1.2518000e+00 1.8390000e+00 -2.6170000e-01 4.7320000e-01 8.2440000e-01 -6.6200000e-02 -6.5680000e-01 8.1370000e-01 4.2580000e-01 1.1549000e+00 1.2118000e+00 -5.7700000e-01 1.2100000e-02 1.1682000e+00 1.4901000e+00 1.0407000e+00 + 7.2010000e-01 1.4900000e-02 -1.7570000e-01 5.3500000e-02 -1.2566000e+00 -1.2896000e+00 -1.8274000e+00 -1.1191000e+00 -1.1081000e+00 -1.5670000e+00 2.8000000e-01 -3.1410000e-01 -2.0186000e+00 4.5710000e-01 -2.7395000e+00 9.7400000e-02 -3.1360000e-01 -1.8397000e+00 -3.3300000e-02 -1.1064000e+00 + -1.2481000e+00 -1.8000000e-03 -1.3790000e-01 1.2816000e+00 -9.3120000e-01 -2.0277000e+00 -1.5930000e-01 2.5412000e+00 1.4105000e+00 2.0174000e+00 1.9753000e+00 1.6292000e+00 1.2854000e+00 3.8000000e-01 1.1822000e+00 -1.6033000e+00 1.0360000e+00 1.4330000e-01 4.4730000e-01 -2.4180000e-01 + 5.4920000e-01 2.2345000e+00 -5.0830000e-01 1.1524000e+00 2.0429000e+00 1.6033000e+00 2.1000000e-02 1.0638000e+00 1.2752000e+00 -1.1200000e-02 1.3484000e+00 -4.0100000e-01 2.1457000e+00 4.6540000e-01 -1.0912000e+00 
-6.5900000e-01 1.1041000e+00 3.7610000e-01 -1.9120000e-01 -3.6220000e-01 + -4.8370000e-01 1.0007000e+00 1.3182000e+00 8.1940000e-01 -9.0240000e-01 -7.6950000e-01 1.0143000e+00 2.5436000e+00 -2.6020000e-01 9.4360000e-01 -2.5390000e-01 -1.2461000e+00 6.6570000e-01 -1.2026000e+00 -8.7950000e-01 1.3059000e+00 8.1500000e-02 1.2718000e+00 1.7325000e+00 2.7600000e-02 + 1.0900000e-02 -1.3946000e+00 -1.3479000e+00 -1.5120000e+00 3.1840000e-01 1.4253000e+00 -1.6552000e+00 -1.6365000e+00 -2.1166000e+00 -1.4090000e-01 -2.0985000e+00 -1.1147000e+00 -2.7060000e-01 -1.1940000e+00 -9.2550000e-01 -3.9680000e-01 -6.8900000e-02 1.3744000e+00 5.1300000e-02 -2.3340000e-01 + 2.0663000e+00 -4.0370000e-01 2.6930000e-01 7.8030000e-01 -3.9470000e-01 7.4260000e-01 1.7837000e+00 2.0791000e+00 5.4980000e-01 1.7194000e+00 5.7210000e-01 -3.7450000e-01 1.1139000e+00 -3.3330000e-01 -1.6756000e+00 9.4850000e-01 -5.0040000e-01 -4.9590000e-01 1.4473000e+00 2.1135000e+00 + -1.5986000e+00 4.1450000e-01 -3.4120000e-01 -1.5110000e+00 2.3530000e+00 1.0851000e+00 -3.3140000e-01 -1.2258000e+00 -4.1280000e-01 -5.0340000e-01 -5.4220000e-01 1.0417000e+00 -1.4304000e+00 1.0678000e+00 -1.6804000e+00 -1.6056000e+00 -8.9760000e-01 -7.9430000e-01 1.2830000e+00 -1.6712000e+00 + -4.5280000e-01 -9.5690000e-01 1.2694000e+00 5.7580000e-01 8.4180000e-01 3.0480000e-01 -1.4926000e+00 -6.1180000e-01 -4.0380000e-01 6.4780000e-01 5.5580000e-01 -5.9990000e-01 -1.8035000e+00 -1.1632000e+00 -1.5033000e+00 -8.3190000e-01 -5.3580000e-01 -2.1380000e-01 -7.2650000e-01 6.1770000e-01 + 7.6330000e-01 -3.4190000e-01 -2.1130000e-01 -9.2280000e-01 -1.5800000e-02 4.1010000e-01 -1.4040000e-01 -6.5460000e-01 -3.5700000e-02 5.9340000e-01 -1.4633000e+00 1.9006000e+00 -1.0805000e+00 8.4820000e-01 2.4700000e-02 1.7356000e+00 -2.5090000e-01 6.4050000e-01 -5.1500000e-02 1.1550000e+00 + 5.4910000e-01 7.9300000e-02 4.8270000e-01 1.4250000e+00 4.0300000e-01 -9.5020000e-01 2.0918000e+00 -9.0780000e-01 8.5820000e-01 -1.2822000e+00 6.0830000e-01 3.3400000e-01 2.6190000e-01 2.6990000e+00 7.2750000e-01 -1.7659000e+00 1.8945000e+00 -6.2850000e-01 7.2650000e-01 3.1470000e-01 + 4.6750000e-01 3.2740000e-01 -1.7147000e+00 1.2278000e+00 7.5210000e-01 3.6270000e-01 8.8400000e-01 -4.8710000e-01 -1.0412000e+00 9.2010000e-01 -2.6290000e-01 -4.0000000e-01 -2.1333000e+00 7.3780000e-01 -7.7150000e-01 -1.4902000e+00 -5.9400000e-01 -3.0938000e+00 -1.2218000e+00 -1.6073000e+00 + -4.5900000e-01 -5.4560000e-01 1.0125000e+00 -6.4590000e-01 6.6640000e-01 1.2780000e-01 -1.0701000e+00 2.7723000e+00 1.0935000e+00 8.1770000e-01 -9.6210000e-01 1.3175000e+00 -1.3389000e+00 2.6050000e-01 2.8635000e+00 2.0465000e+00 1.0670000e+00 4.3140000e-01 2.1075000e+00 3.4730000e-01 + 1.2936000e+00 -1.0503000e+00 7.3020000e-01 1.2487000e+00 -1.1004000e+00 1.1668000e+00 9.7910000e-01 -3.7900000e-02 1.5010000e-01 2.2900000e-02 1.1092000e+00 6.3450000e-01 1.9841000e+00 1.0883000e+00 1.5070000e-01 7.0250000e-01 1.3996000e+00 8.2370000e-01 1.0633000e+00 -3.1480000e-01 + 2.0730000e-01 5.1200000e-01 -5.1800000e-01 3.8640000e-01 -1.4344000e+00 5.3210000e-01 -1.5780000e-01 1.4949000e+00 1.0587000e+00 6.9240000e-01 1.3433000e+00 1.4118000e+00 4.1200000e-02 4.1270000e-01 -1.0522000e+00 -1.1467000e+00 5.8700000e-02 -3.3260000e-01 -8.5030000e-01 5.5880000e-01 + 3.7900000e-02 -7.4460000e-01 -1.2000000e+00 -7.7860000e-01 1.8250000e-01 -9.3170000e-01 -1.7773000e+00 1.3712000e+00 -1.4460000e-01 -5.6200000e-01 -6.3270000e-01 1.8470000e-01 1.5500000e-01 -3.8050000e-01 -2.3980000e-01 -4.9130000e-01 -2.6530000e-01 -2.8430000e-01 
4.9050000e-01 -2.3069000e+00 + 1.4665000e+00 2.1744000e+00 -8.6030000e-01 3.9870000e-01 1.2832000e+00 4.9630000e-01 1.6200000e-01 1.8590000e-01 1.4196000e+00 2.4067000e+00 4.3100000e-02 7.0520000e-01 1.1242000e+00 1.7519000e+00 4.9050000e-01 1.3670000e-01 1.0634000e+00 1.4678000e+00 -1.2750000e-01 -3.7100000e-02 + -1.3972000e+00 7.7970000e-01 -9.4780000e-01 -1.0955000e+00 -1.2714000e+00 -1.1786000e+00 2.8220000e-01 -6.4520000e-01 5.6300000e-01 -4.0430000e-01 -5.2700000e-02 -8.2870000e-01 -1.7461000e+00 2.0223000e+00 4.4900000e-01 -2.2545000e+00 1.7220000e-01 -6.5100000e-02 -2.0384000e+00 -6.3700000e-02 + 1.7580000e-01 2.1618000e+00 8.6680000e-01 1.4072000e+00 -5.9650000e-01 1.3358000e+00 -2.5480000e-01 1.0272000e+00 1.7932000e+00 -2.4520000e-01 -1.8290000e-01 1.4402000e+00 -2.5130000e-01 -1.1693000e+00 8.7410000e-01 7.8430000e-01 6.4080000e-01 6.0170000e-01 1.2341000e+00 1.4225000e+00 + -6.0040000e-01 -9.3980000e-01 -1.3224000e+00 -1.6699000e+00 1.0500000e+00 1.0263000e+00 -4.7540000e-01 -1.2445000e+00 -8.6800000e-02 -8.0950000e-01 9.0640000e-01 -1.6319000e+00 -9.1690000e-01 -1.8520000e+00 -3.1650000e-01 2.5113000e+00 -1.9898000e+00 -9.7500000e-02 -3.3720000e-01 1.5970000e+00 + 9.2470000e-01 -1.6620000e+00 -5.1650000e-01 -7.9480000e-01 8.6320000e-01 -2.4405000e+00 -1.0536000e+00 1.3030000e-01 2.0427000e+00 6.2510000e-01 1.1431000e+00 1.9624000e+00 -9.8090000e-01 4.6130000e-01 -2.5059000e+00 -1.5481000e+00 3.7680000e-01 4.0740000e-01 -1.1005000e+00 -8.7630000e-01 + 1.2427000e+00 1.4684000e+00 -1.7718000e+00 -1.8363000e+00 1.2860000e-01 1.9100000e-01 -9.7750000e-01 -1.0746000e+00 1.7401000e+00 1.6690000e-01 -5.8600000e-01 -7.5700000e-02 2.8050000e-01 -1.9298000e+00 -1.0209000e+00 3.0400000e-01 -5.7670000e-01 2.3800000e-01 -1.8839000e+00 -1.3439000e+00 + 7.8100000e-01 -1.8589000e+00 -7.8260000e-01 -9.8000000e-01 9.1630000e-01 1.3210000e-01 -1.8395000e+00 -2.7650000e-01 5.8190000e-01 1.1272000e+00 1.0450000e-01 -9.6300000e-01 1.6480000e-01 5.2960000e-01 -1.4140000e-01 -8.1460000e-01 -7.7840000e-01 -1.2823000e+00 1.2330000e-01 -1.9814000e+00 + 2.6960000e-01 8.6310000e-01 -5.1520000e-01 -4.5100000e-02 1.1675000e+00 3.1170000e-01 -4.4420000e-01 9.4020000e-01 1.2606000e+00 2.4466000e+00 1.8620000e-01 4.9620000e-01 -2.5550000e-01 2.8820000e+00 -7.7000000e-02 7.0360000e-01 1.9850000e+00 -6.1020000e-01 2.4910000e+00 9.9590000e-01 + -1.6898000e+00 -4.5870000e-01 -6.6800000e-01 4.1570000e-01 -7.0900000e-01 2.9540000e-01 -9.4200000e-02 2.1230000e-01 -1.8216000e+00 8.8990000e-01 1.2428000e+00 -1.2207000e+00 -6.0650000e-01 2.1990000e-01 -1.0641000e+00 9.1390000e-01 -1.0258000e+00 -4.5390000e-01 -6.6540000e-01 -1.6660000e-01 + 2.3840000e-01 1.1402000e+00 -1.2062000e+00 -1.3134000e+00 -9.2860000e-01 -1.2511000e+00 1.6787000e+00 -7.2040000e-01 1.2952000e+00 -9.0820000e-01 2.0521000e+00 1.5246000e+00 6.2270000e-01 2.1669000e+00 3.2040000e-01 -4.2360000e-01 6.5670000e-01 9.1780000e-01 1.7697000e+00 -1.8695000e+00 + 1.1019000e+00 1.1200000e+00 7.4010000e-01 3.5140000e+00 1.4512000e+00 -1.7680000e-01 -5.3520000e-01 1.4720000e-01 -1.8830000e-01 -1.5710000e+00 7.8390000e-01 -3.1100000e-01 6.5500000e-01 1.3860000e-01 -1.8271000e+00 -7.5450000e-01 5.1700000e-01 4.5300000e-02 -2.2700000e-01 6.6090000e-01 + -1.6250000e-01 5.6530000e-01 -2.5530000e-01 1.8614000e+00 -9.8940000e-01 1.3834000e+00 7.5700000e-01 1.0342000e+00 2.4366000e+00 8.2210000e-01 -1.1668000e+00 1.0852000e+00 -3.0930000e-01 6.3720000e-01 -8.3380000e-01 2.5616000e+00 1.0602000e+00 6.1580000e-01 9.8750000e-01 4.9680000e-01 + -2.9280000e-01 3.5950000e-01 
1.3154000e+00 -9.7450000e-01 -9.3720000e-01 6.7580000e-01 -1.9210000e-01 1.1630000e+00 1.2191000e+00 1.3970000e+00 -1.5723000e+00 1.5271000e+00 9.2720000e-01 7.4400000e-02 4.2130000e-01 -3.6630000e-01 2.4870000e-01 5.7220000e-01 8.6390000e-01 -4.2270000e-01 + 8.9360000e-01 7.5290000e-01 1.4512000e+00 4.3430000e-01 4.9170000e-01 3.5700000e-02 -1.8720000e-01 -1.0762000e+00 2.1350000e-01 4.4900000e-02 1.4034000e+00 8.3410000e-01 3.4195000e+00 1.0424000e+00 -1.2288000e+00 -8.9100000e-02 -9.8500000e-02 -9.1420000e-01 -8.8500000e-02 -7.5990000e-01 + 5.0380000e-01 5.8100000e-01 8.1260000e-01 2.2740000e-01 1.1074000e+00 1.4871000e+00 -3.0150000e-01 -2.7400000e-02 5.0230000e-01 -2.0340000e-01 -1.0865000e+00 -4.0410000e-01 1.4452000e+00 5.4280000e-01 -8.1060000e-01 2.4239000e+00 2.2944000e+00 -6.7000000e-02 -1.2330000e-01 7.1870000e-01 + 1.4534000e+00 1.0830000e+00 -8.4500000e-02 -3.3950000e-01 -5.5710000e-01 -6.6840000e-01 -1.8105000e+00 -5.2960000e-01 -2.5160000e-01 7.4120000e-01 -9.9700000e-02 -4.0620000e-01 -5.2580000e-01 1.7810000e-01 -1.0530000e+00 -9.7110000e-01 2.6910000e-01 -1.6979000e+00 4.8200000e-02 1.2040000e-01 + -1.3872000e+00 1.1569000e+00 6.2270000e-01 -6.6000000e-02 -2.0121000e+00 -1.0908000e+00 -7.4800000e-02 -9.2950000e-01 -1.5506000e+00 -2.5820000e-01 -2.0909000e+00 -7.3950000e-01 -4.4040000e-01 -1.5797000e+00 9.7740000e-01 6.1940000e-01 1.5627000e+00 1.1640000e+00 -5.1540000e-01 -1.7113000e+00 + 1.1650000e-01 8.1430000e-01 -1.0819000e+00 3.2780000e-01 -2.4073000e+00 -1.6588000e+00 1.2500000e-01 4.8720000e-01 3.0860000e-01 -9.1960000e-01 -1.6783000e+00 -1.5703000e+00 -1.3085000e+00 3.4750000e-01 -2.9440000e-01 -1.4587000e+00 -9.5270000e-01 -1.6845000e+00 -7.6010000e-01 7.5270000e-01 + -3.0810000e-01 5.1190000e-01 -9.6510000e-01 -1.2297000e+00 -9.2900000e-02 -2.3300000e-02 -1.2047000e+00 7.1260000e-01 -1.5583000e+00 -4.4800000e-01 -8.7700000e-01 -1.5515000e+00 8.0700000e-01 -1.0980000e+00 -1.4276000e+00 6.1400000e-02 -1.2037000e+00 -1.8185000e+00 -4.3910000e-01 -2.7379000e+00 + -1.3130000e-01 3.2930000e-01 1.4057000e+00 -3.6180000e-01 8.7390000e-01 -9.0090000e-01 1.3012000e+00 -3.1220000e-01 1.1430000e+00 4.8740000e-01 -4.6780000e-01 -7.1150000e-01 2.3450000e-01 -7.2340000e-01 -1.0300000e-02 -2.8741000e+00 2.6980000e-01 -7.5450000e-01 2.1470000e-01 6.9340000e-01 + 1.3092000e+00 6.1620000e-01 5.5250000e-01 -6.2340000e-01 8.8860000e-01 -1.2307000e+00 -2.8510000e-01 -2.8280000e-01 5.2640000e-01 -7.7810000e-01 2.2373000e+00 -2.3520000e-01 5.2710000e-01 -5.9710000e-01 9.8800000e-02 9.3500000e-01 -1.6810000e-01 1.4312000e+00 1.9130000e+00 1.6725000e+00 + -1.0009000e+00 -1.8174000e+00 1.1100000e-02 -4.8750000e-01 -1.8123000e+00 7.4400000e-01 -1.1633000e+00 -1.2644000e+00 -1.3716000e+00 1.2428000e+00 -1.8937000e+00 -4.2520000e-01 -1.1692000e+00 -1.2155000e+00 -3.4700000e-01 -4.7550000e-01 -9.3450000e-01 6.0800000e-02 -2.4500000e-02 -1.3999000e+00 + -3.8600000e-01 -2.3003000e+00 4.7550000e-01 9.3710000e-01 1.1073000e+00 4.6200000e-01 -8.9060000e-01 2.3200000e-01 5.7890000e-01 -9.9490000e-01 2.7180000e-01 2.8723000e+00 4.1620000e-01 -6.3190000e-01 -1.3470000e-01 -9.6900000e-02 9.7430000e-01 2.6292000e+00 1.5042000e+00 8.6750000e-01 + 4.8020000e-01 -8.2070000e-01 1.1663000e+00 3.1920000e-01 1.5726000e+00 7.6790000e-01 7.3020000e-01 7.6500000e-02 1.9320000e-01 -1.9630000e-01 1.4518000e+00 -3.9510000e-01 1.4172000e+00 7.8640000e-01 -1.4310000e-01 1.1125000e+00 2.3730000e-01 1.8604000e+00 -2.0396000e+00 1.1419000e+00 + -1.1214000e+00 -6.6940000e-01 -3.2300000e-01 -6.1550000e-01 -3.9930000e-01 
5.5480000e-01 -2.0535000e+00 -1.5523000e+00 -1.1382000e+00 -1.8878000e+00 1.8510000e-01 -2.0888000e+00 6.8970000e-01 6.6800000e-01 -8.1750000e-01 -1.6870000e-01 7.9580000e-01 -3.6310000e-01 1.8770000e-01 -1.1639000e+00 + 7.5200000e-01 4.6580000e-01 5.6610000e-01 1.3400000e-01 5.8240000e-01 6.7010000e-01 5.6760000e-01 2.6540000e-01 5.8160000e-01 1.9610000e-01 1.5362000e+00 -3.0240000e-01 1.8026000e+00 7.3770000e-01 2.1920000e-01 7.6890000e-01 2.8210000e-01 -1.6300000e-02 1.5418000e+00 -6.4380000e-01 + 1.5409000e+00 5.5870000e-01 1.9770000e-01 2.1523000e+00 -1.1580000e-01 1.6781000e+00 4.2370000e-01 1.7571000e+00 1.0806000e+00 6.9860000e-01 9.4800000e-01 4.8500000e-02 -2.6690000e-01 -1.4880000e-01 -5.0660000e-01 -5.6010000e-01 2.3054000e+00 -2.6060000e-01 6.9290000e-01 4.5230000e-01 + -4.7640000e-01 5.0590000e-01 8.7800000e-02 -4.5170000e-01 -5.3650000e-01 -8.9700000e-02 -3.8490000e-01 -1.6936000e+00 -9.0780000e-01 -3.6460000e-01 -1.7517000e+00 -3.3720000e-01 -2.3585000e+00 -8.9570000e-01 2.1019000e+00 -3.7510000e-01 1.0114000e+00 6.1860000e-01 -1.9978000e+00 1.2050000e-01 + 3.3579000e+00 -6.3400000e-02 -1.1079000e+00 -5.8700000e-01 -4.1330000e-01 2.8825000e+00 -2.5000000e-01 2.0663000e+00 2.2380000e-01 1.7675000e+00 -2.5750000e-01 2.9687000e+00 8.7050000e-01 2.7748000e+00 8.1850000e-01 -1.0767000e+00 1.4772000e+00 1.1860000e-01 9.0480000e-01 7.8410000e-01 + 1.1057000e+00 3.1800000e-02 -1.1330000e-01 4.6710000e-01 -1.6118000e+00 -7.2360000e-01 -2.6471000e+00 -1.7355000e+00 1.2619000e+00 -4.5850000e-01 2.0800000e-01 3.7710000e-01 -1.8899000e+00 -2.2427000e+00 -8.8660000e-01 -2.4740000e-01 -9.3900000e-01 6.3700000e-02 1.1975000e+00 -1.1491000e+00 + -2.7900000e-01 -1.5729000e+00 5.6130000e-01 -1.0050000e+00 -1.0196000e+00 -4.0070000e-01 9.3920000e-01 5.8960000e-01 -2.6830000e+00 -1.6261000e+00 -4.9410000e-01 -2.4027000e+00 -2.0308000e+00 1.4139000e+00 -3.0854000e+00 -1.2210000e-01 -4.9430000e-01 1.1290000e-01 -9.9760000e-01 2.1450000e-01 + -8.8030000e-01 -1.2254000e+00 -4.7720000e-01 1.9745000e+00 -2.4430000e-01 -9.5170000e-01 -5.8270000e-01 -1.2777000e+00 -1.3955000e+00 3.4320000e-01 -6.7120000e-01 4.6900000e-02 -2.3070000e-01 4.4540000e-01 -2.1493000e+00 -1.4126000e+00 1.3602000e+00 6.7170000e-01 1.1400000e-01 -6.2720000e-01 + 1.0633000e+00 1.4248000e+00 -3.0610000e-01 2.0310000e-01 -6.3530000e-01 6.5330000e-01 1.2161000e+00 6.0510000e-01 1.4264000e+00 -4.7480000e-01 1.0499000e+00 8.2290000e-01 -4.4790000e-01 -4.7900000e-01 3.5610000e-01 1.2514000e+00 1.4870000e-01 -7.1200000e-02 1.7620000e-01 -9.0400000e-02 + 1.6380000e-01 -2.2569000e+00 -2.4675000e+00 -7.8300000e-02 -3.7760000e-01 -1.2369000e+00 -1.8781000e+00 1.9108000e+00 -1.8160000e+00 -1.1933000e+00 -9.6200000e-01 -7.5930000e-01 -1.2052000e+00 -1.9850000e-01 -5.5940000e-01 -6.5760000e-01 -7.3640000e-01 7.7000000e-02 -1.6389000e+00 -3.3090000e-01 + 8.5920000e-01 -1.7540000e-01 -1.3818000e+00 6.0830000e-01 6.3570000e-01 -1.3629000e+00 9.8930000e-01 6.4750000e-01 8.0500000e-02 2.0549000e+00 1.7027000e+00 1.0294000e+00 7.0700000e-01 1.6247000e+00 2.5280000e-01 1.6266000e+00 7.9210000e-01 -1.3827000e+00 4.9770000e-01 1.9734000e+00 + 1.2080000e-01 8.4540000e-01 6.8870000e-01 -6.7110000e-01 -7.7200000e-01 -5.9080000e-01 -6.1670000e-01 -1.5717000e+00 -1.4447000e+00 -2.2281000e+00 -7.0000000e-02 3.9950000e-01 -1.2828000e+00 -2.0780000e-01 5.7770000e-01 8.9800000e-02 -1.9196000e+00 3.3520000e-01 -8.5780000e-01 1.3600000e+00 + 1.2132000e+00 2.2039000e+00 4.1960000e-01 1.6171000e+00 -8.1120000e-01 2.6586000e+00 9.9370000e-01 -7.9200000e-01 
7.4140000e-01 -8.4020000e-01 -5.2990000e-01 -1.1578000e+00 1.4816000e+00 -5.7070000e-01 -2.6960000e-01 -4.2500000e-01 2.0530000e-01 2.5704000e+00 2.3124000e+00 1.7970000e+00 diff --git a/randomforest-matlab/RF_Class_C/data/Y_twonorm.txt b/randomforest-matlab/RF_Class_C/data/Y_twonorm.txt new file mode 100644 index 0000000..3ae4ae5 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/data/Y_twonorm.txt @@ -0,0 +1,300 @@ +2 +2 +2 +2 +1 +2 +1 +2 +2 +1 +1 +2 +1 +1 +1 +1 +1 +2 +1 +2 +1 +2 +1 +2 +2 +2 +1 +1 +2 +1 +1 +1 +2 +2 +2 +1 +1 +2 +1 +2 +2 +2 +1 +2 +1 +2 +2 +1 +2 +1 +2 +2 +1 +1 +2 +2 +1 +1 +2 +1 +1 +1 +2 +1 +1 +2 +2 +1 +1 +2 +1 +2 +1 +2 +1 +2 +2 +1 +2 +1 +1 +1 +2 +2 +1 +2 +2 +1 +1 +1 +2 +1 +1 +1 +1 +1 +2 +1 +2 +1 +1 +1 +1 +1 +1 +2 +1 +1 +2 +1 +1 +1 +2 +2 +1 +1 +2 +1 +1 +2 +1 +2 +2 +1 +1 +1 +1 +2 +1 +2 +2 +2 +2 +1 +2 +2 +2 +2 +2 +2 +1 +1 +2 +2 +1 +1 +1 +1 +1 +1 +1 +2 +1 +1 +1 +1 +2 +2 +1 +2 +1 +2 +1 +1 +1 +2 +2 +2 +1 +2 +2 +2 +2 +1 +2 +1 +2 +2 +1 +1 +2 +2 +1 +1 +2 +2 +2 +2 +2 +1 +2 +1 +2 +1 +1 +1 +1 +2 +2 +1 +2 +2 +2 +1 +1 +2 +1 +2 +2 +1 +1 +1 +1 +2 +1 +2 +2 +1 +1 +2 +1 +1 +2 +2 +1 +1 +1 +2 +2 +1 +1 +2 +1 +2 +1 +1 +1 +2 +1 +2 +1 +1 +1 +1 +1 +2 +2 +2 +1 +2 +2 +2 +1 +2 +1 +1 +2 +2 +1 +2 +2 +2 +1 +2 +1 +2 +1 +1 +1 +1 +2 +1 +2 +2 +2 +2 +2 +2 +1 +1 +1 +1 +2 +2 +1 +2 +2 +1 +2 +2 +1 +2 +1 +1 +1 +2 +1 +2 +1 +2 diff --git a/randomforest-matlab/RF_Class_C/data/twonorm.mat b/randomforest-matlab/RF_Class_C/data/twonorm.mat new file mode 100644 index 0000000..bfd95d9 Binary files /dev/null and b/randomforest-matlab/RF_Class_C/data/twonorm.mat differ diff --git a/randomforest-matlab/RF_Class_C/mexClassRF_predict.mexa64 b/randomforest-matlab/RF_Class_C/mexClassRF_predict.mexa64 new file mode 100644 index 0000000..2d58bfb Binary files /dev/null and b/randomforest-matlab/RF_Class_C/mexClassRF_predict.mexa64 differ diff --git a/randomforest-matlab/RF_Class_C/mexClassRF_predict.mexw64 b/randomforest-matlab/RF_Class_C/mexClassRF_predict.mexw64 new file mode 100644 index 0000000..b970f37 Binary files /dev/null and b/randomforest-matlab/RF_Class_C/mexClassRF_predict.mexw64 differ diff --git a/randomforest-matlab/RF_Class_C/mexClassRF_predict.mexw64.manifest b/randomforest-matlab/RF_Class_C/mexClassRF_predict.mexw64.manifest new file mode 100644 index 0000000..1c06b61 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/mexClassRF_predict.mexw64.manifest @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/randomforest-matlab/RF_Class_C/mexClassRF_predict.mexw64.map b/randomforest-matlab/RF_Class_C/mexClassRF_predict.mexw64.map new file mode 100644 index 0000000..e69de29 diff --git a/randomforest-matlab/RF_Class_C/mexClassRF_train.mexw64 b/randomforest-matlab/RF_Class_C/mexClassRF_train.mexw64 new file mode 100644 index 0000000..a66675b Binary files /dev/null and b/randomforest-matlab/RF_Class_C/mexClassRF_train.mexw64 differ diff --git a/randomforest-matlab/RF_Class_C/precompiled_rfsub/linux64/rfsub.o b/randomforest-matlab/RF_Class_C/precompiled_rfsub/linux64/rfsub.o new file mode 100644 index 0000000..53380b1 Binary files /dev/null and b/randomforest-matlab/RF_Class_C/precompiled_rfsub/linux64/rfsub.o differ diff --git a/randomforest-matlab/RF_Class_C/precompiled_rfsub/win32/rfsub.o b/randomforest-matlab/RF_Class_C/precompiled_rfsub/win32/rfsub.o new file mode 100644 index 0000000..79c7aa6 Binary files /dev/null and b/randomforest-matlab/RF_Class_C/precompiled_rfsub/win32/rfsub.o differ diff --git a/randomforest-matlab/RF_Class_C/precompiled_rfsub/win64/rfsub.o 
b/randomforest-matlab/RF_Class_C/precompiled_rfsub/win64/rfsub.o new file mode 100644 index 0000000..53380b1 Binary files /dev/null and b/randomforest-matlab/RF_Class_C/precompiled_rfsub/win64/rfsub.o differ diff --git a/randomforest-matlab/RF_Class_C/rf.mat b/randomforest-matlab/RF_Class_C/rf.mat new file mode 100644 index 0000000..e493c74 Binary files /dev/null and b/randomforest-matlab/RF_Class_C/rf.mat differ diff --git a/randomforest-matlab/RF_Class_C/rfsub.o b/randomforest-matlab/RF_Class_C/rfsub.o new file mode 100644 index 0000000..a15554f Binary files /dev/null and b/randomforest-matlab/RF_Class_C/rfsub.o differ diff --git a/randomforest-matlab/RF_Class_C/src/classRF.cpp b/randomforest-matlab/RF_Class_C/src/classRF.cpp new file mode 100644 index 0000000..142f00e --- /dev/null +++ b/randomforest-matlab/RF_Class_C/src/classRF.cpp @@ -0,0 +1,876 @@ +/************************************************************** + * mex interface to Andy Liaw et al.'s C code (used in R package randomForest) + * Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) + * License: GPLv2 + * Version: 0.02 + * + * File: contains all the supporting code for a standalone C or mex for + * Classification RF. + * Copied all the code from the randomForest 4.5-28 or was it -29? + * + * important changes (other than the many commented out printf's) + * 1. realized that instead of changing individual S_allocs to callocs, + * a better way is to emulate them + * 2. found some places where memory is not freed in classRF via valgrind, so + * added frees + * 3. made sure that C can now interface with Breiman's Fortran code, so added + * externs "C"'s and the F77_* macros + * 4. added Cokus's Mersenne Twister. + * + *************************************************************/ + +/***************************************************************** + * Copyright (C) 2001-7 Leo Breiman, Adele Cutler and Merck & Co., Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * C driver for Breiman & Cutler's random forest code. + * Re-written from the original main program in Fortran. + * Andy Liaw Feb. 7, 2002. + * Modifications to get the forest out Matt Wiener Feb. 26, 2002.
+ *****************************************************************/ + +#include "stdlib.h" +#include "memory.h" +#include "rf.h" +#include "stdio.h" +#include "math.h" + +#ifndef MATLAB +#define Rprintf printf +#endif + +#ifdef MATLAB +#include "mex.h" +#define Rprintf mexPrintf +#endif + +#define F77_CALL(x) x ## _ +#define F77_NAME(x) F77_CALL(x) +#define F77_SUB(x) F77_CALL(x) + + +#define MAX_UINT_COKUS 4294967295 //basically 2^32-1 + +typedef unsigned long uint32; +extern void seedMT(uint32 seed); +extern uint32 reloadMT(void); +extern uint32 randomMT(void); +/*extern void F77_NAME(buildtree)(int *a, int *b, int *cl, int *cat, + * int *maxcat, int *mdim, int *nsample, + * int *nclass, int *treemap, int *bestvar, + * int *bestsplit, int *bestsplitnext, + * double *tgini, int *nodestatus, int *nodepop, + * int *nodestart, double *classpop, + * double *tclasspop, double *tclasscat, + * int *ta, int *nrnodes, int *, + * int *, int *, int *, int *, int *, int *, + * double *, double *, double *, + * int *, int *, int *); + */ +extern "C"{ + #ifdef WIN64 + void _buildtree_(int *a, int *b, int *cl, int *cat, + #endif + #ifndef WIN64 + void buildtree_(int *a, int *b, int *cl, int *cat, + #endif + int *maxcat, int *mdim, int *nsample, + int *nclass, int *treemap, int *bestvar, + int *bestsplit, int *bestsplitnext, + double *tgini, int *nodestatus, int *nodepop, + int *nodestart, double *classpop, + double *tclasspop, double *tclasscat, + int *ta, int *nrnodes, int *, + int *, int *, int *, int *, int *, int *, + double *, double *, double *, + int *, int *, int *); +} + +extern "C"{ + #ifdef WIN64 + void _rrand_(double *r) ; + #endif + + #ifndef WIN64 + void rrand_(double *r) ; + #endif +} + + +double unif_rand(){ + return (((double)randomMT())/((double)MAX_UINT_COKUS)); +} + +void* S_alloc_alt(int a, int b) { + return(calloc(a, b)); +} + +void GetRNGstate(){}; +void PutRNGstate(){}; + +void oob(int nsample, int nclass, int *jin, int *cl, int *jtr, int *jerr, + int *counttr, int *out, double *errtr, int *jest, double *cutoff); + +void TestSetError(double *countts, int *jts, int *clts, int *jet, int ntest, + int nclass, int nvote, double *errts, + int labelts, int *nclts, double *cutoff); + +/* Define the R RNG for use from Fortran. */ +#ifdef WIN64 +void _rrand_(double *r) { *r = unif_rand(); } +#endif + +#ifndef WIN64 +void rrand_(double *r) { *r = unif_rand(); } +#endif + + +void classRF(double *x, int *dimx, int *cl, int *ncl, int *cat, int *maxcat, + int *sampsize, int *strata, int *Options, int *ntree, int *nvar, + int *ipi, double *classwt, double *cut, int *nodesize, + int *outcl, int *counttr, double *prox, + double *imprt, double *impsd, double *impmat, int *nrnodes, + int *ndbigtree, int *nodestatus, int *bestvar, int *treemap, + int *nodeclass, double *xbestsplit, double *errtr, + int *testdat, double *xts, int *clts, int *nts, double *countts, + int *outclts, int labelts, double *proxts, double *errts, + int *inbag) { + /****************************************************************** + * C wrapper for random forests: get input from R and drive + * the Fortran routines. + * + * Input: + * + * x: matrix of predictors (transposed!) + * dimx: two integers: number of variables and number of cases + * cl: class labels of the data + * ncl: number of classes in the responsema + * cat: integer vector of number of classes in the predictor; + * 1=continuous + * maxcat: maximum of cat + * Options: 7 integers: (0=no, 1=yes) + * add a second class (for unsupervised RF)? 
+ * 1: sampling from product of marginals + * 2: sampling from product of uniforms + * assess variable importance? + * calculate proximity? + * calculate proximity based on OOB predictions? + * calculate outlying measure? + * how often to print output? + * keep the forest for future prediction? + * ntree: number of trees + * nvar: number of predictors to use for each split + * ipi: 0=use class proportion as prob.; 1=use supplied priors + * pi: double vector of class priors + * nodesize: minimum node size: no node with fewer than ndsize + * cases will be split + * + * Output: + * + * outcl: class predicted by RF + * counttr: matrix of votes (transposed!) + * imprt: matrix of variable importance measures + * impmat: matrix of local variable importance measures + * prox: matrix of proximity (if iprox=1) + ******************************************************************/ + + int nsample0, mdim, nclass, addClass, mtry, ntest, nsample, ndsize, + mimp, nimp, near, nuse, noutall, nrightall, nrightimpall, + keepInbag, nstrata; + int jb, j, n, m, k, idxByNnode, idxByNsample, imp, localImp, iprox, + oobprox, keepf, replace, stratify, trace, *nright, + *nrightimp, *nout, *nclts, Ntree; + + int *out, *bestsplitnext, *bestsplit, *nodepop, *jin, *nodex, + *nodexts, *nodestart, *ta, *ncase, *jerr, *varUsed, + *jtr, *classFreq, *idmove, *jvr, + *at, *a, *b, *mind, *nind, *jts, *oobpair; + int **strata_idx, *strata_size, last, ktmp, anyEmpty, ntry; + + double av=0.0; + + double *tgini, *tx, *wl, *classpop, *tclasscat, *tclasspop, *win, + *tp, *wr; + + //Do initialization for COKUS's Random generator + seedMT(2*rand()+1); //works well with odd number so why don't use that + + addClass = Options[0]; + imp = Options[1]; + localImp = Options[2]; + iprox = Options[3]; + oobprox = Options[4]; + trace = Options[5]; + keepf = Options[6]; + replace = Options[7]; + stratify = Options[8]; + keepInbag = Options[9]; + mdim = dimx[0]; + nsample0 = dimx[1]; + nclass = (*ncl==1) ? 2 : *ncl; + ndsize = *nodesize; + Ntree = *ntree; + mtry = *nvar; + ntest = *nts; + nsample = addClass ? (nsample0 + nsample0) : nsample0; + mimp = imp ? mdim : 1; + nimp = imp ? nsample : 1; + near = iprox ? 
nsample0 : 1; + if (trace == 0) trace = Ntree + 1; + + /*printf("\nmdim %d, nclass %d, nrnodes %d, nsample %d, ntest %d\n", mdim, nclass, *nrnodes, nsample, ntest); + printf("\noobprox %d, mdim %d, nsample0 %d, Ntree %d, mtry %d, mimp %d", oobprox, mdim, nsample0, Ntree, mtry, mimp); + printf("\nstratify %d, replace %d",stratify,replace); + printf("\n");*/ + tgini = (double *) S_alloc_alt(mdim, sizeof(double)); + wl = (double *) S_alloc_alt(nclass, sizeof(double)); + wr = (double *) S_alloc_alt(nclass, sizeof(double)); + classpop = (double *) S_alloc_alt(nclass* *nrnodes, sizeof(double)); + tclasscat = (double *) S_alloc_alt(nclass*32, sizeof(double)); + tclasspop = (double *) S_alloc_alt(nclass, sizeof(double)); + tx = (double *) S_alloc_alt(nsample, sizeof(double)); + win = (double *) S_alloc_alt(nsample, sizeof(double)); + tp = (double *) S_alloc_alt(nsample, sizeof(double)); + + out = (int *) S_alloc_alt(nsample, sizeof(int)); + bestsplitnext = (int *) S_alloc_alt(*nrnodes, sizeof(int)); + bestsplit = (int *) S_alloc_alt(*nrnodes, sizeof(int)); + nodepop = (int *) S_alloc_alt(*nrnodes, sizeof(int)); + nodestart = (int *) S_alloc_alt(*nrnodes, sizeof(int)); + jin = (int *) S_alloc_alt(nsample, sizeof(int)); + nodex = (int *) S_alloc_alt(nsample, sizeof(int)); + nodexts = (int *) S_alloc_alt(ntest, sizeof(int)); + ta = (int *) S_alloc_alt(nsample, sizeof(int)); + ncase = (int *) S_alloc_alt(nsample, sizeof(int)); + jerr = (int *) S_alloc_alt(nsample, sizeof(int)); + varUsed = (int *) S_alloc_alt(mdim, sizeof(int)); + jtr = (int *) S_alloc_alt(nsample, sizeof(int)); + jvr = (int *) S_alloc_alt(nsample, sizeof(int)); + classFreq = (int *) S_alloc_alt(nclass, sizeof(int)); + jts = (int *) S_alloc_alt(ntest, sizeof(int)); + idmove = (int *) S_alloc_alt(nsample, sizeof(int)); + at = (int *) S_alloc_alt(mdim*nsample, sizeof(int)); + a = (int *) S_alloc_alt(mdim*nsample, sizeof(int)); + b = (int *) S_alloc_alt(mdim*nsample, sizeof(int)); + mind = (int *) S_alloc_alt(mdim, sizeof(int)); + nright = (int *) S_alloc_alt(nclass, sizeof(int)); + nrightimp = (int *) S_alloc_alt(nclass, sizeof(int)); + nout = (int *) S_alloc_alt(nclass, sizeof(int)); + if (oobprox) { + oobpair = (int *) S_alloc_alt(near*near, sizeof(int)); + } + //printf("nsample=%d\n", nsample); + /* Count number of cases in each class. */ + zeroInt(classFreq, nclass); + for (n = 0; n < nsample; ++n) classFreq[cl[n] - 1] ++; + /* Normalize class weights. */ + //Rprintf("ipi %d ",*ipi); + //for(n=0;n nstrata) nstrata = strata[n]; + /* Create the array of pointers, each pointing to a vector + * of indices of where data of each stratum is. */ + strata_size = (int *) S_alloc_alt(nstrata, sizeof(int)); + for (n = 0; n < nsample0; ++n) { + strata_size[strata[n] - 1] ++; + } + strata_idx = (int **) S_alloc_alt(nstrata, sizeof(int *)); + for (n = 0; n < nstrata; ++n) { + strata_idx[n] = (int *) S_alloc_alt(strata_size[n], sizeof(int)); + } + zeroInt(strata_size, nstrata); + for (n = 0; n < nsample0; ++n) { + strata_size[strata[n] - 1] ++; + strata_idx[strata[n] - 1][strata_size[strata[n] - 1] - 1] = n; + } + } else { + nind = replace ? 
NULL : (int *) S_alloc_alt(nsample, sizeof(int)); + } + + /* INITIALIZE FOR RUN */ + if (*testdat) zeroDouble(countts, ntest * nclass); + zeroInt(counttr, nclass * nsample); + zeroInt(out, nsample); + zeroDouble(tgini, mdim); + zeroDouble(errtr, (nclass + 1) * Ntree); + + if (labelts) { + nclts = (int *) S_alloc_alt(nclass, sizeof(int)); + for (n = 0; n < ntest; ++n) nclts[clts[n]-1]++; + zeroDouble(errts, (nclass + 1) * Ntree); + } + //printf("labelts %d\n",labelts);fflush(stdout); + if (imp) { + zeroDouble(imprt, (nclass+2) * mdim); + zeroDouble(impsd, (nclass+1) * mdim); + if (localImp) zeroDouble(impmat, nsample * mdim); + } + if (iprox) { + zeroDouble(prox, nsample0 * nsample0); + if (*testdat) zeroDouble(proxts, ntest * (ntest + nsample0)); + } + makeA(x, mdim, nsample, cat, at, b); + + //R_CheckUserInterrupt(); + + + /* Starting the main loop over number of trees. */ + GetRNGstate(); + if (trace <= Ntree) { + /* Print header for running output. */ + Rprintf("ntree OOB"); + for (n = 1; n <= nclass; ++n) Rprintf("%7i", n); + if (labelts) { + Rprintf("| Test"); + for (n = 1; n <= nclass; ++n) Rprintf("%7i", n); + } + Rprintf("\n"); + } + idxByNnode = 0; + idxByNsample = 0; + + //Rprintf("addclass %d, ntree %d, cl[300]=%d", addClass,Ntree,cl[299]); + for(jb = 0; jb < Ntree; jb++) { + //Rprintf("addclass %d, ntree %d, cl[300]=%d", addClass,Ntree,cl[299]); + //printf("jb=%d,\n",jb); + /* Do we need to simulate data for the second class? */ + if (addClass) createClass(x, nsample0, nsample, mdim); + do { + zeroInt(nodestatus + idxByNnode, *nrnodes); + zeroInt(treemap + 2*idxByNnode, 2 * *nrnodes); + zeroDouble(xbestsplit + idxByNnode, *nrnodes); + zeroInt(nodeclass + idxByNnode, *nrnodes); + zeroInt(varUsed, mdim); + /* TODO: Put all sampling code into a function. 
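Each tree draws its own bootstrap sample below: with stratification, sampsize[n] cases are drawn per stratum (with or without replacement); without stratification, *sampsize cases are drawn from the full training set, retrying up to 10 times if any class ends up with zero weight in the sample.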
*/ + /* drawSample(sampsize, nsample, ); */ + if (stratify) { /* stratified sampling */ + zeroInt(jin, nsample); + zeroDouble(tclasspop, nclass); + zeroDouble(win, nsample); + if (replace) { /* with replacement */ + for (n = 0; n < nstrata; ++n) { + for (j = 0; j < sampsize[n]; ++j) { + ktmp = (int) (unif_rand() * strata_size[n]); + k = strata_idx[n][ktmp]; + tclasspop[cl[k] - 1] += classwt[cl[k] - 1]; + win[k] += classwt[cl[k] - 1]; + jin[k] = 1; + } + } + } else { /* stratified sampling w/o replacement */ + /* re-initialize the index array */ + zeroInt(strata_size, nstrata); + for (j = 0; j < nsample; ++j) { + strata_size[strata[j] - 1] ++; + strata_idx[strata[j] - 1][strata_size[strata[j] - 1] - 1] = j; + } + /* sampling without replacement */ + for (n = 0; n < nstrata; ++n) { + last = strata_size[n] - 1; + for (j = 0; j < sampsize[n]; ++j) { + ktmp = (int) (unif_rand() * (last+1)); + k = strata_idx[n][ktmp]; + swapInt(strata_idx[n][last], strata_idx[n][ktmp]); + last--; + tclasspop[cl[k] - 1] += classwt[cl[k]-1]; + win[k] += classwt[cl[k]-1]; + jin[k] = 1; + } + } + } + } else { /* unstratified sampling */ + anyEmpty = 0; + ntry = 0; + do { + zeroInt(jin, nsample); + zeroDouble(tclasspop, nclass); + zeroDouble(win, nsample); + if (replace) { + for (n = 0; n < *sampsize; ++n) { + k = unif_rand() * nsample; + tclasspop[cl[k] - 1] += classwt[cl[k]-1]; + win[k] += classwt[cl[k]-1]; + jin[k] = 1; + } + } else { + for (n = 0; n < nsample; ++n) nind[n] = n; + last = nsample - 1; + for (n = 0; n < *sampsize; ++n) { + ktmp = (int) (unif_rand() * (last+1)); + k = nind[ktmp]; + swapInt(nind[ktmp], nind[last]); + last--; + tclasspop[cl[k] - 1] += classwt[cl[k]-1]; + win[k] += classwt[cl[k]-1]; + jin[k] = 1; + } + } + /* check if any class is missing in the sample */ + for (n = 0; n < nclass; ++n) { + if (tclasspop[n] == 0) anyEmpty = 1; + } + ntry++; + } while (anyEmpty && ntry <= 10); + } + + /* If need to keep indices of inbag data, do that here. */ + if (keepInbag) { + for (n = 0; n < nsample0; ++n) { + inbag[n + idxByNsample] = jin[n]; + } + } + + /* Copy the original a matrix back. */ + memcpy(a, at, sizeof(int) * mdim * nsample); + modA(a, &nuse, nsample, mdim, cat, *maxcat, ncase, jin); + + #ifdef WIN64 + F77_CALL(_buildtree) + #endif + + #ifndef WIN64 + F77_CALL(buildtree) + #endif + (a, b, cl, cat, maxcat, &mdim, &nsample, + &nclass, + treemap + 2*idxByNnode, bestvar + idxByNnode, + bestsplit, bestsplitnext, tgini, + nodestatus + idxByNnode, nodepop, + nodestart, classpop, tclasspop, tclasscat, + ta, nrnodes, idmove, &ndsize, ncase, + &mtry, varUsed, nodeclass + idxByNnode, + ndbigtree + jb, win, wr, wl, &mdim, + &nuse, mind); + /* if the "tree" has only the root node, start over */ + } while (ndbigtree[jb] == 1); + + Xtranslate(x, mdim, *nrnodes, nsample, bestvar + idxByNnode, + bestsplit, bestsplitnext, xbestsplit + idxByNnode, + nodestatus + idxByNnode, cat, ndbigtree[jb]); + + /* Get test set error */ + if (*testdat) { + predictClassTree(xts, ntest, mdim, treemap + 2*idxByNnode, + nodestatus + idxByNnode, xbestsplit + idxByNnode, + bestvar + idxByNnode, + nodeclass + idxByNnode, ndbigtree[jb], + cat, nclass, jts, nodexts, *maxcat); + TestSetError(countts, jts, clts, outclts, ntest, nclass, jb+1, + errts + jb*(nclass+1), labelts, nclts, cut); + } + + /* Get out-of-bag predictions and errors. 
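Only cases left out of this tree's bootstrap sample (jin[n] == 0) cast a vote into counttr; out[n] counts how often case n has been out-of-bag so far, and nout/noutall track the per-class and overall OOB counts for the current iteration.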
*/ + predictClassTree(x, nsample, mdim, treemap + 2*idxByNnode, + nodestatus + idxByNnode, xbestsplit + idxByNnode, + bestvar + idxByNnode, + nodeclass + idxByNnode, ndbigtree[jb], + cat, nclass, jtr, nodex, *maxcat); + + zeroInt(nout, nclass); + noutall = 0; + for (n = 0; n < nsample; ++n) { + if (jin[n] == 0) { + /* increment the OOB votes */ + counttr[n*nclass + jtr[n] - 1] ++; + /* count number of times a case is OOB */ + out[n]++; + /* count number of OOB cases in the current iteration. + * nout[n] is the number of OOB cases for the n-th class. + * noutall is the number of OOB cases overall. */ + nout[cl[n] - 1]++; + noutall++; + } + } + + /* Compute out-of-bag error rate. */ + oob(nsample, nclass, jin, cl, jtr, jerr, counttr, out, + errtr + jb*(nclass+1), outcl, cut); + + if ((jb+1) % trace == 0) { + Rprintf("%5i: %6.2f%%", jb+1, 100.0*errtr[jb * (nclass+1)]); + for (n = 1; n <= nclass; ++n) { + Rprintf("%6.2f%%", 100.0 * errtr[n + jb * (nclass+1)]); + } + if (labelts) { + Rprintf("| "); + for (n = 0; n <= nclass; ++n) { + Rprintf("%6.2f%%", 100.0 * errts[n + jb * (nclass+1)]); + } + } + Rprintf("\n"); + + //R_CheckUserInterrupt(); + } + + /* DO VARIABLE IMPORTANCE */ + if (imp) { + nrightall = 0; + /* Count the number of correct prediction by the current tree + * among the OOB samples, by class. */ + zeroInt(nright, nclass); + for (n = 0; n < nsample; ++n) { + /* out-of-bag and predicted correctly: */ + if (jin[n] == 0 && jtr[n] == cl[n]) { + nright[cl[n] - 1]++; + nrightall++; + } + } + for (m = 0; m < mdim; ++m) { + if (varUsed[m]) { + nrightimpall = 0; + zeroInt(nrightimp, nclass); + for (n = 0; n < nsample; ++n) tx[n] = x[m + n*mdim]; + /* Permute the m-th variable. */ + permuteOOB(m, x, jin, nsample, mdim); + /* Predict the modified data using the current tree. */ + predictClassTree(x, nsample, mdim, treemap + 2*idxByNnode, + nodestatus + idxByNnode, + xbestsplit + idxByNnode, + bestvar + idxByNnode, + nodeclass + idxByNnode, ndbigtree[jb], + cat, nclass, jvr, nodex, *maxcat); + /* Count how often correct predictions are made with + * the modified data. */ + for (n = 0; n < nsample; n++) { + if (jin[n] == 0) { + if (jvr[n] == cl[n]) { + nrightimp[cl[n] - 1]++; + nrightimpall++; + } + if (localImp && jvr[n] != jtr[n]) { + if (cl[n] == jvr[n]) { + impmat[m + n*mdim] -= 1.0; + } else { + impmat[m + n*mdim] += 1.0; + } + } + } + /* Restore the original data for that variable. */ + x[m + n*mdim] = tx[n]; + } + /* Accumulate decrease in proportions of correct + * predictions. */ + for (n = 0; n < nclass; ++n) { + if (nout[n] > 0) { + imprt[m + n*mdim] += + ((double) (nright[n] - nrightimp[n])) / + nout[n]; + impsd[m + n*mdim] += + ((double) (nright[n] - nrightimp[n]) * + (nright[n] - nrightimp[n])) / nout[n]; + } + } + if (noutall > 0) { + imprt[m + nclass*mdim] += + ((double)(nrightall - nrightimpall)) / noutall; + impsd[m + nclass*mdim] += + ((double) (nrightall - nrightimpall) * + (nrightall - nrightimpall)) / noutall; + } + } + } + } + + /* DO PROXIMITIES */ + if (iprox) { + computeProximity(prox, oobprox, nodex, jin, oobpair, near); + /* proximity for test data */ + if (*testdat) { + computeProximity(proxts, 0, nodexts, jin, oobpair, ntest); + /* Compute proximity between testset and training set. 
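A test case and a training case gain one unit of proximity for each tree in which they land in the same terminal node; the accumulated counts are divided by the number of trees in the final proximity processing further below.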
*/ + for (n = 0; n < ntest; ++n) { + for (k = 0; k < near; ++k) { + if (nodexts[n] == nodex[k]) + proxts[n + ntest * (k+ntest)] += 1.0; + } + } + } + } + + if (keepf) idxByNnode += *nrnodes; + if (keepInbag) idxByNsample += nsample0; + } + PutRNGstate(); + + + /* Final processing of variable importance. */ + for (m = 0; m < mdim; m++) tgini[m] /= Ntree; + + if (imp) { + for (m = 0; m < mdim; ++m) { + if (localImp) { /* casewise measures */ + for (n = 0; n < nsample; ++n) impmat[m + n*mdim] /= out[n]; + } + /* class-specific measures */ + for (k = 0; k < nclass; ++k) { + av = imprt[m + k*mdim] / Ntree; + impsd[m + k*mdim] = + sqrt(((impsd[m + k*mdim] / Ntree) - av*av) / Ntree); + imprt[m + k*mdim] = av; + /* imprt[m + k*mdim] = (se <= 0.0) ? -1000.0 - av : av / se; */ + } + /* overall measures */ + av = imprt[m + nclass*mdim] / Ntree; + impsd[m + nclass*mdim] = + sqrt(((impsd[m + nclass*mdim] / Ntree) - av*av) / Ntree); + imprt[m + nclass*mdim] = av; + imprt[m + (nclass+1)*mdim] = tgini[m]; + } + } else { + for (m = 0; m < mdim; ++m) imprt[m] = tgini[m]; + } + + /* PROXIMITY DATA ++++++++++++++++++++++++++++++++*/ + if (iprox) { + for (n = 0; n < near; ++n) { + for (k = n + 1; k < near; ++k) { + prox[near*k + n] /= oobprox ? + (oobpair[near*k + n] > 0 ? oobpair[near*k + n] : 1) : + Ntree; + prox[near*n + k] = prox[near*k + n]; + } + prox[near*n + n] = 1.0; + } + if (*testdat) { + for (n = 0; n < ntest; ++n) + for (k = 0; k < ntest + nsample; ++k) + proxts[ntest*k + n] /= Ntree; + } + } + if (trace <= Ntree){ + printf("\nmdim %d, nclass %d, nrnodes %d, nsample %d, ntest %d\n", mdim, nclass, *nrnodes, nsample, ntest); + printf("\noobprox %d, mdim %d, nsample0 %d, Ntree %d, mtry %d, mimp %d", oobprox, mdim, nsample0, Ntree, mtry, mimp); + printf("\nstratify %d, replace %d",stratify,replace); + printf("\n"); + } + + //frees up the memory + free(tgini);free(wl);free(wr);free(classpop);free(tclasscat); + free(tclasspop);free(tx);free(win);free(tp);free(out); + free(bestsplitnext);free(bestsplit);free(nodepop);free(nodestart);free(jin); + free(nodex);free(nodexts);free(ta);free(ncase);free(jerr); + free(varUsed);free(jtr);free(jvr);free(classFreq);free(jts); + free(idmove);free(at);free(a);free(b);free(mind); + free(nright);free(nrightimp);free(nout); + + if (oobprox) { + free(oobpair); + } + + if (stratify) { + free(strata_size); + for (n = 0; n < nstrata; ++n) { + free(strata_idx[n]); + } + free(strata_idx); + } else { + if (replace) + free(nind); + } + //printf("labelts %d\n",labelts);fflush(stdout); + + if (labelts) { + free(nclts); + } + //printf("stratify %d",stratify);fflush(stdout); +} + + +void classForest(int *mdim, int *ntest, int *nclass, int *maxcat, + int *nrnodes, int *ntree, double *x, double *xbestsplit, + double *pid, double *cutoff, double *countts, int *treemap, + int *nodestatus, int *cat, int *nodeclass, int *jts, + int *jet, int *bestvar, int *node, int *treeSize, + int *keepPred, int *prox, double *proxMat, int *nodes) { + int j, n, n1, n2, idxNodes, offset1, offset2, *junk, ntie; + double crit, cmax; + + zeroDouble(countts, *nclass * *ntest); + idxNodes = 0; + offset1 = 0; + offset2 = 0; + junk = NULL; + + // Rprintf("nclass %d\n", *nclass); + for (j = 0; j < *ntree; ++j) { + // Rprintf("pCT nclass %d \n", *nclass); + /* predict by the j-th tree */ + // Rprintf( "#ntree: %d, idxNodes: %d\n", j, idxNodes ); + predictClassTree(x, *ntest, *mdim, nrnodes, treemap + 2*idxNodes, + nodestatus + idxNodes, xbestsplit + idxNodes, + bestvar + idxNodes, nodeclass + idxNodes, + 
treeSize[j], cat, *nclass, + jts + offset1, node + offset2, *maxcat); + + //// original code + //predictClassTree(x, *ntest, *mdim, treemap + 2*idxNodes, + // nodestatus + idxNodes, xbestsplit + idxNodes, + // bestvar + idxNodes, nodeclass + idxNodes, + // treeSize[j], cat, *nclass, + // jts + offset1, node + offset2, *maxcat); + + /* accumulate votes: */ + for (n = 0; n < *ntest; ++n) { + countts[jts[n + offset1] - 1 + n * *nclass] += 1.0; + } + + /* if desired, do proximities for this round */ + if (*prox) computeProximity(proxMat, 0, node + offset2, junk, junk, + *ntest); + idxNodes += *nrnodes; + if (*keepPred) offset1 += *ntest; + if (*nodes) offset2 += *ntest; + } + + //Rprintf("ntest %d\n", *ntest); + /* Aggregated prediction is the class with the maximum votes/cutoff */ + for (n = 0; n < *ntest; ++n) { + //Rprintf("Ap: ntest %d\n", *ntest); + cmax = 0.0; + ntie = 1; + for (j = 0; j < *nclass; ++j) { + crit = (countts[j + n * *nclass] / *ntree) / cutoff[j]; + if (crit > cmax) { + jet[n] = j + 1; + cmax = crit; + } + /* Break ties at random: */ + if (crit == cmax) { + ntie++; + if (unif_rand() > 1.0 / ntie) jet[n] = j + 1; + } + } + } + + //Rprintf("ntest %d\n", *ntest); + /* if proximities requested, do the final adjustment + * (division by number of trees) */ + + //Rprintf("prox %d",*prox); + if (*prox) { + //Rprintf("prox: ntest %d\n", *ntest); + for (n1 = 0; n1 < *ntest; ++n1) { + for (n2 = n1 + 1; n2 < *ntest; ++n2) { + proxMat[n1 + n2 * *ntest] /= *ntree; + proxMat[n2 + n1 * *ntest] = proxMat[n1 + n2 * *ntest]; + } + proxMat[n1 + n1 * *ntest] = 1.0; + } + } + //Rprintf("END ntest %d\n", *ntest); + +} + +/* + * Modified by A. Liaw 1/10/2003 (Deal with cutoff) + * Re-written in C by A. Liaw 3/08/2004 + */ +void oob(int nsample, int nclass, int *jin, int *cl, int *jtr, int *jerr, + int *counttr, int *out, double *errtr, int *jest, + double *cutoff) { + int j, n, noob, *noobcl, ntie; + double qq, smax, smaxtr; + + noobcl = (int *) S_alloc_alt(nclass, sizeof(int)); + zeroInt(jerr, nsample); + zeroDouble(errtr, nclass+1); + + noob = 0; + for (n = 0; n < nsample; ++n) { + if (out[n]) { + noob++; + noobcl[cl[n]-1]++; + smax = 0.0; + smaxtr = 0.0; + ntie = 1; + for (j = 0; j < nclass; ++j) { + qq = (((double) counttr[j + n*nclass]) / out[n]) / cutoff[j]; + if (j+1 != cl[n]) smax = (qq > smax) ? 
qq : smax; + /* if vote / cutoff is larger than current max, re-set max and + * change predicted class to the current class */ + if (qq > smaxtr) { + smaxtr = qq; + jest[n] = j+1; + } + /* break tie at random */ + if (qq == smaxtr) { + ntie++; + if (unif_rand() > 1.0 / ntie) { + smaxtr = qq; + jest[n] = j+1; + } + } + } + if (jest[n] != cl[n]) { + errtr[cl[n]] += 1.0; + errtr[0] += 1.0; + jerr[n] = 1; + } + } + } + errtr[0] /= noob; + for (n = 1; n <= nclass; ++n) errtr[n] /= noobcl[n-1]; + free(noobcl); +} + + +void TestSetError(double *countts, int *jts, int *clts, int *jet, int ntest, + int nclass, int nvote, double *errts, + int labelts, int *nclts, double *cutoff) { + int j, n, ntie; + double cmax, crit; + + for (n = 0; n < ntest; ++n) countts[jts[n]-1 + n*nclass] += 1.0; + + /* Prediction is the class with the maximum votes */ + for (n = 0; n < ntest; ++n) { + cmax=0.0; + ntie = 1; + for (j = 0; j < nclass; ++j) { + crit = (countts[j + n*nclass] / nvote) / cutoff[j]; + if (crit > cmax) { + jet[n] = j+1; + cmax = crit; + } + /* Break ties at random: */ + if (crit == cmax) { + ntie++; + if (unif_rand() > 1.0 / ntie) { + jet[n] = j+1; + cmax = crit; + } + } + } + } + if (labelts) { + zeroDouble(errts, nclass + 1); + for (n = 0; n < ntest; ++n) { + if (jet[n] != clts[n]) { + errts[0] += 1.0; + errts[clts[n]] += 1.0; + } + } + errts[0] /= ntest; + for (n = 1; n <= nclass; ++n) errts[n] /= nclts[n-1]; + } +} + diff --git a/randomforest-matlab/RF_Class_C/src/classTree.cpp b/randomforest-matlab/RF_Class_C/src/classTree.cpp new file mode 100644 index 0000000..f909869 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/src/classTree.cpp @@ -0,0 +1,255 @@ +/************************************************************** + * mex interface to Andy Liaw et al.'s C code (used in R package randomForest) + * Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) + * License: GPLv2 + * Version: 0.02 + * + * File: contains all the other supporting code for a standalone C or mex for + * Classification RF. + * Copied all the code from the randomForest 4.5-28 or was it -29? + * added externs "C"'s and the F77_* macros + * + *************************************************************/ + +/******************************************************************* + Copyright (C) 2001-7 Leo Breiman, Adele Cutler and Merck & Co., Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+*******************************************************************/ +#include "rf.h" +#include "memory.h" +#include "stdlib.h" +#include "math.h" + +#ifdef MATLAB +#define Rprintf mexPrintf +#include "mex.h" +#endif + +#ifndef MATLAB +#define Rprintf printf +#include "stdio.h" +#endif + +typedef unsigned long uint32; +extern void seedMT(uint32 seed); +extern uint32 reloadMT(void); +extern uint32 randomMT(void); +extern double unif_rand(); +extern void R_qsort_I(double *v, int *I, int i, int j); + +extern "C"{ + #ifdef WIN64 + void _catmax_(double *parentDen, double *tclasscat, + double *tclasspop, int *nclass, int *lcat, + int *ncatsp, double *critmax, int *nhit, + int *maxcat, int *ncmax, int *ncsplit); + #endif + + #ifndef WIN64 + void catmax_(double *parentDen, double *tclasscat, + double *tclasspop, int *nclass, int *lcat, + int *ncatsp, double *critmax, int *nhit, + int *maxcat, int *ncmax, int *ncsplit); + #endif +} +extern "C"{ + #ifdef WIN64 + void _catmaxb_(double *totalWt, double *tclasscat, double *classCount, + int *nclass, int *nCat, int *nbest, double *critmax, + int *nhit, double *catCount) ; + #endif + + #ifndef WIN64 + void catmaxb_(double *totalWt, double *tclasscat, double *classCount, + int *nclass, int *nCat, int *nbest, double *critmax, + int *nhit, double *catCount) ; + #endif +} + +#ifdef WIN64 +void F77_NAME(_catmax) +#endif + +#ifndef WIN64 +void F77_NAME(catmax) +#endif +(double *parentDen, double *tclasscat, + double *tclasspop, int *nclass, int *lcat, + int *ncatsp, double *critmax, int *nhit, + int *maxcat, int *ncmax, int *ncsplit) { +/* This finds the best split of a categorical variable with lcat + categories and nclass classes, where tclasscat(j, k) is the number + of cases in class j with category value k. The method uses an + exhaustive search over all partitions of the category values if the + number of categories is 10 or fewer. Otherwise ncsplit randomly + selected splits are tested and best used. */ + int j, k, n, icat[32], nsplit; + double leftNum, leftDen, rightNum, decGini, *leftCatClassCount; + + leftCatClassCount = (double *) calloc(*nclass, sizeof(double)); + *nhit = 0; + nsplit = *lcat > *ncmax ? + *ncsplit : (int) pow(2.0, (double) *lcat - 1) - 1; + + for (n = 0; n < nsplit; ++n) { + zeroInt(icat, 32); + if (*lcat > *ncmax) { + /* Generate random split. + TODO: consider changing to generating random bits with more + efficient algorithm */ + for (j = 0; j < *lcat; ++j) icat[j] = unif_rand() > 0.5 ? 1 : 0; + } else { + unpack((unsigned int) n + 1, icat); + } + for (j = 0; j < *nclass; ++j) { + leftCatClassCount[j] = 0; + for (k = 0; k < *lcat; ++k) { + if (icat[k]) { + leftCatClassCount[j] += tclasscat[j + k * *nclass]; + } + } + } + leftNum = 0.0; + leftDen = 0.0; + for (j = 0; j < *nclass; ++j) { + leftNum += leftCatClassCount[j] * leftCatClassCount[j]; + leftDen += leftCatClassCount[j]; + } + /* If either node is empty, try another split. */ + if (leftDen <= 1.0e-8 || *parentDen - leftDen <= 1.0e-5) continue; + rightNum = 0.0; + for (j = 0; j < *nclass; ++j) { + leftCatClassCount[j] = tclasspop[j] - leftCatClassCount[j]; + rightNum += leftCatClassCount[j] * leftCatClassCount[j]; + } + decGini = (leftNum / leftDen) + (rightNum / (*parentDen - leftDen)); + if (decGini > *critmax) { + *critmax = decGini; + *nhit = 1; + *ncatsp = *lcat > *ncmax ? 
pack((unsigned int) *lcat, icat) : n + 1; + } + } + free(leftCatClassCount); +} + + + +/* Find best split of with categorical variable when there are two classes */ +#ifdef WIN64 +void F77_NAME(_catmaxb) +#endif +#ifndef WIN64 +void F77_NAME(catmaxb) +#endif +(double *totalWt, double *tclasscat, double *classCount, + int *nclass, int *nCat, int *nbest, double *critmax, + int *nhit, double *catCount) { + + double catProportion[32], cp[32], cm[32]; + int kcat[32]; + int i, j; + double bestsplit=0.0, rightDen, leftDen, leftNum, rightNum, crit; + + *nhit = 0; + for (i = 0; i < *nCat; ++i) { + catProportion[i] = catCount[i] ? + tclasscat[i * *nclass] / catCount[i] : 0.0; + kcat[i] = i + 1; + } + R_qsort_I(catProportion, kcat, 1, *nCat); + for (i = 0; i < *nclass; ++i) { + cp[i] = 0; + cm[i] = classCount[i]; + } + rightDen = *totalWt; + leftDen = 0.0; + for (i = 0; i < *nCat - 1; ++i) { + leftDen += catCount[kcat[i]-1]; + rightDen -= catCount[kcat[i]-1]; + leftNum = 0.0; + rightNum = 0.0; + for (j = 0; j < *nclass; ++j) { + cp[j] += tclasscat[j + (kcat[i]-1) * *nclass]; + cm[j] -= tclasscat[j + (kcat[i]-1) * *nclass]; + leftNum += cp[j] * cp[j]; + rightNum += cm[j] * cm[j]; + } + if (catProportion[i] < catProportion[i + 1]) { + /* If neither node is empty, check the split. */ + if (rightDen > 1.0e-5 && leftDen > 1.0e-5) { + crit = (leftNum / leftDen) + (rightNum / rightDen); + if (crit > *critmax) { + *critmax = crit; + bestsplit = .5 * (catProportion[i] + catProportion[i + 1]); + *nhit = 1; + } + } + } + } + if (*nhit == 1) { + zeroInt(kcat, *nCat); + for (i = 0; i < *nCat; ++i) { + catProportion[i] = catCount[i] ? + tclasscat[i * *nclass] / catCount[i] : 0.0; + kcat[i] = catProportion[i] < bestsplit ? 1 : 0; + } + *nbest = pack(*nCat, kcat); + } +} + + + +void predictClassTree(double *x, int n, int mdim, int *treemap, + int *nodestatus, double *xbestsplit, + int *bestvar, int *nodeclass, + int treeSize, int *cat, int nclass, + int *jts, int *nodex, int maxcat) { + int m, i, j, k, *cbestsplit; + unsigned int npack; + + //Rprintf("maxcat %d\n",maxcat); + /* decode the categorical splits */ + if (maxcat > 1) { + cbestsplit = (int *) calloc(maxcat * treeSize, sizeof(int)); + zeroInt(cbestsplit, maxcat * treeSize); + for (i = 0; i < treeSize; ++i) { + if (nodestatus[i] != NODE_TERMINAL) { + if (cat[bestvar[i] - 1] > 1) { + npack = (unsigned int) xbestsplit[i]; + /* unpack `npack' into bits */ + for (j = 0; npack; npack >>= 1, ++j) { + cbestsplit[j + i*maxcat] = npack & 01; + } + } + } + } + } + for (i = 0; i < n; ++i) { + k = 0; + while (nodestatus[k] != NODE_TERMINAL) { + m = bestvar[k] - 1; + if (cat[m] == 1) { + /* Split by a numerical predictor */ + k = (x[m + i * mdim] <= xbestsplit[k]) ? + treemap[k * 2] - 1 : treemap[1 + k * 2] - 1; + } else { + /* Split by a categorical predictor */ + k = cbestsplit[(int) x[m + i * mdim] - 1 + k * maxcat] ? 
+ treemap[k * 2] - 1 : treemap[1 + k * 2] - 1; + } + } + /* Terminal node: assign class label */ + jts[i] = nodeclass[k]; + nodex[i] = k + 1; + } + if (maxcat > 1) free(cbestsplit); +} diff --git a/randomforest-matlab/RF_Class_C/src/classTree2.cpp b/randomforest-matlab/RF_Class_C/src/classTree2.cpp new file mode 100644 index 0000000..dd89c00 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/src/classTree2.cpp @@ -0,0 +1,308 @@ +/************************************************************** +* mex interface to Andy Liaw et al.'s C code (used in R package randomForest) +* Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) +* License: GPLv2 +* Version: 0.02 +* +* File: contains all the other supporting code for a standalone C or mex for +* Classification RF. +* Copied all the code from the randomForest 4.5-28 or was it -29? +* added externs "C"'s and the F77_* macros +* +*************************************************************/ + +/******************************************************************* +Copyright (C) 2001-7 Leo Breiman, Adele Cutler and Merck & Co., Inc. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +*******************************************************************/ +#include "rf.h" +#include "memory.h" +#include "stdlib.h" +#include "math.h" + +#ifdef MATLAB +#define Rprintf mexPrintf +#include "mex.h" +#endif + +#ifndef MATLAB +#define Rprintf printf +#include "stdio.h" +#endif + +typedef unsigned long uint32; +extern void seedMT(uint32 seed); +extern uint32 reloadMT(void); +extern uint32 randomMT(void); +extern double unif_rand(); +extern void R_qsort_I(double *v, int *I, int i, int j); + +extern "C"{ +#ifdef WIN64 + void _catmax_(double *parentDen, double *tclasscat, + double *tclasspop, int *nclass, int *lcat, + int *ncatsp, double *critmax, int *nhit, + int *maxcat, int *ncmax, int *ncsplit); +#endif + +#ifndef WIN64 + void catmax_(double *parentDen, double *tclasscat, + double *tclasspop, int *nclass, int *lcat, + int *ncatsp, double *critmax, int *nhit, + int *maxcat, int *ncmax, int *ncsplit); +#endif +} +extern "C"{ +#ifdef WIN64 + void _catmaxb_(double *totalWt, double *tclasscat, double *classCount, + int *nclass, int *nCat, int *nbest, double *critmax, + int *nhit, double *catCount) ; +#endif + +#ifndef WIN64 + void catmaxb_(double *totalWt, double *tclasscat, double *classCount, + int *nclass, int *nCat, int *nbest, double *critmax, + int *nhit, double *catCount) ; +#endif +} + +#ifdef WIN64 +void F77_NAME(_catmax) +#endif + +#ifndef WIN64 +void F77_NAME(catmax) +#endif +(double *parentDen, double *tclasscat, + double *tclasspop, int *nclass, int *lcat, + int *ncatsp, double *critmax, int *nhit, + int *maxcat, int *ncmax, int *ncsplit) { + /* This finds the best split of a categorical variable with lcat + categories and nclass classes, where tclasscat(j, k) is the number + of cases in class j with category value k. The method uses an + exhaustive search over all partitions of the category values if the + number of categories is 10 or fewer. Otherwise ncsplit randomly + selected splits are tested and best used. 
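+   For example, with lcat = 4 and ncmax >= 4 the exhaustive branch
+   below enumerates nsplit = 2^(4-1) - 1 = 7 distinct candidate
+   partitions via unpack(); when lcat > ncmax, ncsplit random
+   assignments of categories to the left child are tried instead.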
*/ + int j, k, n, icat[32], nsplit; + double leftNum, leftDen, rightNum, decGini, *leftCatClassCount; + + leftCatClassCount = (double *) calloc(*nclass, sizeof(double)); + *nhit = 0; + nsplit = *lcat > *ncmax ? + *ncsplit : (int) pow(2.0, (double) *lcat - 1) - 1; + + for (n = 0; n < nsplit; ++n) { + zeroInt(icat, 32); + if (*lcat > *ncmax) { + /* Generate random split. + TODO: consider changing to generating random bits with more + efficient algorithm */ + for (j = 0; j < *lcat; ++j) icat[j] = unif_rand() > 0.5 ? 1 : 0; + } else { + unpack((unsigned int) n + 1, icat); + } + for (j = 0; j < *nclass; ++j) { + leftCatClassCount[j] = 0; + for (k = 0; k < *lcat; ++k) { + if (icat[k]) { + leftCatClassCount[j] += tclasscat[j + k * *nclass]; + } + } + } + leftNum = 0.0; + leftDen = 0.0; + for (j = 0; j < *nclass; ++j) { + leftNum += leftCatClassCount[j] * leftCatClassCount[j]; + leftDen += leftCatClassCount[j]; + } + /* If either node is empty, try another split. */ + if (leftDen <= 1.0e-8 || *parentDen - leftDen <= 1.0e-5) continue; + rightNum = 0.0; + for (j = 0; j < *nclass; ++j) { + leftCatClassCount[j] = tclasspop[j] - leftCatClassCount[j]; + rightNum += leftCatClassCount[j] * leftCatClassCount[j]; + } + decGini = (leftNum / leftDen) + (rightNum / (*parentDen - leftDen)); + if (decGini > *critmax) { + *critmax = decGini; + *nhit = 1; + *ncatsp = *lcat > *ncmax ? pack((unsigned int) *lcat, icat) : n + 1; + } + } + free(leftCatClassCount); +} + + + +/* Find best split of with categorical variable when there are two classes */ +#ifdef WIN64 +void F77_NAME(_catmaxb) +#endif +#ifndef WIN64 +void F77_NAME(catmaxb) +#endif +(double *totalWt, double *tclasscat, double *classCount, + int *nclass, int *nCat, int *nbest, double *critmax, + int *nhit, double *catCount) { + + double catProportion[32], cp[32], cm[32]; + int kcat[32]; + int i, j; + double bestsplit=0.0, rightDen, leftDen, leftNum, rightNum, crit; + + *nhit = 0; + for (i = 0; i < *nCat; ++i) { + catProportion[i] = catCount[i] ? + tclasscat[i * *nclass] / catCount[i] : 0.0; + kcat[i] = i + 1; + } + R_qsort_I(catProportion, kcat, 1, *nCat); + for (i = 0; i < *nclass; ++i) { + cp[i] = 0; + cm[i] = classCount[i]; + } + rightDen = *totalWt; + leftDen = 0.0; + for (i = 0; i < *nCat - 1; ++i) { + leftDen += catCount[kcat[i]-1]; + rightDen -= catCount[kcat[i]-1]; + leftNum = 0.0; + rightNum = 0.0; + for (j = 0; j < *nclass; ++j) { + cp[j] += tclasscat[j + (kcat[i]-1) * *nclass]; + cm[j] -= tclasscat[j + (kcat[i]-1) * *nclass]; + leftNum += cp[j] * cp[j]; + rightNum += cm[j] * cm[j]; + } + if (catProportion[i] < catProportion[i + 1]) { + /* If neither node is empty, check the split. */ + if (rightDen > 1.0e-5 && leftDen > 1.0e-5) { + crit = (leftNum / leftDen) + (rightNum / rightDen); + if (crit > *critmax) { + *critmax = crit; + bestsplit = .5 * (catProportion[i] + catProportion[i + 1]); + *nhit = 1; + } + } + } + } + if (*nhit == 1) { + zeroInt(kcat, *nCat); + for (i = 0; i < *nCat; ++i) { + catProportion[i] = catCount[i] ? + tclasscat[i * *nclass] / catCount[i] : 0.0; + kcat[i] = catProportion[i] < bestsplit ? 
1 : 0; + } + *nbest = pack(*nCat, kcat); + } +} + + + +void predictClassTree(double *x, int n, int mdim, int *nrnodes, int *treemap, + int *nodestatus, double *xbestsplit, + int *bestvar, int *nodeclass, + int treeSize, int *cat, int nclass, + int *jts, int *nodex, int maxcat) { + int m, i, j, k, *cbestsplit; + unsigned int npack; + + //Rprintf("maxcat %d\n",maxcat); + /* decode the categorical splits */ + if (maxcat > 1) { + cbestsplit = (int *) calloc(maxcat * treeSize, sizeof(int)); + zeroInt(cbestsplit, maxcat * treeSize); + for (i = 0; i < treeSize; ++i) { + if (nodestatus[i] != NODE_TERMINAL) { + if (cat[bestvar[i] - 1] > 1) { + npack = (unsigned int) xbestsplit[i]; + /* unpack `npack' into bits */ + for (j = 0; npack; npack >>= 1, ++j) { + cbestsplit[j + i*maxcat] = npack & 01; + } + } + } + } + } + // Rprintf( "nrnodes: %d\n", *nrnodes ); + for (i = 0; i < n; ++i) { + // Rprintf( "#sample: %d\n", i ); + k = 0; + while (nodestatus[k] != NODE_TERMINAL) { + m = bestvar[k] - 1; + if (cat[m] == 1) { + /* Split by a numerical predictor */ + // Rprintf( "\t***x: %.3f\tsplit: %.3f\tbestvar: %.d\n", x[m + i * mdim], xbestsplit[k], m); + k = (x[m + i * mdim] <= xbestsplit[k]) ? + treemap[k] - 1 : treemap[k + *nrnodes] - 1; + // Rprintf( "k: %d\n", k+1 ); + } else { + /* Split by a categorical predictor */ + k = cbestsplit[(int) x[m + i * mdim] - 1 + k * maxcat] ? + treemap[k] - 1 : treemap[k + *nrnodes] - 1; + } + } + /* Terminal node: assign class label */ + jts[i] = nodeclass[k]; + nodex[i] = k + 1; + } + if (maxcat > 1) free(cbestsplit); +} + +void predictClassTree(double *x, int n, int mdim, int *treemap, + int *nodestatus, double *xbestsplit, + int *bestvar, int *nodeclass, + int treeSize, int *cat, int nclass, + int *jts, int *nodex, int maxcat) { + int m, i, j, k, *cbestsplit; + unsigned int npack; + + //Rprintf("maxcat %d\n",maxcat); + /* decode the categorical splits */ + if (maxcat > 1) { + cbestsplit = (int *) calloc(maxcat * treeSize, sizeof(int)); + zeroInt(cbestsplit, maxcat * treeSize); + for (i = 0; i < treeSize; ++i) { + if (nodestatus[i] != NODE_TERMINAL) { + if (cat[bestvar[i] - 1] > 1) { + npack = (unsigned int) xbestsplit[i]; + /* unpack `npack' into bits */ + for (j = 0; npack; npack >>= 1, ++j) { + cbestsplit[j + i*maxcat] = npack & 01; + } + } + } + } + } + for (i = 0; i < n; ++i) { + // Rprintf( "#sample: %d\n", i ); + k = 0; + while (nodestatus[k] != NODE_TERMINAL) { + m = bestvar[k] - 1; + if (cat[m] == 1) { + /* Split by a numerical predictor */ + // Rprintf( "\t***x: %.3f\tsplit: %.3f\tbestvar: %d\n", x[m + i * mdim], xbestsplit[k], m); + k = (x[m + i * mdim] <= xbestsplit[k]) ? + treemap[k * 2] - 1 : treemap[1 + k * 2] - 1; + // Rprintf( "k: %d\n", k+1 ); + } else { + /* Split by a categorical predictor */ + k = cbestsplit[(int) x[m + i * mdim] - 1 + k * maxcat] ? + treemap[k * 2] - 1 : treemap[1 + k * 2] - 1; + } + } + /* Terminal node: assign class label */ + jts[i] = nodeclass[k]; + nodex[i] = k + 1; + } + if (maxcat > 1) free(cbestsplit); +} diff --git a/randomforest-matlab/RF_Class_C/src/cokus.cpp b/randomforest-matlab/RF_Class_C/src/cokus.cpp new file mode 100644 index 0000000..0c04a05 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/src/cokus.cpp @@ -0,0 +1,196 @@ +// This is the Mersenne Twister random number generator MT19937, which +// generates pseudorandom integers uniformly distributed in 0..(2^32 - 1) +// starting from any odd seed in 0..(2^32 - 1). 
This version is a recode +// by Shawn Cokus (Cokus@math.washington.edu) on March 8, 1998 of a version by +// Takuji Nishimura (who had suggestions from Topher Cooper and Marc Rieffel in +// July-August 1997). +// +// Effectiveness of the recoding (on Goedel2.math.washington.edu, a DEC Alpha +// running OSF/1) using GCC -O3 as a compiler: before recoding: 51.6 sec. to +// generate 300 million random numbers; after recoding: 24.0 sec. for the same +// (i.e., 46.5% of original time), so speed is now about 12.5 million random +// number generations per second on this machine. +// +// According to the URL +// (and paraphrasing a bit in places), the Mersenne Twister is ``designed +// with consideration of the flaws of various existing generators,'' has +// a period of 2^19937 - 1, gives a sequence that is 623-dimensionally +// equidistributed, and ``has passed many stringent tests, including the +// die-hard test of G. Marsaglia and the load test of P. Hellekalek and +// S. Wegenkittl.'' It is efficient in memory usage (typically using 2506 +// to 5012 bytes of static data, depending on data type sizes, and the code +// is quite short as well). It generates random numbers in batches of 624 +// at a time, so the caching and pipelining of modern systems is exploited. +// It is also divide- and mod-free. +// +// This library is free software; you can redistribute it and/or modify it +// under the terms of the GNU Library General Public License as published by +// the Free Software Foundation (either version 2 of the License or, at your +// option, any later version). This library is distributed in the hope that +// it will be useful, but WITHOUT ANY WARRANTY, without even the implied +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See +// the GNU Library General Public License for more details. You should have +// received a copy of the GNU Library General Public License along with this +// library; if not, write to the Free Software Foundation, Inc., 59 Temple +// Place, Suite 330, Boston, MA 02111-1307, USA. +// +// The code as Shawn received it included the following notice: +// +// Copyright (C) 1997 Makoto Matsumoto and Takuji Nishimura. When +// you use this, send an e-mail to with +// an appropriate reference to your work. +// +// It would be nice to CC: when you write. +// + +//#include +//#include + +// +// uint32 must be an unsigned integer type capable of holding at least 32 +// bits; exactly 32 should be fastest, but 64 is better on an Alpha with +// GCC at -O3 optimization so try your options and see whats best for you +// + +typedef unsigned long uint32; + +#define N (624) // length of state vector +#define M (397) // a period parameter +#define K (0x9908B0DFU) // a magic constant +#define hiBit(u) ((u) & 0x80000000U) // mask all but highest bit of u +#define loBit(u) ((u) & 0x00000001U) // mask all but lowest bit of u +#define loBits(u) ((u) & 0x7FFFFFFFU) // mask the highest bit of u +#define mixBits(u, v) (hiBit(u)|loBits(v)) // move hi bit of u to hi bit of v + +static uint32 state[N+1]; // state vector + 1 extra to not violate ANSI C +static uint32 *next; // next random value is computed from here +static int left = -1; // can *next++ this many times before reloading + + +void seedMT(uint32 seed) + { + // + // We initialize state[0..(N-1)] via the generator + // + // x_new = (69069 * x_old) mod 2^32 + // + // from Line 15 of Table 1, p. 106, Sec. 3.3.4 of Knuths + // _The Art of Computer Programming_, Volume 2, 3rd ed. 
+ // + // Notes (SJC): I do not know what the initial state requirements + // of the Mersenne Twister are, but it seems this seeding generator + // could be better. It achieves the maximum period for its modulus + // (2^30) iff x_initial is odd (p. 20-21, Sec. 3.2.1.2, Knuth); if + // x_initial can be even, you have sequences like 0, 0, 0, ...; + // 2^31, 2^31, 2^31, ...; 2^30, 2^30, 2^30, ...; 2^29, 2^29 + 2^31, + // 2^29, 2^29 + 2^31, ..., etc. so I force seed to be odd below. + // + // Even if x_initial is odd, if x_initial is 1 mod 4 then + // + // the lowest bit of x is always 1, + // the next-to-lowest bit of x is always 0, + // the 2nd-from-lowest bit of x alternates ... 0 1 0 1 0 1 0 1 ... , + // the 3rd-from-lowest bit of x 4-cycles ... 0 1 1 0 0 1 1 0 ... , + // the 4th-from-lowest bit of x has the 8-cycle ... 0 0 0 1 1 1 1 0 ... , + // ... + // + // and if x_initial is 3 mod 4 then + // + // the lowest bit of x is always 1, + // the next-to-lowest bit of x is always 1, + // the 2nd-from-lowest bit of x alternates ... 0 1 0 1 0 1 0 1 ... , + // the 3rd-from-lowest bit of x 4-cycles ... 0 0 1 1 0 0 1 1 ... , + // the 4th-from-lowest bit of x has the 8-cycle ... 0 0 1 1 1 1 0 0 ... , + // ... + // + // The generators potency (min. s>=0 with (69069-1)^s = 0 mod 2^32) is + // 16, which seems to be alright by p. 25, Sec. 3.2.1.3 of Knuth. It + // also does well in the dimension 2..5 spectral tests, but it could be + // better in dimension 6 (Line 15, Table 1, p. 106, Sec. 3.3.4, Knuth). + // + // Note that the random number user does not see the values generated + // here directly since reloadMT() will always munge them first, so maybe + // none of all of this matters. In fact, the seed values made here could + // even be extra-special desirable if the Mersenne Twister theory says + // so-- thats why the only change I made is to restrict to odd seeds. + // + + register uint32 x = (seed | 1U) & 0xFFFFFFFFU, *s = state; + register int j; + + for(left=0, *s++=x, j=N; --j; + *s++ = (x*=69069U) & 0xFFFFFFFFU); + } + + +uint32 reloadMT(void) + { + register uint32 *p0=state, *p2=state+2, *pM=state+M, s0, s1; + register int j; + + if(left < -1) + seedMT(4357U); + + left=N-1, next=state+1; + + for(s0=state[0], s1=state[1], j=N-M+1; --j; s0=s1, s1=*p2++) + *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U); + + for(pM=state, j=M; --j; s0=s1, s1=*p2++) + *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U); + + s1=state[0], *p0 = *pM ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U); + s1 ^= (s1 >> 11); + s1 ^= (s1 << 7) & 0x9D2C5680U; + s1 ^= (s1 << 15) & 0xEFC60000U; + return(s1 ^ (s1 >> 18)); + } + + +uint32 randomMT(void) + { + uint32 y; + + if(--left < 0) + return(reloadMT()); + + y = *next++; + y ^= (y >> 11); + y ^= (y << 7) & 0x9D2C5680U; + y ^= (y << 15) & 0xEFC60000U; + y ^= (y >> 18); + return(y); + } + +/* + #define uint32 unsigned long +#define SMALL_INT char +#define SMALL_INT_CLASS mxCHAR_CLASS +void seedMT(uint32 seed); +uint32 randomMT(void); + +#include "stdio.h" +#include "math.h" + +int main(void) + { + int j; + + // you can seed with any uint32, but the best are odds in 0..(2^32 - 1) + + seedMT(4357U); + uint32 MAX=pow(2,32)-1; + // print the first 2,002 random numbers seven to a line as an example + + for(j=0; j<2002; j++) + printf(" %10lu%s", (unsigned long) randomMT(), (j%7)==6 ? "\n" : ""); + + for(j=0; j<2002; j++) + printf(" %f%s", ((double)randomMT()/(double)MAX), (j%7)==6 ? 
"\n" : ""); + + + return(1); + } +*/ + + diff --git a/randomforest-matlab/RF_Class_C/src/cokus_test.cpp b/randomforest-matlab/RF_Class_C/src/cokus_test.cpp new file mode 100644 index 0000000..b1fb2cb --- /dev/null +++ b/randomforest-matlab/RF_Class_C/src/cokus_test.cpp @@ -0,0 +1,42 @@ +//this is a simple file to test the cokus.cpp mersenne twister code. + +//free code with no guarantee. No restrictions on usage +//written by: Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) + +#define uint32 unsigned long +#define SMALL_INT char +#define SMALL_INT_CLASS mxCHAR_CLASS +extern void seedMT(uint32 seed); +extern uint32 randomMT(void); + +#include "stdio.h" +#include "math.h" + +//generate lots of random number and check if they are within the limits +//else cry about it + +int main(void) { + int j, k; + + // you can seed with any uint32, but the best are odds in 0..(2^32 - 1) + + seedMT(4357); + uint32 MAX=pow(2, 32)-1; + +// print the first 2,002 random numbers seven to a line as an example +// for(j=0; j<2002; j++) +// printf(" %10lu%s", (unsigned long) randomMT(), (j%7)==6 ? "\n" : ""); + + double test_val; + for(k=0;k<100;k++) + for(j=0; j<2000002; j++) { + test_val = ((double)randomMT()/(double)MAX); + if (test_val>=1.0){ + printf("Problem"); + return(0); + } + //printf(" %f%s", test_val , (j%7)==6 ? "\n" : ""); + } + printf("Success"); + return(1); +} diff --git a/randomforest-matlab/RF_Class_C/src/mex_ClassificationRF_predict.cpp b/randomforest-matlab/RF_Class_C/src/mex_ClassificationRF_predict.cpp new file mode 100644 index 0000000..7cc0931 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/src/mex_ClassificationRF_predict.cpp @@ -0,0 +1,163 @@ +#include +#include "mex.h" +#include "memory.h" + +#define DEBUG_ON 0 +void classForest(int *mdim, int *ntest, int *nclass, int *maxcat, + int *nrnodes, int *ntree, double *x, double *xbestsplit, + double *pid, double *cutoff, double *countts, int *treemap, + int *nodestatus, int *cat, int *nodeclass, int *jts, + int *jet, int *bestvar, int *node, int *treeSize, + int *keepPred, int *prox, double *proxMat, int *nodes); + +void mexFunction( int nlhs, mxArray *plhs[], + int nrhs, const mxArray*prhs[] ) + +{ + if (DEBUG_ON) { mexPrintf("Number of parameters passed %d\n",nrhs);fflush(stdout);} + + int i; + int p_size = mxGetM(prhs[0]);int mdim = p_size; + int n_size = mxGetN(prhs[0]);int nsample=n_size; + int dimx[]={p_size, n_size}; + + if (DEBUG_ON) { mexPrintf("p_size %d, n_size %d\n",p_size,n_size);fflush(stdout);} + + + int nclass = (int)mxGetScalar(prhs[11]); + + int* cat = (int*)calloc(p_size,sizeof(int)); + for(i=0;i +#include +#include "mex.h" + +#define DEBUG_ON 0 +void classRF(double *x, int *dimx, int *cl, int *ncl, int *cat, int *maxcat, + int *sampsize, int *strata, int *Options, int *ntree, int *nvar, + int *ipi, double *classwt, double *cut, int *nodesize, + int *outcl, int *counttr, double *prox, + double *imprt, double *impsd, double *impmat, int *nrnodes, + int *ndbigtree, int *nodestatus, int *bestvar, int *treemap, + int *nodeclass, double *xbestsplit, double *errtr, + int *testdat, double *xts, int *clts, int *nts, double *countts, + int *outclts, int labelts, double *proxts, double *errts, + int *inbag); + +void mexFunction( int nlhs, mxArray *plhs[], + int nrhs, const mxArray*prhs[] ) + +{ + if(nrhs==15); + else{ + printf("Too less parameters: You supplied %d",nrhs); + return; + } + + double *_tmp_d; + + int i; + int p_size = mxGetM(prhs[0]); + int n_size = mxGetN(prhs[0]); + double *x = mxGetPr(prhs[0]); + int *y = 
(int*)mxGetData(prhs[1]); + int dimx[]={p_size, n_size}; + + if (DEBUG_ON){ + //print few of the values + //for(i=0;i<10;i++) + // mexPrintf("%d,",y[i]); + } + + int nclass = (int)((double)mxGetScalar(prhs[2])); + double nclass_d = ((double)mxGetScalar(prhs[2])); + int* cat = (int*)mxGetData(prhs[5]);//calloc(p_size,sizeof(int)); + //for(i=0;i n_max = 2'097'151 + * now k = 31 -> n_max = 4294'967'295 + */ + NUMERIC vt, vtt; + double R = 0.375; + int ii, ij, k, l, m; + #ifdef qsort_Index + int it, tt; + #endif + + + /* 1-indexing for I[], v[] (and `i' and `j') : */ + --v; + #ifdef qsort_Index + --I; + #endif + + ii = i;/* save */ + m = 1; + + L10: + if (i < j) { + if (R < 0.5898437) R += 0.0390625; else R -= 0.21875; + L20: + k = i; + /* ij = (j + i) >> 1; midpoint */ + ij = i + (int)((j - i)*R); + #ifdef qsort_Index + it = I[ij]; + #endif + vt = v[ij]; + if (v[i] > vt) { + #ifdef qsort_Index + I[ij] = I[i]; I[i] = it; it = I[ij]; + #endif + v[ij] = v[i]; v[i] = vt; vt = v[ij]; + } + /* L30:*/ + l = j; + if (v[j] < vt) { + #ifdef qsort_Index + I[ij] = I[j]; I[j] = it; it = I[ij]; + #endif + v[ij] = v[j]; v[j] = vt; vt = v[ij]; + if (v[i] > vt) { + #ifdef qsort_Index + I[ij] = I[i]; I[i] = it; it = I[ij]; + #endif + v[ij] = v[i]; v[i] = vt; vt = v[ij]; + } + } + + for(;;) { /*L50:*/ + //do l--; while (v[l] > vt); + l--;for(;v[l]>vt;l--); + + + #ifdef qsort_Index + tt = I[l]; + #endif + vtt = v[l]; + /*L60:*/ + //do k++; while (v[k] < vt); + k=k+1;for(;v[k] l) break; + + /* else (k <= l) : */ + #ifdef qsort_Index + I[l] = I[k]; I[k] = tt; + #endif + v[l] = v[k]; v[k] = vtt; + } + + m++; + if (l - i <= j - k) { + /*L70: */ + il[m] = k; + iu[m] = j; + j = l; + } + else { + il[m] = i; + iu[m] = l; + i = k; + } + }else { /* i >= j : */ + + L80: + if (m == 1) return; + + /* else */ + i = il[m]; + j = iu[m]; + m--; + } + + if (j - i > 10) goto L20; + + if (i == ii) goto L10; + + --i; + L100: + do { + ++i; + if (i == j) { + goto L80; + } + #ifdef qsort_Index + it = I[i + 1]; + #endif + vt = v[i + 1]; + } while (v[i] <= vt); + + k = i; + + do { /*L110:*/ + #ifdef qsort_Index + I[k + 1] = I[k]; + #endif + v[k + 1] = v[k]; + --k; + } while (vt < v[k]); + + #ifdef qsort_Index + I[k + 1] = it; + #endif + v[k + 1] = vt; + goto L100; +} /* R_qsort{i} */ diff --git a/randomforest-matlab/RF_Class_C/src/rf.h b/randomforest-matlab/RF_Class_C/src/rf.h new file mode 100644 index 0000000..ce500bc --- /dev/null +++ b/randomforest-matlab/RF_Class_C/src/rf.h @@ -0,0 +1,123 @@ +/************************************************************** + * mex interface to Andy Liaw et al.'s C code (used in R package randomForest) + * Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) + * License: GPLv2 + * Version: 0.02 + * + * other than adding the macros for F77_* and adding this message + * nothing changed . + *************************************************************/ + +/******************************************************************* + Copyright (C) 2001-7 Leo Breiman, Adele Cutler and Merck & Co., Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+*******************************************************************/ +#ifndef RF_H +#define RF_H + +/* test if the bit at position pos is turned on */ +#define isBitOn(x,pos) (((x) & (1 << (pos))) > 0) +/* swap two integers */ +#define swapInt(a, b) ((a ^= b), (b ^= a), (a ^= b)) +/* +void classRF(double *x, int *dimx, int *cl, int *ncl, int *cat, int *maxcat, + int *sampsize, int *Options, int *ntree, int *nvar, + int *ipi, double *pi, double *cut, int *nodesize, + int *outcl, int *counttr, double *prox, + double *imprt, double *, double *impmat, int *nrnodes, int *ndbigtree, + int *nodestatus, int *bestvar, int *treemap, int *nodeclass, + double *xbestsplit, double *pid, double *errtr, + int *testdat, double *xts, int *clts, int *nts, double *countts, + int *outclts, int *labelts, double *proxts, double *errts); +*/ + +#define F77_CALL(x) x ## _ +#define F77_NAME(x) F77_CALL(x) +#define F77_SUB(x) F77_CALL(x) + + +void normClassWt(int *cl, const int nsample, const int nclass, + const int useWt, double *classwt, int *classFreq); + +void classForest(int *mdim, int *ntest, int *nclass, int *maxcat, + int *nrnodes, int *jbt, double *xts, double *xbestsplit, + double *pid, double *cutoff, double *countts, int *treemap, + int *nodestatus, int *cat, int *nodeclass, int *jts, + int *jet, int *bestvar, int *nodexts, int *ndbigtree, + int *keepPred, int *prox, double *proxmatrix, int *nodes); + +void regTree(double *x, double *y, int mdim, int nsample, + int *lDaughter, int *rDaughter, double *upper, double *avnode, + int *nodestatus, int nrnodes, int *treeSize, int nthsize, + int mtry, int *mbest, int *cat, double *tgini, int *varUsed); + +void findBestSplit(double *x, int *jdex, double *y, int mdim, int nsample, + int ndstart, int ndend, int *msplit, double *decsplit, + double *ubest, int *ndendl, int *jstat, int mtry, + double sumnode, int nodecnt, int *cat); + +void predictRegTree(double *x, int nsample, int mdim, + int *lDaughter, int *rDaughter, int *nodestatus, + double *ypred, double *split, double *nodepred, + int *splitVar, int treeSize, int *cat, int maxcat, + int *nodex); + +void predictClassTree(double *x, int n, int mdim, int *treemap, + int *nodestatus, double *xbestsplit, + int *bestvar, int *nodeclass, + int ndbigtree, int *cat, int nclass, + int *jts, int *nodex, int maxcat); + +void predictClassTree(double *x, int n, int mdim, int *nrnodes, int *treemap, + int *nodestatus, double *xbestsplit, + int *bestvar, int *nodeclass, + int ndbigtree, int *cat, int nclass, + int *jts, int *nodex, int maxcat); + +int pack(int l, int *icat); +void unpack(unsigned int npack, int *icat); + +void zeroInt(int *x, int length); +void zeroDouble(double *x, int length); +void createClass(double *x, int realN, int totalN, int mdim); +void prepare(int *cl, const int nsample, const int nclass, const int ipi, + double *pi, double *pid, int *nc, double *wtt); +void makeA(double *x, const int mdim, const int nsample, int *cat, int *a, + int *b); +void modA(int *a, int *nuse, const int nsample, const int mdim, int *cat, + const int maxcat, int *ncase, int *jin); +void Xtranslate(double *x, int mdim, int nrnodes, int nsample, + int *bestvar, int *bestsplit, int *bestsplitnext, + double *xbestsplit, int *nodestatus, int *cat, int treeSize); +void permuteOOB(int m, double *x, int *in, int nsample, int mdim); +void computeProximity(double *prox, int oobprox, int *node, int *inbag, + int *oobpair, int n); + +/* Template of Fortran subroutines to be called from the C wrapper */ +/*extern void 
F77_NAME(buildtree)(int *a, int *b, int *cl, int *cat, + int *maxcat, int *mdim, int *nsample, + int *nclass, int *treemap, int *bestvar, + int *bestsplit, int *bestsplitnext, + double *tgini, int *nodestatus, int *nodepop, + int *nodestart, double *classpop, + double *tclasspop, double *tclasscat, + int *ta, int *nrnodes, int *, + int *, int *, int *, int *, int *, int *, + double *, double *, double *, + int *, int *, int *); +*/ +/* Node status */ +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +#endif /* RF_H */ diff --git a/randomforest-matlab/RF_Class_C/src/rfsub.f b/randomforest-matlab/RF_Class_C/src/rfsub.f new file mode 100644 index 0000000..8a34901 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/src/rfsub.f @@ -0,0 +1,477 @@ +c Copyright (C) 2001-7 Leo Breiman and Adele Cutler and Merck & Co, Inc. +c This program is free software; you can redistribute it and/or +c modify it under the terms of the GNU General Public License +c as published by the Free Software Foundation; either version 2 +c of the License, or (at your option) any later version. + +c This program is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +c GNU General Public License for more details. +c +c Modified by Andy Liaw and Matt Wiener: +c The main program is re-written as a C function to be called from R. +c All calls to the uniform RNG is replaced with R's RNG. Some subroutines +c not called are excluded. Variables and arrays declared as double as +c needed. Unused variables are deleted. +c +c SUBROUTINE BUILDTREE + + subroutine buildtree(a, b, cl, cat, maxcat, mdim, nsample, + 1 nclass, treemap, bestvar, bestsplit, bestsplitnext, tgini, + 1 nodestatus,nodepop, nodestart, classpop, tclasspop, + 1 tclasscat,ta,nrnodes, idmove, ndsize, ncase, mtry, iv, + 1 nodeclass, ndbigtree, win, wr, wl, mred, nuse, mind) + +c Buildtree consists of repeated calls to two subroutines, Findbestsplit +c and Movedata. Findbestsplit does just that--it finds the best split of +c the current node. Movedata moves the data in the split node right and +c left so that the data corresponding to each child node is contiguous. +c The buildtree bookkeeping is different from that in Friedman's original +c CART program. ncur is the total number of nodes to date. +c nodestatus(k)=1 if the kth node has been split. nodestatus(k)=2 if the +c node exists but has not yet been split, and =-1 of the node is terminal. +c A node is terminal if its size is below a threshold value, or if it is +c all one class, or if all the x-values are equal. If the current node k +c is split, then its children are numbered ncur+1 (left), and +c ncur+2(right), ncur increases to ncur+2 and the next node to be split is +c numbered k+1. When no more nodes can be split, buildtree returns to the +c main program. 
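+c For example: splitting the root (node 1) makes its children nodes 2
+c and 3 and raises ncur to 3; if node 2 is split next its children are
+c nodes 4 and 5, and so on. treemap(1,k) and treemap(2,k) hold the
+c left and right child numbers of node k.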
+ + implicit double precision(a-h,o-z) + integer a(mdim, nsample), cl(nsample), cat(mdim), + 1 treemap(2,nrnodes), bestvar(nrnodes), + 1 bestsplit(nrnodes), nodestatus(nrnodes), ta(nsample), + 1 nodepop(nrnodes), nodestart(nrnodes), + 1 bestsplitnext(nrnodes), idmove(nsample), + 1 ncase(nsample), b(mdim,nsample), + 1 iv(mred), nodeclass(nrnodes), mind(mred) + + double precision tclasspop(nclass), classpop(nclass, nrnodes), + 1 tclasscat(nclass, 32), win(nsample), wr(nclass), + 1 wl(nclass), tgini(mdim), xrand + integer msplit, ntie + + msplit = 0 + call zerv(nodestatus,nrnodes) + call zerv(nodestart,nrnodes) + call zerv(nodepop,nrnodes) + call zermr(classpop,nclass,nrnodes) + + do j=1,nclass + classpop(j, 1) = tclasspop(j) + end do + ncur = 1 + nodestart(1) = 1 + nodepop(1) = nuse + nodestatus(1) = 2 +c start main loop + do 30 kbuild = 1, nrnodes + if (kbuild .gt. ncur) goto 50 + if (nodestatus(kbuild) .ne. 2) goto 30 +c initialize for next call to findbestsplit + ndstart = nodestart(kbuild) + ndend = ndstart + nodepop(kbuild) - 1 + do j = 1, nclass + tclasspop(j) = classpop(j,kbuild) + end do + jstat = 0 + + call findbestsplit(a,b,cl,mdim,nsample,nclass,cat,maxcat, + 1 ndstart, ndend,tclasspop,tclasscat,msplit, decsplit, + 1 nbest,ncase, jstat,mtry,win,wr,wl,mred,mind) + if (jstat .eq. -1) then + nodestatus(kbuild) = -1 + goto 30 + else + bestvar(kbuild) = msplit + iv(msplit) = 1 + if (decsplit .lt. 0.0) decsplit = 0.0 + tgini(msplit) = tgini(msplit) + decsplit + if (cat(msplit) .eq. 1) then + bestsplit(kbuild) = a(msplit,nbest) + bestsplitnext(kbuild) = a(msplit,nbest+1) + else + bestsplit(kbuild) = nbest + bestsplitnext(kbuild) = 0 + endif + endif + + call movedata(a,ta,mdim,nsample,ndstart,ndend,idmove,ncase, + 1 msplit,cat,nbest,ndendl) + +c leftnode no.= ncur+1, rightnode no. = ncur+2. + nodepop(ncur+1) = ndendl - ndstart + 1 + nodepop(ncur+2) = ndend - ndendl + nodestart(ncur+1) = ndstart + nodestart(ncur+2) = ndendl + 1 + +c find class populations in both nodes + do n = ndstart, ndendl + nc = ncase(n) + j=cl(nc) + classpop(j,ncur+1) = classpop(j,ncur+1) + win(nc) + end do + do n = ndendl+1, ndend + nc = ncase(n) + j = cl(nc) + classpop(j,ncur+2) = classpop(j,ncur+2) + win(nc) + end do +c check on nodestatus + nodestatus(ncur+1) = 2 + nodestatus(ncur+2) = 2 + if (nodepop(ncur+1).le.ndsize) nodestatus(ncur+1) = -1 + if (nodepop(ncur+2).le.ndsize) nodestatus(ncur+2) = -1 + popt1 = 0 + popt2 = 0 + do j = 1, nclass + popt1 = popt1 + classpop(j,ncur+1) + popt2 = popt2 + classpop(j,ncur+2) + end do + + do j=1,nclass + if (classpop(j,ncur+1).eq.popt1) nodestatus(ncur+1) = -1 + if (classpop(j,ncur+2).eq.popt2) nodestatus(ncur+2) = -1 + end do + + treemap(1,kbuild) = ncur + 1 + treemap(2,kbuild) = ncur + 2 + nodestatus(kbuild) = 1 + ncur = ncur+2 + if (ncur.ge.nrnodes) goto 50 + + 30 continue + 50 continue + + ndbigtree = nrnodes + do k=nrnodes, 1, -1 + if (nodestatus(k) .eq. 0) ndbigtree = ndbigtree - 1 + if (nodestatus(k) .eq. 2) nodestatus(k) = -1 + end do + +c form prediction in terminal nodes + do kn = 1, ndbigtree + if(nodestatus(kn) .eq. -1) then + pp = 0 + ntie = 1 + do j = 1, nclass + if (classpop(j,kn) .gt. pp) then + nodeclass(kn) = j + pp = classpop(j,kn) + end if +c Break ties at random: + if (classpop(j,kn) .eq. pp) then + ntie = ntie + 1 + call rrand(xrand) + if (xrand .lt. 1.0 / ntie) then + nodeclass(kn)=j + pp=classpop(j,kn) + end if + end if + end do + end if + end do + + end + +c SUBROUTINE FINDBESTSPLIT +c For the best split, msplit is the variable split on. 
decsplit is the +c dec. in impurity. If msplit is numerical, nsplit is the case number +c of value of msplit split on, and nsplitnext is the case number of the +c next larger value of msplit. If msplit is categorical, then nsplit is +c the coding into an integer of the categories going left. + subroutine findbestsplit(a, b, cl, mdim, nsample, nclass, cat, + 1 maxcat, ndstart, ndend, tclasspop, tclasscat, msplit, + 2 decsplit, nbest, ncase, jstat, mtry, win, wr, wl, + 3 mred, mind) + implicit double precision(a-h,o-z) + integer a(mdim,nsample), cl(nsample), cat(mdim), + 1 ncase(nsample), b(mdim,nsample), nn, j + double precision tclasspop(nclass), tclasscat(nclass,32), dn(32), + 1 win(nsample), wr(nclass), wl(nclass), xrand + integer mind(mred), ncmax, ncsplit,nhit, ntie + ncmax = 10 + ncsplit = 512 +c compute initial values of numerator and denominator of Gini + pno = 0.0 + pdo = 0.0 + do j = 1, nclass + pno = pno + tclasspop(j) * tclasspop(j) + pdo = pdo + tclasspop(j) + end do + crit0 = pno / pdo + jstat = 0 + +c start main loop through variables to find best split + critmax = -1.0e25 + do k = 1, mred + mind(k) = k + end do + nn = mred +c sampling mtry variables w/o replacement. + do mt = 1, mtry + call rrand(xrand) + j = int(nn * xrand) + 1 + mvar = mind(j) + mind(j) = mind(nn) + mind(nn) = mvar + nn = nn - 1 + lcat = cat(mvar) + if (lcat .eq. 1) then +c Split on a numerical predictor. + rrn = pno + rrd = pdo + rln = 0 + rld = 0 + call zervr(wl, nclass) + do j = 1, nclass + wr(j) = tclasspop(j) + end do + ntie = 1 + do nsp = ndstart, ndend-1 + nc = a(mvar, nsp) + u = win(nc) + k = cl(nc) + rln = rln + u * (2 * wl(k) + u) + rrn = rrn + u * (-2 * wr(k) + u) + rld = rld + u + rrd = rrd - u + wl(k) = wl(k) + u + wr(k) = wr(k) - u + if (b(mvar, nc) .lt. b(mvar, a(mvar, nsp + 1))) then +c If neither nodes is empty, check the split. + if (dmin1(rrd, rld) .gt. 1.0e-5) then + crit = (rln / rld) + (rrn / rrd) + if (crit .gt. critmax) then + nbest = nsp + critmax = crit + msplit = mvar + end if +c Break ties at random: + if (crit .eq. critmax) then + ntie = ntie + 1 + call rrand(xrand) + if (xrand .lt. 1.0 / ntie) then + nbest = nsp + critmax = crit + msplit = mvar + end if + end if + end if + end if + end do + else +c Split on a categorical predictor. Compute the decrease in impurity. + call zermr(tclasscat, nclass, 32) + do nsp = ndstart, ndend + nc = ncase(nsp) + l = a(mvar, ncase(nsp)) + tclasscat(cl(nc), l) = tclasscat(cl(nc), l) + win(nc) + end do + nnz = 0 + do i = 1, lcat + su = 0 + do j = 1, nclass + su = su + tclasscat(j, i) + end do + dn(i) = su + if(su .gt. 0) nnz = nnz + 1 + end do + nhit = 0 + if (nnz .gt. 1) then + if (nclass .eq. 2 .and. lcat .gt. ncmax) then + call catmaxb(pdo, tclasscat, tclasspop, nclass, + & lcat, nbest, critmax, nhit, dn) + else + call catmax(pdo, tclasscat, tclasspop, nclass, lcat, + & nbest, critmax, nhit, maxcat, ncmax, ncsplit) + end if + if (nhit .eq. 1) msplit = mvar +c else +c critmax = -1.0e25 + end if + end if + end do + if (critmax .lt. -1.0e10 .or. msplit .eq. 0) jstat = -1 + decsplit = critmax - crit0 + return + end + +C ============================================================== +c SUBROUTINE MOVEDATA +c This subroutine is the heart of the buildtree construction. Based on the +c best split the data in the part of the a matrix corresponding to the +c current node is moved to the left if it belongs to the left child and +c right if it belongs to the right child. 
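+c On return ncase(ndstart..ndendl) holds the case numbers of the left
+c child and ncase(ndendl+1..ndend) those of the right child. For a
+c categorical split, nbest is the packed integer whose binary expansion
+c (see myunpack) flags the categories that go left.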
+ + subroutine movedata(a,ta,mdim,nsample,ndstart,ndend,idmove, + 1 ncase,msplit,cat,nbest,ndendl) + implicit double precision(a-h,o-z) + integer a(mdim,nsample),ta(nsample),idmove(nsample), + 1 ncase(nsample),cat(mdim),icat(32) + +c compute idmove=indicator of case nos. going left + + if (cat(msplit).eq.1) then + do nsp=ndstart,nbest + nc=a(msplit,nsp) + idmove(nc)=1 + end do + do nsp=nbest+1, ndend + nc=a(msplit,nsp) + idmove(nc)=0 + end do + ndendl=nbest + else + ndendl=ndstart-1 + l=cat(msplit) + call myunpack(l,nbest,icat) + do nsp=ndstart,ndend + nc=ncase(nsp) + if (icat(a(msplit,nc)).eq.1) then + idmove(nc)=1 + ndendl=ndendl+1 + else + idmove(nc)=0 + endif + end do + endif + +c shift case. nos. right and left for numerical variables. + + do 40 msh=1,mdim + if (cat(msh).eq.1) then + k=ndstart-1 + do 50 n=ndstart,ndend + ih=a(msh,n) + if (idmove(ih).eq.1) then + k=k+1 + ta(k)=a(msh,n) + endif + 50 continue + do 60 n=ndstart,ndend + ih=a(msh,n) + if (idmove(ih).eq.0) then + k=k+1 + ta(k)=a(msh,n) + endif + 60 continue + + do 70 k=ndstart,ndend + a(msh,k)=ta(k) + 70 continue + endif + + 40 continue + ndo=0 + if (ndo.eq.1) then + do 140 msh = 1, mdim + if (cat(msh) .gt. 1) then + k = ndstart - 1 + do 150 n = ndstart, ndend + ih = ncase(n) + if (idmove(ih) .eq. 1) then + k = k + 1 + ta(k) = a(msh, ih) + endif + 150 continue + do 160 n = ndstart, ndend + ih = ncase(n) + if (idmove(ih) .eq. 0) then + k = k + 1 + ta(k) = a(msh,ih) + endif + 160 continue + + do 170 k = ndstart, ndend + a(msh,k) = ta(k) + 170 continue + endif + + 140 continue + end if + +c compute case nos. for right and left nodes. + + if (cat(msplit).eq.1) then + do 80 n=ndstart,ndend + ncase(n)=a(msplit,n) + 80 continue + else + k=ndstart-1 + do 90 n=ndstart, ndend + if (idmove(ncase(n)).eq.1) then + k=k+1 + ta(k)=ncase(n) + endif + 90 continue + do 100 n=ndstart,ndend + if (idmove(ncase(n)).eq.0) then + k=k+1 + ta(k)=ncase(n) + endif + 100 continue + do 110 k=ndstart,ndend + ncase(k)=ta(k) + 110 continue + endif + + end + + subroutine myunpack(l,npack,icat) + +c npack is a long integer. The sub. returns icat, an integer of zeroes and +c ones corresponding to the coefficients in the binary expansion of npack. + + integer icat(32),npack + do j=1,32 + icat(j)=0 + end do + n=npack + icat(1)=mod(n,2) + do k=2,l + n=(n-icat(k-1))/2 + icat(k)=mod(n,2) + end do + end + + subroutine zerv(ix,m1) + integer ix(m1) + do n=1,m1 + ix(n)=0 + end do + end + + subroutine zervr(rx,m1) + double precision rx(m1) + do n=1,m1 + rx(n)=0.0d0 + end do + end + + subroutine zerm(mx,m1,m2) + integer mx(m1,m2) + do i=1,m1 + do j=1,m2 + mx(i,j)=0 + end do + end do + end + + subroutine zermr(rx,m1,m2) + double precision rx(m1,m2) + do i=1,m1 + do j=1,m2 + rx(i,j)=0.0d0 + end do + end do + end + + subroutine zermd(rx,m1,m2) + double precision rx(m1,m2) + do i=1,m1 + do j=1,m2 + rx(i,j)=0.0d0 + end do + end do + end diff --git a/randomforest-matlab/RF_Class_C/src/rfsub.o b/randomforest-matlab/RF_Class_C/src/rfsub.o new file mode 100644 index 0000000..535c613 Binary files /dev/null and b/randomforest-matlab/RF_Class_C/src/rfsub.o differ diff --git a/randomforest-matlab/RF_Class_C/src/rfutils.cpp b/randomforest-matlab/RF_Class_C/src/rfutils.cpp new file mode 100644 index 0000000..8de5c69 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/src/rfutils.cpp @@ -0,0 +1,308 @@ +/******************************************************************* + Copyright (C) 2001-7 Leo Breiman, Adele Cutler and Merck & Co., Inc. 
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. +*******************************************************************/ +//#include +#include "rf.h" +#include "memory.h" +#include "stdlib.h" +#include "qsort.c" + +#define MAX_UINT_COKUS 4294967295 //basically 2^32-1 + +typedef unsigned long uint32; +extern void seedMT(uint32 seed); +extern uint32 reloadMT(void); +extern uint32 randomMT(void); +extern double unif_rand(); + +void zeroInt(int *x, int length) { + memset(x, 0, length * sizeof(int)); +} + +void zeroDouble(double *x, int length) { + memset(x, 0, length * sizeof(double)); +} + +void createClass(double *x, int realN, int totalN, int mdim) { +/* Create the second class by bootstrapping each variable independently. */ + int i, j, k; + for (i = realN; i < totalN; ++i) { + for (j = 0; j < mdim; ++j) { + k = (int) unif_rand() * realN; + x[j + i * mdim] = x[j + k * mdim]; + } + } +} + +#include "stdio.h" +void normClassWt(int *cl, const int nsample, const int nclass, + const int useWt, double *classwt, int *classFreq) { + int i; + double sumwt = 0.0; + //printf("useWt %d",useWt); + if (useWt) { + //printf("User supplied via priors classwt"); + /* Normalize user-supplied weights so they sum to one. */ + for (i = 0; i < nclass; ++i) sumwt += classwt[i]; + //printf("\n sumwt %f",sumwt); + for (i = 0; i < nclass; ++i) classwt[i] /= sumwt; + } else { + for (i = 0; i < nclass; ++i) { + classwt[i] = ((double) classFreq[i]) / nsample; + } + } + for (i = 0; i < nclass; ++i) { + classwt[i] = classFreq[i] ? classwt[i] * nsample / classFreq[i] : 0.0; + } +} + +void makeA(double *x, const int mdim, const int nsample, int *cat, int *a, + int *b) { + /* makeA() constructs the mdim by nsample integer array a. For each + numerical variable with values x(m, n), n=1, ...,nsample, the x-values + are sorted from lowest to highest. Denote these by xs(m, n). Then + a(m,n) is the case number in which xs(m, n) occurs. The b matrix is + also contructed here. If the mth variable is categorical, then + a(m, n) is the category of the nth case number. */ + int i, j, n1, n2; + double *v= (double *) calloc(nsample, sizeof(double)); + int *index = (int *) calloc(nsample, sizeof(int)); + + for (i = 0; i < mdim; ++i) { + if (cat[i] == 1) { /* numerical predictor */ + for (j = 0; j < nsample; ++j) { + v[j] = x[i + j * mdim]; + index[j] = j + 1; + } + R_qsort_I(v, index, 1, nsample); + + /* this sorts the v(n) in ascending order. index(n) is the case + number of that v(n) nth from the lowest (assume the original + case numbers are 1,2,...). */ + for (j = 0; j < nsample-1; ++j) { + n1 = index[j]; + n2 = index[j + 1]; + a[i + j * mdim] = n1; + if (j == 0) b[i + (n1-1) * mdim] = 1; + b[i + (n2-1) * mdim] = (v[j] < v[j + 1]) ? 
+ b[i + (n1-1) * mdim] + 1 : b[i + (n1-1) * mdim]; + } + a[i + (nsample-1) * mdim] = index[nsample-1]; + } else { /* categorical predictor */ + for (j = 0; j < nsample; ++j) + a[i + j*mdim] = (int) x[i + j * mdim]; + } + } + free(index); + free(v); +} + + +void modA(int *a, int *nuse, const int nsample, const int mdim, + int *cat, const int maxcat, int *ncase, int *jin) { + int i, j, k, m, nt; + + *nuse = 0; + for (i = 0; i < nsample; ++i) if (jin[i]) (*nuse)++; + + for (i = 0; i < mdim; ++i) { + k = 0; + nt = 0; + if (cat[i] == 1) { + for (j = 0; j < nsample; ++j) { + if (jin[a[i + k * mdim] - 1]) { + a[i + nt * mdim] = a[i + k * mdim]; + k++; + } else { + for (m = 0; m < nsample - k; ++m) { + if (jin[a[i + (k + m) * mdim] - 1]) { + a[i + nt * mdim] = a[i + (k + m) * mdim]; + k += m + 1; + break; + } + } + } + nt++; + if (nt >= *nuse) break; + } + } + } + if (maxcat > 1) { + k = 0; + nt = 0; + for (i = 0; i < nsample; ++i) { + if (jin[k]) { + k++; + ncase[nt] = k; + } else { + for (j = 0; j < nsample - k; ++j) { + if (jin[k + j]) { + ncase[nt] = k + j + 1; + k += j + 1; + break; + } + } + } + nt++; + if (nt >= *nuse) break; + } + } +} + +void Xtranslate(double *x, int mdim, int nrnodes, int nsample, + int *bestvar, int *bestsplit, int *bestsplitnext, + double *xbestsplit, int *nodestatus, int *cat, int treeSize) { +/* + this subroutine takes the splits on numerical variables and translates them + back into x-values. It also unpacks each categorical split into a + 32-dimensional vector with components of zero or one--a one indicates that + the corresponding category goes left in the split. +*/ + + int i, m; + + for (i = 0; i < treeSize; ++i) { + if (nodestatus[i] == 1) { + m = bestvar[i] - 1; + if (cat[m] == 1) { + xbestsplit[i] = 0.5 * (x[m + (bestsplit[i] - 1) * mdim] + + x[m + (bestsplitnext[i] - 1) * mdim]); + } else { + xbestsplit[i] = (double) bestsplit[i]; + } + } + } +} + +void permuteOOB(int m, double *x, int *in, int nsample, int mdim) { +/* Permute the OOB part of a variable in x. + * Argument: + * m: the variable to be permuted + * x: the data matrix (variables in rows) + * in: vector indicating which case is OOB + * nsample: number of cases in the data + * mdim: number of variables in the data + */ + double *tp, tmp; + int i, last, k, nOOB = 0; + + tp = (double *) calloc(nsample, sizeof(double)); + + for (i = 0; i < nsample; ++i) { + /* make a copy of the OOB part of the data into tp (for permuting) */ + if (in[i] == 0) { + tp[nOOB] = x[m + i*mdim]; + nOOB++; + } + } + /* Permute tp */ + last = nOOB; + for (i = 0; i < nOOB; ++i) { + k = (int) last * unif_rand(); + tmp = tp[last - 1]; + tp[last - 1] = tp[k]; + tp[k] = tmp; + last--; + } + + /* Copy the permuted OOB data back into x. */ + nOOB = 0; + for (i = 0; i < nsample; ++i) { + if (in[i] == 0) { + x[m + i*mdim] = tp[nOOB]; + nOOB++; + } + } + free(tp); +} + +/* Compute proximity. */ +void computeProximity(double *prox, int oobprox, int *node, int *inbag, + int *oobpair, int n) { +/* Accumulate the number of times a pair of points fall in the same node. + prox: n x n proximity matrix + oobprox: should the accumulation only count OOB cases? 
(0=no, 1=yes) + node: vector of terminal node labels + inbag: indicator of whether a case is in-bag + oobpair: matrix to accumulate the number of times a pair is OOB together + n: total number of cases +*/ + int i, j; + for (i = 0; i < n; ++i) { + for (j = i+1; j < n; ++j) { + if (oobprox) { + /* if (jin[k] == 0 && jin[n] == 0) { */ + if ((inbag[i] > 0) ^ (inbag[j] > 0)) { + oobpair[j*n + i] ++; + oobpair[i*n + j] ++; + if (node[i] == node[j]) { + prox[j*n + i] += 1.0; + prox[i*n + j] += 1.0; + } + } + } else { + if (node[i] == node[j]) { + prox[j*n + i] += 1.0; + prox[i*n + j] += 1.0; + } + } + } + } +} + +int pack(int nBits, int *bits) { + int i = nBits, pack = 0; + while (--i >= 0) pack += bits[i] << i; + return(pack); +} + +void unpack(unsigned int pack, int *bits) { +/* pack is a 4-byte integer. The sub. returns icat, an integer array of + zeroes and ones corresponding to the coefficients in the binary expansion + of pack. */ + int i; + for (i = 0; pack != 0; pack >>= 1, ++i) bits[i] = pack & 1; +} + +#ifdef OLD + +double oldpack(int l, int *icat) { + /* icat is a binary integer with ones for categories going left + * and zeroes for those going right. The sub returns npack- the integer */ + int k; + double pack = 0.0; + + for (k = 0; k < l; ++k) { + if (icat[k]) pack += R_pow_di(2.0, k); + } + return(pack); +} + + +void oldunpack(int l, int npack, int *icat) { +/* + * npack is a long integer. The sub. returns icat, an integer of zeroes and + * ones corresponding to the coefficients in the binary expansion of npack. + */ + int i; + zeroInt(icat, 32); + icat[0] = npack % 2; + for (i = 1; i < l; ++i) { + npack = (npack - icat[i-1]) / 2; + icat[i] = npack % 2; + } +} + + + +#endif /* OLD */ diff --git a/randomforest-matlab/RF_Class_C/src/twonorm_C_wrapper.cpp b/randomforest-matlab/RF_Class_C/src/twonorm_C_wrapper.cpp new file mode 100644 index 0000000..f8c0a32 --- /dev/null +++ b/randomforest-matlab/RF_Class_C/src/twonorm_C_wrapper.cpp @@ -0,0 +1,316 @@ +/******************************************************************** + * Standalone interface to Andy Liaw et al.'s C code (used in R package randomForest) + * Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) + * License: GPLv2 + * Version: 0.02 + * + * What this barely has: link to Random forest code, setting parameters for + * number of tree, nvar/mtry. No support for almost everything else. + * + * Uses the twonorm dataset, creating the RF models and + * testing on the dataset. This is just a simple code to show the interface + * with the code from Andy Liaw et al. Modify as needed for custom datasets + * + * In this default form it will output the predictions out. 
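The wrapper below fills X and Y by scanning the two ASCII files with fscanf before handing them to classRF. A minimal sketch of that kind of reader, assuming one example per text row and the x[feature + example*mdim] layout classRF expects; the helper name read_ascii_matrix and its error handling are illustrative, not part of the original file.

// Illustrative sketch, not part of the patch: read a rows-by-cols
// whitespace-separated ASCII matrix so that X[j + i*cols] holds feature j
// of example i, the layout classRF() uses (mdim == cols here).
#include <stdio.h>

static int read_ascii_matrix(const char *fname, double *X, int rows, int cols) {
    FILE *fp = fopen(fname, "r");
    int i, j;
    if (fp == NULL) return -1;                       // caller reports the error
    for (i = 0; i < rows; ++i)                       // one example per text row
        for (j = 0; j < cols; ++j)
            if (fscanf(fp, "%lf", &X[j + i * cols]) != 1) { fclose(fp); return -1; }
    fclose(fp);
    return 0;
}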
+ * use DEBUG_ON to control verbosity + * + * to compile on linux: use the Makefile command 'make twonorm' + * + * to compile on windows: use either cygwin or VC++ ( represents the compiler) + * to compile these 5 files "cokus.cpp classRF.cpp twonorm_C_wrapper.cpp rfutils.cpp classTree.cpp" + * and also either compile or link with rfsub.f (need a fortran compiler) or rfsub.o (precompiled) + * + * Errata: the file reading part will be needed to tweaked per requirement + * + * Generating the ascii files: + * the data files can be generated from matlab as + * save('filename.mat','variable','-ascii') + *******************************************************************/ + + +#include +#include +#include +#include + +#define DEBUG_ON 1 + +void classRF(double *x, int *dimx, int *cl, int *ncl, int *cat, int *maxcat, + int *sampsize, int *strata, int *Options, int *ntree, int *nvar, + int *ipi, double *classwt, double *cut, int *nodesize, + int *outcl, int *counttr, double *prox, + double *imprt, double *impsd, double *impmat, int *nrnodes, + int *ndbigtree, int *nodestatus, int *bestvar, int *treemap, + int *nodeclass, double *xbestsplit, double *errtr, + int *testdat, double *xts, int *clts, int *nts, double *countts, + int *outclts, int labelts, double *proxts, double *errts, + int *inbag); + + +void classForest(int *mdim, int *ntest, int *nclass, int *maxcat, + int *nrnodes, int *ntree, double *x, double *xbestsplit, + double *pid, double *cutoff, double *countts, int *treemap, + int *nodestatus, int *cat, int *nodeclass, int *jts, + int *jet, int *bestvar, int *node, int *treeSize, + int *keepPred, int *prox, double *proxMat, int *nodes); + +int main(){ + char X_filename[100], Y_filename[100]; + FILE *fp_X, *fp_Y, *fp; + + //set the number of examples in rows and number of dimensions in cols + int cols=20,rows=300,i,j; + + /***START: NO NEED TO CHANGE ANYTHING FROM HERE TO THERE***************/ + int p_size=cols,n_size=rows; + int nsample=n_size; + + //need the below for some string ops + char dum_str[100]; + + //allocate some memory for the data + double* X=(double*)calloc(rows*cols,sizeof(double)); + int* Y=(int*)calloc(rows,sizeof(int)); + + + //the classifcation version requires {D,N}, where D=(num) dimensions, N=(num) examples + int dimx[2]; + dimx[0]=p_size; + dimx[1]=n_size; + + int* cat = (int*)calloc(p_size,sizeof(int)); + + + /***END: NO NEED TO CHANGE ANYTHING FROM HERE TO THERE*****************/ + + + //save the file to open in some string variables + strcpy(X_filename,"data/X_twonorm.txt"); + strcpy(Y_filename,"data/Y_twonorm.txt"); + + + //write prediction OUTPUT into Y_hat.txt +// fp = fopen("Y_hat.txt","w"); + + + /**read and save to the X and Y variable** + **this file grabbing is different in the regression version as it seemed to work there + *but not here :( */ + fp_X = fopen(X_filename,"r"); + fp_Y = fopen(Y_filename,"r"); + + if (fp_X !=NULL) + { + if(DEBUG_ON) printf("file opened: %s\n",X_filename); + }else{ + printf("cannot find files for data\n");exit(0); + } + + if (fp_Y !=NULL) + { + if(DEBUG_ON) printf("file opened: %s\n",Y_filename); + }else{ + printf("cannot find files for data\n");exit(0); + } + + fflush(stdout); + + for(i=0;i + int nclass=2; + + //need to do set this else everything blows up, represents the number of categories for + //every dimension - + for(i=0;i + + int sampsize=n_size; //if replace then sampsize=n_size or sampsize=0.632*n_size + + //no need to change this + int nsum = sampsize; + + int strata = 1; + //other options + int addclass = 0; + int 
importance=0; + int localImp=0; + int proximity=0; + int oob_prox=0; + int do_trace; //this variable prints verbosely each step + if(DEBUG_ON) + do_trace=1; + else + do_trace=0; + int keep_forest=1; + int replace=1; + int stratify=0; + int keep_inbag=0; + int Options[]={addclass,importance,localImp,proximity,oob_prox + ,do_trace,keep_forest,replace,stratify,keep_inbag}; + + + //ntree= number of tree. mtry=mtry :) + int ntree=500; int nt=ntree; + int mtry=(int)floor(sqrt(p_size)); // - + if(DEBUG_ON) printf("ntree %d, mtry %d\n",ntree,mtry); + + int ipi=0; + double* classwt=(double*)calloc(nclass,sizeof(double)); + double* cutoff=(double*)calloc(nclass,sizeof(double)); + for(i=0;i size(X,2) + mtry = max(floor(size(X,2)/3),1); + DEFAULTS_ON=1; + end + addclass=0; + + + [N D] = size(X); + + if length(unique(Y))<=5, warning('Do you want regression? there are just 5 or less unique values'); end + if N==0, error('Data (X) has 0 rows'); end + if mtry<1 || mtry>D , warning('Invalid mtry. reset to within valid range'); DEFAULTS_ON=1; end + mtry = max(1, min(D,round(mtry))); + + if DEFAULTS_ON + fprintf('\tSetting to defaults %d trees and mtry=%d\n',ntree,mtry); + end + + if length(Y)~=N || length(Y)==0 + error('length of Y not the same as X or Y is null'); + end + + if ~isempty(find(isnan(X))); error('NaNs in X'); end + if ~isempty(find(isnan(Y))); error('NaNs in Y'); end + + %now handle categories. Problem is that categories in R are more + %enhanced. In this i ask the user to specify the column/features to + %consider as categories, 1 if all the values are real values else + %specify the number of categories here + if exist ('extra_options','var') && isfield(extra_options,'categories') + ncat = extra_options.categories; + else + ncat = ones(1,D); + end + + maxcat = max(ncat); + if maxcat>32 + error('Can not handle categorical predictors with more than 32 categories'); + end + + %classRF - line 88 in randomForest.default.R + nclass = length(unique(Y)); + addclass = FALSE; + + if ~exist('proximity','var') + proximity = addclass; + oob_prox = proximity; + end + + if ~exist('oob_prox','var') + oob_prox = proximity; + end + + %i handle the below in the mex file +% if proximity +% prox = zeros(N,N); +% proxts = 1; +% else +% prox = 1; +% proxts = 1; +% end + + %i handle the below in the mex file + if localImp + importance = TRUE; +% impmat = zeors(D,N); + else +% impmat = 1; + end + + if importance + if (nPerm<1) + nPerm = int32(1); + else + nPerm = int32(nPerm); + end + + %regRF +% impout = zeros(D,2); +% impSD = zeros(D,1); + else +% impout = zeros(D,1); +% impSD = 1; + end + + %i handle the below in the mex file + %somewhere near line 157 in randomForest.default.R + if addclass +% nsample = 2*n; + else +% nsample = n; + end + + Stratify = (length(sampsize)>1); + if (~Stratify && sampsize>N) + error('Sampsize too large') + end + + if Stratify + error('Sampsize should be of length one') + end + + %i handle the below in the mex file + % nrnodes = 2*floor(sampsize/max(1,nodesize-4))+1; + % xtest = 1; + % ytest = 1; + % ntest = 1; + % labelts = FALSE; + % nt = ntree; + + Options = int32([importance,localImp,nPerm]); + + if DEBUG_ON + %print the parameters that i am sending in + fprintf('size(x) %d\n',size(X)); + fprintf('size(y) %d\n',size(Y)); + fprintf('nclass %d\n',nclass); + fprintf('size(ncat) %d\n',size(ncat)); + fprintf('maxcat %d\n',maxcat); + fprintf('size(sampsize) %d\n',size(sampsize)); + fprintf('sampsize[0] %d\n',sampsize(1)); + fprintf('Stratify %d\n',Stratify); + fprintf('Proximity 
%d\n',proximity); + fprintf('oob_prox %d\n',oob_prox); + fprintf('ntree %d\n',ntree); + fprintf('mtry %d\n',mtry); + fprintf('nodesize %f\n',nodesize); + fprintf('replace %f\n',replace); + end + + + + + [ldau,rdau,nodestatus,nrnodes,upper,avnode,... + mbest,ndtree,ypred,mse,impout,impmat,... + impSD,prox,coef,oob_times,inbag]... + = mexRF_train (X',Y,ntree,mtry,sampsize,nodesize,... + int32(Options),int32(ncat),int32(maxcat),int32(do_trace), int32(proximity), int32(oob_prox), ... + int32(corr_bias), keep_inbag, replace ); + + %done in R file so doing it too. + ypred(oob_times==0)=NaN; + + model.lDau=ldau; + model.rDau=rdau; + model.nodestatus=nodestatus; + model.nrnodes=nrnodes; + model.upper=upper; + model.avnode=avnode; + model.mbest=mbest; + model.ndtree=ndtree; + model.ntree = ntree; + model.Y_hat = ypred; + model.mse = mse; + model.importance = impout; + model.importanceSD = impSD; + model.localImp = impmat; + model.proximity = prox; + model.coef = coef; + model.oob_times = oob_times; + model.inbag = inbag; + model.nPerm = nPerm; + model.biasCorr = corr_bias; + model.rsq = 1 - mse / (var(Y) * (N-1) / N); + clear mexRF_train \ No newline at end of file diff --git a/randomforest-matlab/RF_Reg_C/src/cokus.cpp b/randomforest-matlab/RF_Reg_C/src/cokus.cpp new file mode 100644 index 0000000..0c04a05 --- /dev/null +++ b/randomforest-matlab/RF_Reg_C/src/cokus.cpp @@ -0,0 +1,196 @@ +// This is the Mersenne Twister random number generator MT19937, which +// generates pseudorandom integers uniformly distributed in 0..(2^32 - 1) +// starting from any odd seed in 0..(2^32 - 1). This version is a recode +// by Shawn Cokus (Cokus@math.washington.edu) on March 8, 1998 of a version by +// Takuji Nishimura (who had suggestions from Topher Cooper and Marc Rieffel in +// July-August 1997). +// +// Effectiveness of the recoding (on Goedel2.math.washington.edu, a DEC Alpha +// running OSF/1) using GCC -O3 as a compiler: before recoding: 51.6 sec. to +// generate 300 million random numbers; after recoding: 24.0 sec. for the same +// (i.e., 46.5% of original time), so speed is now about 12.5 million random +// number generations per second on this machine. +// +// According to the URL +// (and paraphrasing a bit in places), the Mersenne Twister is ``designed +// with consideration of the flaws of various existing generators,'' has +// a period of 2^19937 - 1, gives a sequence that is 623-dimensionally +// equidistributed, and ``has passed many stringent tests, including the +// die-hard test of G. Marsaglia and the load test of P. Hellekalek and +// S. Wegenkittl.'' It is efficient in memory usage (typically using 2506 +// to 5012 bytes of static data, depending on data type sizes, and the code +// is quite short as well). It generates random numbers in batches of 624 +// at a time, so the caching and pipelining of modern systems is exploited. +// It is also divide- and mod-free. +// +// This library is free software; you can redistribute it and/or modify it +// under the terms of the GNU Library General Public License as published by +// the Free Software Foundation (either version 2 of the License or, at your +// option, any later version). This library is distributed in the hope that +// it will be useful, but WITHOUT ANY WARRANTY, without even the implied +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See +// the GNU Library General Public License for more details. 
You should have +// received a copy of the GNU Library General Public License along with this +// library; if not, write to the Free Software Foundation, Inc., 59 Temple +// Place, Suite 330, Boston, MA 02111-1307, USA. +// +// The code as Shawn received it included the following notice: +// +// Copyright (C) 1997 Makoto Matsumoto and Takuji Nishimura. When +// you use this, send an e-mail to with +// an appropriate reference to your work. +// +// It would be nice to CC: when you write. +// + +//#include +//#include + +// +// uint32 must be an unsigned integer type capable of holding at least 32 +// bits; exactly 32 should be fastest, but 64 is better on an Alpha with +// GCC at -O3 optimization so try your options and see whats best for you +// + +typedef unsigned long uint32; + +#define N (624) // length of state vector +#define M (397) // a period parameter +#define K (0x9908B0DFU) // a magic constant +#define hiBit(u) ((u) & 0x80000000U) // mask all but highest bit of u +#define loBit(u) ((u) & 0x00000001U) // mask all but lowest bit of u +#define loBits(u) ((u) & 0x7FFFFFFFU) // mask the highest bit of u +#define mixBits(u, v) (hiBit(u)|loBits(v)) // move hi bit of u to hi bit of v + +static uint32 state[N+1]; // state vector + 1 extra to not violate ANSI C +static uint32 *next; // next random value is computed from here +static int left = -1; // can *next++ this many times before reloading + + +void seedMT(uint32 seed) + { + // + // We initialize state[0..(N-1)] via the generator + // + // x_new = (69069 * x_old) mod 2^32 + // + // from Line 15 of Table 1, p. 106, Sec. 3.3.4 of Knuths + // _The Art of Computer Programming_, Volume 2, 3rd ed. + // + // Notes (SJC): I do not know what the initial state requirements + // of the Mersenne Twister are, but it seems this seeding generator + // could be better. It achieves the maximum period for its modulus + // (2^30) iff x_initial is odd (p. 20-21, Sec. 3.2.1.2, Knuth); if + // x_initial can be even, you have sequences like 0, 0, 0, ...; + // 2^31, 2^31, 2^31, ...; 2^30, 2^30, 2^30, ...; 2^29, 2^29 + 2^31, + // 2^29, 2^29 + 2^31, ..., etc. so I force seed to be odd below. + // + // Even if x_initial is odd, if x_initial is 1 mod 4 then + // + // the lowest bit of x is always 1, + // the next-to-lowest bit of x is always 0, + // the 2nd-from-lowest bit of x alternates ... 0 1 0 1 0 1 0 1 ... , + // the 3rd-from-lowest bit of x 4-cycles ... 0 1 1 0 0 1 1 0 ... , + // the 4th-from-lowest bit of x has the 8-cycle ... 0 0 0 1 1 1 1 0 ... , + // ... + // + // and if x_initial is 3 mod 4 then + // + // the lowest bit of x is always 1, + // the next-to-lowest bit of x is always 1, + // the 2nd-from-lowest bit of x alternates ... 0 1 0 1 0 1 0 1 ... , + // the 3rd-from-lowest bit of x 4-cycles ... 0 0 1 1 0 0 1 1 ... , + // the 4th-from-lowest bit of x has the 8-cycle ... 0 0 1 1 1 1 0 0 ... , + // ... + // + // The generators potency (min. s>=0 with (69069-1)^s = 0 mod 2^32) is + // 16, which seems to be alright by p. 25, Sec. 3.2.1.3 of Knuth. It + // also does well in the dimension 2..5 spectral tests, but it could be + // better in dimension 6 (Line 15, Table 1, p. 106, Sec. 3.3.4, Knuth). + // + // Note that the random number user does not see the values generated + // here directly since reloadMT() will always munge them first, so maybe + // none of all of this matters. 
In fact, the seed values made here could + // even be extra-special desirable if the Mersenne Twister theory says + // so-- thats why the only change I made is to restrict to odd seeds. + // + + register uint32 x = (seed | 1U) & 0xFFFFFFFFU, *s = state; + register int j; + + for(left=0, *s++=x, j=N; --j; + *s++ = (x*=69069U) & 0xFFFFFFFFU); + } + + +uint32 reloadMT(void) + { + register uint32 *p0=state, *p2=state+2, *pM=state+M, s0, s1; + register int j; + + if(left < -1) + seedMT(4357U); + + left=N-1, next=state+1; + + for(s0=state[0], s1=state[1], j=N-M+1; --j; s0=s1, s1=*p2++) + *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U); + + for(pM=state, j=M; --j; s0=s1, s1=*p2++) + *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U); + + s1=state[0], *p0 = *pM ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U); + s1 ^= (s1 >> 11); + s1 ^= (s1 << 7) & 0x9D2C5680U; + s1 ^= (s1 << 15) & 0xEFC60000U; + return(s1 ^ (s1 >> 18)); + } + + +uint32 randomMT(void) + { + uint32 y; + + if(--left < 0) + return(reloadMT()); + + y = *next++; + y ^= (y >> 11); + y ^= (y << 7) & 0x9D2C5680U; + y ^= (y << 15) & 0xEFC60000U; + y ^= (y >> 18); + return(y); + } + +/* + #define uint32 unsigned long +#define SMALL_INT char +#define SMALL_INT_CLASS mxCHAR_CLASS +void seedMT(uint32 seed); +uint32 randomMT(void); + +#include "stdio.h" +#include "math.h" + +int main(void) + { + int j; + + // you can seed with any uint32, but the best are odds in 0..(2^32 - 1) + + seedMT(4357U); + uint32 MAX=pow(2,32)-1; + // print the first 2,002 random numbers seven to a line as an example + + for(j=0; j<2002; j++) + printf(" %10lu%s", (unsigned long) randomMT(), (j%7)==6 ? "\n" : ""); + + for(j=0; j<2002; j++) + printf(" %f%s", ((double)randomMT()/(double)MAX), (j%7)==6 ? "\n" : ""); + + + return(1); + } +*/ + + diff --git a/randomforest-matlab/RF_Reg_C/src/cokus_test.cpp b/randomforest-matlab/RF_Reg_C/src/cokus_test.cpp new file mode 100644 index 0000000..b1fb2cb --- /dev/null +++ b/randomforest-matlab/RF_Reg_C/src/cokus_test.cpp @@ -0,0 +1,42 @@ +//this is a simple file to test the cokus.cpp mersenne twister code. + +//free code with no guarantee. No restrictions on usage +//written by: Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) + +#define uint32 unsigned long +#define SMALL_INT char +#define SMALL_INT_CLASS mxCHAR_CLASS +extern void seedMT(uint32 seed); +extern uint32 randomMT(void); + +#include "stdio.h" +#include "math.h" + +//generate lots of random number and check if they are within the limits +//else cry about it + +int main(void) { + int j, k; + + // you can seed with any uint32, but the best are odds in 0..(2^32 - 1) + + seedMT(4357); + uint32 MAX=pow(2, 32)-1; + +// print the first 2,002 random numbers seven to a line as an example +// for(j=0; j<2002; j++) +// printf(" %10lu%s", (unsigned long) randomMT(), (j%7)==6 ? "\n" : ""); + + double test_val; + for(k=0;k<100;k++) + for(j=0; j<2000002; j++) { + test_val = ((double)randomMT()/(double)MAX); + if (test_val>=1.0){ + printf("Problem"); + return(0); + } + //printf(" %f%s", test_val , (j%7)==6 ? 
"\n" : ""); + } + printf("Success"); + return(1); +} diff --git a/randomforest-matlab/RF_Reg_C/src/diabetes_C_wrapper.cpp b/randomforest-matlab/RF_Reg_C/src/diabetes_C_wrapper.cpp new file mode 100644 index 0000000..d796272 --- /dev/null +++ b/randomforest-matlab/RF_Reg_C/src/diabetes_C_wrapper.cpp @@ -0,0 +1,382 @@ +/******************************************************************** + * Standalone interface to Andy Liaw et al.'s C code (used in R package randomForest) + * Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) + * License: GPLv2 + * Version: 0.02 + * + * What this barely has: link to Random forest code, setting parameters for + * number of tree, nvar/mtry. No support for almost everything else. + * + * Uses the pima indian diabetes dataset, creating the RF models and + * testing on the dataset. This is just a simple code to show the interface + * with the code from Andy Liaw et al. Modify as needed for custom datasets + * + * In this default form it will output the predictions out. + * use DEBUG_ON to control verbosity + * + * to compile on linux: use the Makefile command 'make diabetes' + * + * to compile on windows: use either cygwin or VC++ ( represents the compiler) + * to compile these 3 files "cokus.cpp reg_RF.cpp diabetes_C_wrapper.cpp" + * + * Errata: the file reading part will be needed to tweaked per requirement + * + * Generating the ascii files: + * the data files can be generated from matlab as + * save('filename.mat','variable','-ascii') + *******************************************************************/ + +#include "stdio.h" +#include "string.h" +#include "memory.h" +#include "math.h" +#include "stdlib.h" +#include "reg_RF.h" + + +void regRF(double *x, double *y, int *xdim, int *sampsize, + int *nthsize, int *nrnodes, int *nTree, int *mtry, int *imp, + int *cat, int maxcat, int *jprint, int doProx, int oobprox, + int biasCorr, double *yptr, double *errimp, double *impmat, + double *impSD, double *prox, int *treeSize, SMALL_INT *nodestatus, + int *lDaughter, int *rDaughter, double *avnode, int *mbest, + double *upper, double *mse, const int *keepf, int *replace, + int testdat, double *xts, int *nts, double *yts, int labelts, + double *yTestPred, double *proxts, double *msets, double *coef, + int *nout, int *inbag) ; + + +void regForest(double *x, double *ypred, int *mdim, int *n, + int *ntree, int *lDaughter, int *rDaughter, + SMALL_INT *nodestatus, int *nrnodes, double *xsplit, + double *avnodes, int *mbest, int *treeSize, int *cat, + int maxcat, int *keepPred, double *allpred, int doProx, + double *proxMat, int *nodes, int *nodex) ; + +//pima indian diabetes dataset used here in this example has 442 examples and +//20 features or dimensions. We are reading the data matrix into X (442x20 size) matrix +//and the target values into Y (442 size) vector + +// first the models are trained in regRF and then tested in regForest + +//if you want to print debug messages set the below to 1. 
+ +#define DEBUG_ON 0 + +int main(){ + char X_filename[100], Y_filename[100]; + FILE *fp_X, *fp_Y; + int cols=10,rows=442,i,j; + int p_size=cols,n_size=rows; + char dum_str[100]; + + double X[rows*cols], Y[rows]; + int dimx[2]; + dimx[0]=n_size; + dimx[1]=p_size; + + /**************READ DATA***********************/ + strcpy(X_filename,"data//X_diabetes.txt"); + strcpy(Y_filename,"data//Y_diabetes.txt"); + + + fp_X = fopen(X_filename,"r"); + fp_Y = fopen(Y_filename,"r"); + + if (fp_X !=NULL) + { + if (DEBUG_ON) printf("file opened: %s\n",X_filename); + }else{ + printf("cannot find files for data\n");exit(0); + } + + if (fp_Y !=NULL) + { + if (DEBUG_ON) printf("file opened: %s\n",Y_filename); + }else{ + printf("cannot find files for data\n");exit(0); + } + fflush(stdout); + + for(i=0;i(nodesize - 4)?1:(nodesize - 4)))))+ 1; + + if (DEBUG_ON) printf("nrnodes %d\n", nrnodes); + int ntree=500; + + //mtry = nvar + int nvar=(floor((float)(p_size/3))>1)?floor((float)(p_size/3)):1; + int imp[] = {0, 0, 1}; + + int *cat; cat = (int*) calloc(p_size, sizeof(int)); + if (DEBUG_ON) printf("cat %d\n", p_size); + for ( i=0;i +********************************************************************/ + +/******************************************************************* + * Copyright (C) 2001-7 Leo Breiman, Adele Cutler and Merck & Co., Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + *******************************************************************/ + +/****************************************************************** + * buildtree and findbestsplit routines translated from Leo's + * original Fortran code. + * + * copyright 1999 by leo Breiman + * this is free software and can be used for any purpose. + * It comes with no guarantee. 
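This mex hands the stored forest to regForest, which sums the per-tree predictions and divides by the number of trees. A minimal sketch of that averaging step, assuming a caller-supplied per-tree predictor; predict_one_tree is a hypothetical stand-in for predictRegTree, not a function in this code base.

// Illustrative sketch of regForest-style averaging: the forest's regression
// output for a case is the mean of the individual tree predictions.
static void average_forest(double *ypred, int n, int ntree,
                           double (*predict_one_tree)(int tree, int sample)) {
    int i, t;
    for (i = 0; i < n; ++i) ypred[i] = 0.0;
    for (t = 0; t < ntree; ++t)
        for (i = 0; i < n; ++i)
            ypred[i] += predict_one_tree(t, i);      // accumulate per-tree output
    for (i = 0; i < n; ++i) ypred[i] /= ntree;       // forest prediction = mean
}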
+ * + ******************************************************************/ + + +#ifdef MATLAB +#include "mex.h" +#endif +#include "math.h" +#include "reg_RF.h" + +#define DEBUG_ON 0 + +void regForest(double *x, double *ypred, int *mdim, int *n, + int *ntree, int *lDaughter, int *rDaughter, + SMALL_INT *nodestatus, int *nrnodes, double *xsplit, + double *avnodes, int *mbest, int *treeSize, int *cat, + int maxcat, int *keepPred, double *allpred, int doProx, + double *proxMat, int *nodes, int *nodex) ; +#ifdef MATLAB +void mexFunction( int nlhs, mxArray *plhs[], + int nrhs, const mxArray*prhs[] ) + +{ + int i; + if (nrhs!=10) + mexErrMsgIdAndTxt("mex_regressionRF_predict", + "I am stupid, I need 9 parameters"); + + int p_size = mxGetM(prhs[0]); + int n_size = mxGetN(prhs[0]); + + printf("n %d, p %d\n",n_size, p_size); + + double *x = (double*)mxGetData(prhs[0]); + int *lDaughter=(int*)mxGetData(prhs[1]); + int *rDaughter=(int*)mxGetData(prhs[2]); + SMALL_INT *nodestatus=(SMALL_INT*)mxGetData(prhs[3]); + int nrnodes=mxGetScalar(prhs[4]); + double* xsplit=(double*)mxGetData(prhs[5]); + double* avnodes=(double*)mxGetData(prhs[6]); + int* mbest = (int*)mxGetData(prhs[7]); + int* treeSize = (int*)mxGetData(prhs[8]); + int ntree=mxGetScalar(prhs[9]); + + plhs[0]=mxCreateNumericMatrix(n_size,1,mxDOUBLE_CLASS,0); + double* ypred = (double*)mxGetData(plhs[0]); + + int mdim = p_size; + int *cat; cat = (int*) calloc(p_size, sizeof(int)); for ( i=0;i +********************************************************************/ + +/******************************************************************* + * Copyright (C) 2001-7 Leo Breiman, Adele Cutler and Merck & Co., Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + *******************************************************************/ + +/****************************************************************** + * buildtree and findbestsplit routines translated from Leo's + * original Fortran code. + * + * copyright 1999 by leo Breiman + * this is free software and can be used for any purpose. + * It comes with no guarantee. 
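Training in regRF grows each tree on a bootstrap sample: sampsize cases drawn uniformly with replacement, with an in-bag flag so the remaining cases can be scored out-of-bag. A minimal sketch of that draw under the same convention; uniform01 is a placeholder for unif_rand()/the Mersenne Twister, and the bounds guard is an added safety check not present in the original loop.

// Illustrative sketch of the replace==1 sampling step inside regRF:
// draw sampsize case indices with replacement and mark which cases are
// in-bag; cases never drawn form the tree's out-of-bag (OOB) set.
static void bootstrap_draw(int nsample, int sampsize, int *in, int *picked,
                           double (*uniform01)(void)) {
    int n, k;
    for (n = 0; n < nsample; ++n) in[n] = 0;     // 0 = not (yet) in-bag
    for (n = 0; n < sampsize; ++n) {
        k = (int)(uniform01() * nsample);        // index in [0, nsample-1]
        if (k >= nsample) k = nsample - 1;       // guard against k == nsample
        in[k] = 1;                               // case k is in-bag for this tree
        picked[n] = k;                           // row n of the bootstrap copies case k
    }
}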
+ * + ******************************************************************/ + +#ifdef MATLAB +#include "mex.h" +#endif +#include "math.h" +#include "reg_RF.h" + +#define DEBUG_ON 0 + +void regRF(double *x, double *y, int *xdim, int *sampsize, + int *nthsize, int *nrnodes, int *nTree, int *mtry, int *imp, + int *cat, int maxcat, int *jprint, int doProx, int oobprox, + int biasCorr, double *yptr, double *errimp, double *impmat, + double *impSD, double *prox, int *treeSize, SMALL_INT *nodestatus, + int *lDaughter, int *rDaughter, double *avnode, int *mbest, + double *upper, double *mse, const int *keepf, int *replace, + int testdat, double *xts, int *nts, double *yts, int labelts, + double *yTestPred, double *proxts, double *msets, double *coef, + int *nout, int *inbag) ; + + +#ifdef MATLAB +void mexFunction( int nlhs, mxArray *plhs[], + int nrhs, const mxArray*prhs[] ) + +{ + //handle output + + int i; + int p_size = mxGetM(prhs[0]); + int n_size = mxGetN(prhs[0]); + double *x = mxGetPr(prhs[0]); + double *y = mxGetPr(prhs[1]); + int dimx[2]; + dimx[0]=n_size; + dimx[1]=p_size; + if (DEBUG_ON) printf("\n\n\n n %d, p %d\n", dimx[0], dimx[1]); + + int sampsize=(int)mxGetScalar(prhs[4]); + if (DEBUG_ON) printf("sampsize %d\n", sampsize); + int nodesize=(int)mxGetScalar(prhs[5]); + if (DEBUG_ON) printf("nodesize %d\n", nodesize); + + plhs[3] = mxCreateDoubleScalar(2 * (int)(floor((float)(sampsize / (1>(nodesize - 4)?1:(nodesize - 4))) ))+ 1); + int nrnodes = 2 * (int)((float)floor((float)(sampsize / (1>(nodesize - 4)?1:(nodesize - 4)))))+ 1; + + if (DEBUG_ON) printf("nrnodes %d\n", nrnodes); + int ntree; + int nvar; + + //correctness handled in .m file + ntree=(int)mxGetScalar(prhs[2]); + nvar=(int)mxGetScalar(prhs[3]); + + + if (DEBUG_ON) mexPrintf("\nntree %d, mtry=%d\n",ntree,nvar); + + if (ntree<=0) + mexErrMsgIdAndTxt("mex_regressionRF_train", + "Cannot fathom creating 0 trees :), put the right option"); + + //printf("ntree %d\n", ntree); + //if (DEBUG_ON) + //printf("nvar %d\n", nvar); + int *imp = (int*)mxGetData(prhs[6]); + + //int cat[p_size]; + int *cat = (int*) mxGetData(prhs[7]); + + if (DEBUG_ON) printf("cat %d\n", p_size); + //for ( i=0;i n_max = 2'097'151 + * now k = 31 -> n_max = 4294'967'295 + */ + NUMERIC vt, vtt; + double R = 0.375; + int ii, ij, k, l, m; + #ifdef qsort_Index + int it, tt; + #endif + + + /* 1-indexing for I[], v[] (and `i' and `j') : */ + --v; + #ifdef qsort_Index + --I; + #endif + + ii = i;/* save */ + m = 1; + + L10: + if (i < j) { + if (R < 0.5898437) R += 0.0390625; else R -= 0.21875; + L20: + k = i; + /* ij = (j + i) >> 1; midpoint */ + ij = i + (int)((j - i)*R); + #ifdef qsort_Index + it = I[ij]; + #endif + vt = v[ij]; + if (v[i] > vt) { + #ifdef qsort_Index + I[ij] = I[i]; I[i] = it; it = I[ij]; + #endif + v[ij] = v[i]; v[i] = vt; vt = v[ij]; + } + /* L30:*/ + l = j; + if (v[j] < vt) { + #ifdef qsort_Index + I[ij] = I[j]; I[j] = it; it = I[ij]; + #endif + v[ij] = v[j]; v[j] = vt; vt = v[ij]; + if (v[i] > vt) { + #ifdef qsort_Index + I[ij] = I[i]; I[i] = it; it = I[ij]; + #endif + v[ij] = v[i]; v[i] = vt; vt = v[ij]; + } + } + + for(;;) { /*L50:*/ + //do l--; while (v[l] > vt); + l--;for(;v[l]>vt;l--); + + + #ifdef qsort_Index + tt = I[l]; + #endif + vtt = v[l]; + /*L60:*/ + //do k++; while (v[k] < vt); + k=k+1;for(;v[k] l) break; + + /* else (k <= l) : */ + #ifdef qsort_Index + I[l] = I[k]; I[k] = tt; + #endif + v[l] = v[k]; v[k] = vtt; + } + + m++; + if (l - i <= j - k) { + /*L70: */ + il[m] = k; + iu[m] = j; + j = l; + } + else { + il[m] = i; + iu[m] = 
l; + i = k; + } + }else { /* i >= j : */ + + L80: + if (m == 1) return; + + /* else */ + i = il[m]; + j = iu[m]; + m--; + } + + if (j - i > 10) goto L20; + + if (i == ii) goto L10; + + --i; + L100: + do { + ++i; + if (i == j) { + goto L80; + } + #ifdef qsort_Index + it = I[i + 1]; + #endif + vt = v[i + 1]; + } while (v[i] <= vt); + + k = i; + + do { /*L110:*/ + #ifdef qsort_Index + I[k + 1] = I[k]; + #endif + v[k + 1] = v[k]; + --k; + } while (vt < v[k]); + + #ifdef qsort_Index + I[k + 1] = it; + #endif + v[k + 1] = vt; + goto L100; +} /* R_qsort{i} */ diff --git a/randomforest-matlab/RF_Reg_C/src/reg_RF.cpp b/randomforest-matlab/RF_Reg_C/src/reg_RF.cpp new file mode 100644 index 0000000..6ca21fd --- /dev/null +++ b/randomforest-matlab/RF_Reg_C/src/reg_RF.cpp @@ -0,0 +1,1088 @@ +/************************************************************** + * mex interface to Andy Liaw et al.'s C code (used in R package randomForest) + * Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) + * License: GPLv2 + * Version: 0.02 + * + * File: contains all the supporting code for a standalone C or mex for + * Regression RF. + * Copied all the code from the randomForest 4.5-28 or was it -29? + * + * important changes (other than the many commented out PRINTF's) + * 1. Added function to print up parameter values + * 2. emulated all R calls. so instead of S_alloc, used calloc + * 3. checked with valgrind so that memory is not lost + * 4. substituted random number generator from R's to mersenne twister from + * Matsumoto et al and Shawn Cokus. + * 5. Instead of allocating and deallocating memory every time for findBestSplit + * and regTree, used two globals with prefix in_, where funcname + * are the aforementioned functions; so that the globals are set to 0 initially + * and then when the function is called for the first time they are set to 1 + * and memory is allocated in static variables (which stays put even if function + * exits) and when ending the program the functions are called (dummy call which + * only does deallocation) with setting the globals to -99 which says to + * dellocate and exit + * 6. A minor change to decrease the memory footprint was to use NODESTATUS as + * a Char rather than an Int which shouldn't change any of the logic + * 7. Other changes include compounding all the functions required into this + * single file reg_RF.cpp and adding this comment. + * + *************************************************************/ + +/******************************************************************* + * Copyright (C) 2001-7 Leo Breiman, Adele Cutler and Merck & Co., Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + *******************************************************************/ + +/****************************************************************** + * buildtree and findbestsplit routines translated from Leo's + * original Fortran code. + * + * copyright 1999 by leo Breiman + * this is free software and can be used for any purpose. + * It comes with no guarantee. 
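Point 5 in the change list above replaces per-call allocation in findBestSplit and regTree with static scratch buffers guarded by the in_* globals: 0 means first call (allocate), 1 means reuse, and a final dummy call with the global set to -99 frees everything. A minimal sketch of that pattern; the names in_work, scratch, and work are illustrative only.

// Illustrative sketch of the allocate-once / release-on-sentinel pattern
// described above (the real code applies it in findBestSplit and regTree
// via the in_findBestSplit and in_regTree globals).
#include <stdlib.h>

static int in_work = 0;             // 0 = first call, 1 = normal, -99 = release
static double *scratch = 0;

static void work(int n) {
    if (in_work == -99) {           // shutdown: dummy call just frees memory
        free(scratch);
        scratch = 0;
        return;
    }
    if (in_work == 0) {             // first real call: allocate once
        scratch = (double *) calloc(n, sizeof(double));
        in_work = 1;
    }
    // ... use scratch[0..n-1] as per-call workspace ...
}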
+ * + ******************************************************************/ +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 +#define NULL 0 + +#include "memory.h" +#include "stdio.h" +#include "math.h" +#include "time.h" +#include "stdlib.h" +#include "qsort.c" +#include "reg_RF.h" +#define swapInt(a, b) ((a ^= b), (b ^= a), (a ^= b)) + +#define INT_SMALL + +#define MAX_UINT_COKUS 4294967295 //basically 2^32-1 + +#include "mex.h" +#define PRINTF mexPrintf + +//Global to handle mem in findBestSplit +int in_findBestSplit=0; // 0 -initialize and normal. 1-normal , -99 release +int in_regTree=0; //// 0 -initialize and normal. 1-normal , -99 release + +//cruft emulating rand() routinges from R. keep it +void GetRNGstate(){} +void PutRNGstate(){} + + + +double unif_rand(){ + //double value=((double)rand())/RAND_MAX; + //mexPrintf("%f, RAND_MAX %d\n", value, RAND_MAX); + //return value; + //return ((double)rand())/RAND_MAX; + return (((double)randomMT())/((double)MAX_UINT_COKUS)); +} +void zeroSMALLInt(void *x, int length) { + memset(x, 0, length * sizeof(SMALL_INT)); +} +void zeroInt(int *x, int length) { + memset(x, 0, length * sizeof(int)); +} + +void zeroDouble(double *x, int length) { + memset(x, 0, length * sizeof(double)); +} + +int imax2(int x, int y) { + return (x < y) ? y : x; +} + + +int pack(int nBits, int *bits) { + int i = nBits, pack = 0; + while (--i >= 0) pack += bits[i] << i; + return(pack); +} + +void unpack(unsigned int pack, int *bits) { + /* pack is a 4-byte integer. The sub. returns icat, an integer array of + zeroes and ones corresponding to the coefficients in the binary expansion + of pack. */ + int i; + for (i = 0; pack != 0; pack >>= 1, ++i) bits[i] = pack & 1; +} + +/* Compute proximity. */ +void computeProximity(double *prox, int oobprox, int *node, int *inbag, + int *oobpair, int n) { + /* Accumulate the number of times a pair of points fall in the same node. + prox: n x n proximity matrix + oobprox: should the accumulation only count OOB cases? (0=no, 1=yes) + node: vector of terminal node labels + inbag: indicator of whether a case is in-bag + oobpair: matrix to accumulate the number of times a pair is OOB together + n: total number of cases + */ + int i, j; + for (i = 0; i < n; ++i) { + for (j = i+1; j < n; ++j) { + if (oobprox) { + /* if (jin[k] == 0 && jin[n] == 0) { */ + if ((inbag[i] > 0) ^ (inbag[j] > 0)) { + oobpair[j*n + i] ++; + oobpair[i*n + j] ++; + if (node[i] == node[j]) { + prox[j*n + i] += 1.0; + prox[i*n + j] += 1.0; + } + } + } else { + if (node[i] == node[j]) { + prox[j*n + i] += 1.0; + prox[i*n + j] += 1.0; + } + } + } + } +} + +void permuteOOB(int m, double *x, int *in, int nsample, int mdim) { + /* Permute the OOB part of a variable in x. + * Argument: + * m: the variable to be permuted + * x: the data matrix (variables in rows) + * in: vector indicating which case is OOB + * nsample: number of cases in the data + * mdim: number of variables in the data + */ + double *tp, tmp; + int i, last, k, nOOB = 0; + + tp = (double *) calloc(nsample , sizeof(double)); + + for (i = 0; i < nsample; ++i) { + /* make a copy of the OOB part of the data into tp (for permuting) */ + if (in[i] == 0) { + tp[nOOB] = x[m + i*mdim]; + nOOB++; + } + } + /* Permute tp */ + last = nOOB; + for (i = 0; i < nOOB; ++i) { + k = (int) last * unif_rand(); + tmp = tp[last - 1]; + tp[last - 1] = tp[k]; + tp[k] = tmp; + last--; + } + + /* Copy the permuted OOB data back into x. 
*/ + nOOB = 0; + for (i = 0; i < nsample; ++i) { + if (in[i] == 0) { + x[m + i*mdim] = tp[nOOB]; + nOOB++; + } + } + free(tp); +} + + + +void simpleLinReg(int nsample, double *x, double *y, double *coef, + double *mse, int *hasPred); +void predictRegTree(double *x, int nsample, int mdim, + int *lDaughter, int *rDaughter, SMALL_INT *nodestatus, + double *ypred, double *split, double *nodepred, + int *splitVar, int treeSize, int *cat, int maxcat, + int *nodex) ; +void regRF(double *x, double *y, int *xdim, int *sampsize, + int *nthsize, int *nrnodes, int *nTree, int *mtry, int *imp, + int *cat, int maxcat, int *jprint, int doProx, int oobprox, + int biasCorr, double *yptr, double *errimp, double *impmat, + double *impSD, double *prox, int *treeSize, SMALL_INT *nodestatus, + int *lDaughter, int *rDaughter, double *avnode, int *mbest, + double *upper, double *mse, const int *keepf, int *replace, + int testdat, double *xts, int *nts, double *yts, int labelts, + double *yTestPred, double *proxts, double *msets, double *coef, + int *nout, int *inbag) ; + +void regForest(double *x, double *ypred, int *mdim, int *n, + int *ntree, int *lDaughter, int *rDaughter, + SMALL_INT *nodestatus, int *nrnodes, double *xsplit, + double *avnodes, int *mbest, int *treeSize, int *cat, + int maxcat, int *keepPred, double *allpred, int doProx, + double *proxMat, int *nodes, int *nodex) ; + +void regTree(double *x, double *y, int mdim, int nsample, int *lDaughter, + int *rDaughter, + double *upper, double *avnode, SMALL_INT *nodestatus, int nrnodes, + int *treeSize, int nthsize, int mtry, int *mbest, int *cat, + double *tgini, int *varUsed) ; + +void findBestSplit(double *x, int *jdex, double *y, int mdim, int nsample, + int ndstart, int ndend, int *msplit, double *decsplit, + double *ubest, int *ndendl, int *jstat, int mtry, + double sumnode, int nodecnt, int *cat) ; + + + + + +void regRF(double *x, double *y, int *xdim, int *sampsize, + int *nthsize, int *nrnodes, int *nTree, int *mtry, int *imp, + int *cat, int maxcat, int *jprint, int doProx, int oobprox, + int biasCorr, double *yptr, double *errimp, double *impmat, + double *impSD, double *prox, int *treeSize, SMALL_INT *nodestatus, + int *lDaughter, int *rDaughter, double *avnode, int *mbest, + double *upper, double *mse, const int *keepf, int *replace, + int testdat, double *xts, int *nts, double *yts, int labelts, + double *yTestPred, double *proxts, double *msets, double *coef, + int *nout, int *inbag) { + /************************************************************************* + * Input: + * mdim=number of variables in data set + * nsample=number of cases + * + * nthsize=number of cases in a node below which the tree will not split, + * setting nthsize=5 generally gives good results. + * + * nTree=number of trees in run. 200-500 gives pretty good results + * + * mtry=number of variables to pick to split on at each node. mdim/3 + * seems to give genrally good performance, but it can be + * altered up or down + * + * imp=1 turns on variable importance. This is computed for the + * mth variable as the percent rise in the test set mean sum-of- + * squared errors when the mth variable is randomly permuted. 
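Per tree, the importance of variable m is accumulated as delta = (OOB error after permuting m, averaged over nPerm permutations, minus the unpermuted OOB error) / nOOB, with delta and delta squared added into errimp[m] and impSD[m]. A small sketch, assuming those accumulators, of how the varImp block near the end of regRF (further down in this file) turns them into the reported importance and its standard error.

// Illustrative sketch mirroring the final varImp normalisation in regRF:
// mean permutation-importance increment over trees, and the standard error
// of that mean, sqrt(((sum of delta^2)/nTree - mean^2)/nTree).
#include <math.h>

static void finalize_importance(double *errimp, double *impSD, int mdim, int nTree) {
    int m;
    for (m = 0; m < mdim; ++m) {
        errimp[m] /= nTree;                                          // mean increment
        impSD[m] = sqrt((impSD[m] / nTree - errimp[m] * errimp[m]) / nTree);
    }
}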
+ * + *************************************************************************/ + + PRINTF( "*jprint: %d\n", *jprint ); + mexEvalString( "pause(0.0001)" ); + + double errts = 0.0, averrb, meanY, meanYts, varY, varYts, r, xrand, + errb = 0.0, resid=0.0, ooberr, ooberrperm, delta, *resOOB; + + double *yb, *xtmp, *xb, *ytr, *ytree, *tgini; + + int k, m, mr, n, nOOB, j, jout, idx, ntest, last, ktmp, nPerm, + nsample, mdim, keepF, keepInbag; + int *oobpair, varImp, localImp, *varUsed; + + int *in, *nind, *nodex, *nodexts; + + //Abhi:temp variable + double tmp_d; + int tmp_i; + SMALL_INT tmp_c; + + //Do initialization for COKUS's Random generator + seedMT(2*rand()+1); //works well with odd number so why don't use that + + nsample = xdim[0]; + mdim = xdim[1]; + ntest = *nts; + varImp = imp[0]; + localImp = imp[1]; + nPerm = imp[2]; //PRINTF("nPerm %d\n",nPerm); + keepF = keepf[0]; + keepInbag = keepf[1]; + + if (*jprint == 0) *jprint = *nTree + 1; + + yb = (double *) calloc(*sampsize, sizeof(double)); + xb = (double *) calloc(mdim * *sampsize, sizeof(double)); + ytr = (double *) calloc(nsample, sizeof(double)); + xtmp = (double *) calloc(nsample, sizeof(double)); + resOOB = (double *) calloc(nsample, sizeof(double)); + + in = (int *) calloc(nsample, sizeof(int)); + nodex = (int *) calloc(nsample, sizeof(int)); + varUsed = (int *) calloc(mdim, sizeof(int)); + nind = *replace ? NULL : (int *) calloc(nsample, sizeof(int)); + + if (testdat) { + ytree = (double *) calloc(ntest, sizeof(double)); + nodexts = (int *) calloc(ntest, sizeof(int)); + } + oobpair = (doProx && oobprox) ? + (int *) calloc(nsample * nsample, sizeof(int)) : NULL; + + /* If variable importance is requested, tgini points to the second + "column" of errimp, otherwise it's just the same as errimp. */ + tgini = varImp ? errimp + mdim : errimp; + + averrb = 0.0; + meanY = 0.0; + varY = 0.0; + + zeroDouble(yptr, nsample); + zeroInt(nout, nsample); + for (n = 0; n < nsample; ++n) { + varY += n * (y[n] - meanY)*(y[n] - meanY) / (n + 1); + meanY = (n * meanY + y[n]) / (n + 1); + } + varY /= nsample; + + varYts = 0.0; + meanYts = 0.0; + if (testdat) { + for (n = 0; n < ntest; ++n) { + varYts += n * (yts[n] - meanYts)*(yts[n] - meanYts) / (n + 1); + meanYts = (n * meanYts + yts[n]) / (n + 1); + } + varYts /= ntest; + } + + if (doProx) { + zeroDouble(prox, nsample * nsample); + if (testdat) zeroDouble(proxts, ntest * (nsample + ntest)); + } + + if (varImp) { + zeroDouble(errimp, mdim * 2); + if (localImp) zeroDouble(impmat, nsample * mdim); + } else { + zeroDouble(errimp, mdim); + } + if (labelts) zeroDouble(yTestPred, ntest); + + /* print header for running output */ + if (*jprint <= *nTree) { + PRINTF(" | Out-of-bag "); + if (testdat) PRINTF("| Test set "); + PRINTF("|\n"); + PRINTF("Tree | MSE %%Var(y) "); + if (testdat) PRINTF("| MSE %%Var(y) "); + PRINTF("|\n"); + mexEvalString( "pause(0.001)" ); + } + GetRNGstate(); + /************************************* + * Start the loop over trees. + *************************************/ + for (j = 0; j < *nTree; ++j) { + //PRINTF("tree num %d\n",j);fflush(stdout); + //PRINTF("1. maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d\n", *maxcat, *jprint, doProx, oobprox, biasCorr); + + idx = keepF ? j * *nrnodes : 0; + zeroInt(in, nsample); + zeroInt(varUsed, mdim); + /* Draw a random sample for growing a tree. */ +// PRINTF("1.8. 
maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat); + + if (*replace) { /* sampling with replacement */ + for (n = 0; n < *sampsize; ++n) { + xrand = unif_rand(); + k = xrand * nsample; + in[k] = 1; + yb[n] = y[k]; + for(m = 0; m < mdim; ++m) { + xb[m + n * mdim] = x[m + k * mdim]; + } + } + } else { /* sampling w/o replacement */ + for (n = 0; n < nsample; ++n) nind[n] = n; + last = nsample - 1; + for (n = 0; n < *sampsize; ++n) { + ktmp = (int) (unif_rand() * (last+1)); + k = nind[ktmp]; + swapInt(nind[ktmp], nind[last]); + last--; + in[k] = 1; + yb[n] = y[k]; + for(m = 0; m < mdim; ++m) { + xb[m + n * mdim] = x[m + k * mdim]; + } + } + } + if (keepInbag) { + for (n = 0; n < nsample; ++n) inbag[n + j * nsample] = in[n]; + } +// PRINTF("1.9. maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat); + + /* grow the regression tree */ + regTree(xb, yb, mdim, *sampsize, lDaughter + idx, rDaughter + idx, + upper + idx, avnode + idx, nodestatus + idx, *nrnodes, + treeSize + j, *nthsize, *mtry, mbest + idx, cat, tgini, + varUsed); + /* predict the OOB data with the current tree */ + /* ytr is the prediction on OOB data by the current tree */ + +// PRINTF("2. maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat); + + predictRegTree(x, nsample, mdim, lDaughter + idx, + rDaughter + idx, nodestatus + idx, ytr, upper + idx, + avnode + idx, mbest + idx, treeSize[j], cat, maxcat, + nodex); + /* yptr is the aggregated prediction by all trees grown so far */ + errb = 0.0; + ooberr = 0.0; + jout = 0; /* jout is the number of cases that has been OOB so far */ + nOOB = 0; /* nOOB is the number of OOB samples for this tree */ + for (n = 0; n < nsample; ++n) { + if (in[n] == 0) { + nout[n]++; + nOOB++; + yptr[n] = ((nout[n]-1) * yptr[n] + ytr[n]) / nout[n]; + resOOB[n] = ytr[n] - y[n]; + ooberr += resOOB[n] * resOOB[n]; + } + if (nout[n]) { + jout++; + errb += (y[n] - yptr[n]) * (y[n] - yptr[n]); + } + } + errb /= jout; + /* Do simple linear regression of y on yhat for bias correction. */ + if (biasCorr) simpleLinReg(nsample, yptr, y, coef, &errb, nout); +//PRINTF("2.5.maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d\n", maxcat, *jprint, doProx, oobprox, biasCorr); + + /* predict testset data with the current tree */ + if (testdat) { + predictRegTree(xts, ntest, mdim, lDaughter + idx, + rDaughter + idx, nodestatus + idx, ytree, + upper + idx, avnode + idx, + mbest + idx, treeSize[j], cat, maxcat, nodexts); + /* ytree is the prediction for test data by the current tree */ + /* yTestPred is the average prediction by all trees grown so far */ + errts = 0.0; + for (n = 0; n < ntest; ++n) { + yTestPred[n] = (j * yTestPred[n] + ytree[n]) / (j + 1); + } + /* compute testset MSE */ + if (labelts) { + for (n = 0; n < ntest; ++n) { + resid = biasCorr ? + yts[n] - (coef[0] + coef[1]*yTestPred[n]) : + yts[n] - yTestPred[n]; + errts += resid * resid; + } + errts /= ntest; + } + } +//PRINTF("2.6.maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d, testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat); + + /* Print running output. 
*/ + if ((j + 1) % *jprint == 0) { + PRINTF("%4d |", j + 1); + PRINTF(" %8.4g %8.2f ", errb, 100 * errb / varY); + if(labelts == 1) PRINTF("| %8.4g %8.2f ", + errts, 100.0 * errts / varYts); + PRINTF("|\n"); + fflush(stdout); + mexEvalString("pause(.001);"); // to dump string. + } + +//PRINTF("2.7.maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d, testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat); + + mse[j] = errb; + if (labelts) msets[j] = errts; +//PRINTF("2.701 j %d, nTree %d, errts %f errb %f \n", j, *nTree, errts,errb); +//PRINTF("2.71.maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d, testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat); + + /* DO PROXIMITIES */ + if (doProx) { + computeProximity(prox, oobprox, nodex, in, oobpair, nsample); + /* proximity for test data */ + if (testdat) { + /* In the next call, in and oobpair are not used. */ + computeProximity(proxts, 0, nodexts, in, oobpair, ntest); + for (n = 0; n < ntest; ++n) { + for (k = 0; k < nsample; ++k) { + if (nodexts[n] == nodex[k]) { + proxts[n + ntest * (k+ntest)] += 1.0; + } + } + } + } + } +//PRINTF("2.8.maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d, testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat); + + /* Variable importance */ + if (varImp) { + for (mr = 0; mr < mdim; ++mr) { + if (varUsed[mr]) { /* Go ahead if the variable is used */ + /* make a copy of the m-th variable into xtmp */ + for (n = 0; n < nsample; ++n) + xtmp[n] = x[mr + n * mdim]; + ooberrperm = 0.0; + for (k = 0; k < nPerm; ++k) { + permuteOOB(mr, x, in, nsample, mdim); + predictRegTree(x, nsample, mdim, lDaughter + idx, + rDaughter + idx, nodestatus + idx, ytr, + upper + idx, avnode + idx, mbest + idx, + treeSize[j], cat, maxcat, nodex); + for (n = 0; n < nsample; ++n) { + if (in[n] == 0) { + r = ytr[n] - y[n]; + ooberrperm += r * r; + if (localImp) { + impmat[mr + n * mdim] += + (r*r - resOOB[n]*resOOB[n]) / nPerm; + } + } + } + } + delta = (ooberrperm / nPerm - ooberr) / nOOB; + errimp[mr] += delta; + impSD[mr] += delta * delta; + /* copy original data back */ + for (n = 0; n < nsample; ++n) + x[mr + n * mdim] = xtmp[n]; + } + + } + + } +// PRINTF("3. maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat); + + } + PutRNGstate(); + /* end of tree iterations=======================================*/ + + if (biasCorr) { /* bias correction for predicted values */ + for (n = 0; n < nsample; ++n) { + if (nout[n]) yptr[n] = coef[0] + coef[1] * yptr[n]; + } + if (testdat) { + for (n = 0; n < ntest; ++n) { + yTestPred[n] = coef[0] + coef[1] * yTestPred[n]; + } + } + } + + if (doProx) { + for (n = 0; n < nsample; ++n) { + for (k = n + 1; k < nsample; ++k) { + prox[nsample*k + n] /= oobprox ? + (oobpair[nsample*k + n] > 0 ? 
oobpair[nsample*k + n] : 1) : + *nTree; + prox[nsample * n + k] = prox[nsample * k + n]; + } + prox[nsample * n + n] = 1.0; + } + if (testdat) { + for (n = 0; n < ntest; ++n) + for (k = 0; k < ntest + nsample; ++k) + proxts[ntest*k + n] /= *nTree; + } + } + + if (varImp) { + for (m = 0; m < mdim; ++m) { + errimp[m] = errimp[m] / *nTree; + impSD[m] = sqrt( ((impSD[m] / *nTree) - + (errimp[m] * errimp[m])) / *nTree ); + if (localImp) { + for (n = 0; n < nsample; ++n) { + impmat[m + n * mdim] /= nout[n]; + } + } + } + } + for (m = 0; m < mdim; ++m) tgini[m] /= *nTree; + + + //addition by abhi + //in order to release the space stored by the variable in findBestSplit + // call by setting + in_findBestSplit=-99; + findBestSplit(&tmp_d, &tmp_i, &tmp_d, tmp_i, tmp_i, + tmp_i, tmp_i, &tmp_i, &tmp_d, + &tmp_d, &tmp_i, &tmp_i, tmp_i, + tmp_d, tmp_i, &tmp_i); + + //do the same freeing of space by calling with -99 + in_regTree=-99; + regTree(&tmp_d, &tmp_d, tmp_i, tmp_i, &tmp_i, + &tmp_i, + &tmp_d, &tmp_d, &tmp_c, tmp_i, + &tmp_i, tmp_i, tmp_i, &tmp_i, &tmp_i, + &tmp_d, &tmp_i); + + + free(yb); + free(xb); + free(ytr); + free(xtmp); + free(resOOB); + free(in); + free(nodex); + free(varUsed); + if (!(*replace) ) + free(nind); + + if (testdat) { + free(ytree); + free(nodexts); + } + + if (doProx && oobprox) + free(oobpair) ; +} + +/*----------------------------------------------------------------------*/ +void regForest(double *x, double *ypred, int *mdim, int *n, + int *ntree, int *lDaughter, int *rDaughter, + SMALL_INT *nodestatus, int *nrnodes, double *xsplit, + double *avnodes, int *mbest, int *treeSize, int *cat, + int maxcat, int *keepPred, double *allpred, int doProx, + double *proxMat, int *nodes, int *nodex) { + int i, j, idx1, idx2, *junk; + double *ytree; + + junk = NULL; + ytree = (double *) calloc(*n, sizeof(double)); + if (*nodes) { + zeroInt(nodex, *n * *ntree); + } else { + zeroInt(nodex, *n); + } + if (doProx) zeroDouble(proxMat, *n * *n); + if (*keepPred) zeroDouble(allpred, *n * *ntree); + idx1 = 0; + idx2 = 0; + for (i = 0; i < *ntree; ++i) { + zeroDouble(ytree, *n); + predictRegTree(x, *n, *mdim, lDaughter + idx1, rDaughter + idx1, + nodestatus + idx1, ytree, xsplit + idx1, + avnodes + idx1, mbest + idx1, treeSize[i], cat, maxcat, + nodex + idx2); + + for (j = 0; j < *n; ++j) ypred[j] += ytree[j]; + if (*keepPred) { + for (j = 0; j < *n; ++j) allpred[j + i * *n] = ytree[j]; + } + /* if desired, do proximities for this round */ + if (doProx) computeProximity(proxMat, 0, nodex + idx2, junk, + junk, *n); + idx1 += *nrnodes; /* increment the offset */ + if (*nodes) idx2 += *n; + } + for (i = 0; i < *n; ++i) ypred[i] /= *ntree; + if (doProx) { + for (i = 0; i < *n; ++i) { + for (j = i + 1; j < *n; ++j) { + proxMat[i + j * *n] /= *ntree; + proxMat[j + i * *n] = proxMat[i + j * *n]; + } + proxMat[i + i * *n] = 1.0; + } + } + free(ytree); +} + +void simpleLinReg(int nsample, double *x, double *y, double *coef, + double *mse, int *hasPred) { + /* Compute simple linear regression of y on x, returning the coefficients, + the average squared residual, and the predicted values (overwriting y). 
*/ + int i, nout = 0; + double sxx=0.0, sxy=0.0, xbar=0.0, ybar=0.0; + double dx = 0.0, dy = 0.0, py=0.0; + + for (i = 0; i < nsample; ++i) { + if (hasPred[i]) { + nout++; + xbar += x[i]; + ybar += y[i]; + } + } + xbar /= nout; + ybar /= nout; + + for (i = 0; i < nsample; ++i) { + if (hasPred[i]) { + dx = x[i] - xbar; + dy = y[i] - ybar; + sxx += dx * dx; + sxy += dx * dy; + } + } + coef[1] = sxy / sxx; + coef[0] = ybar - coef[1] * xbar; + + *mse = 0.0; + for (i = 0; i < nsample; ++i) { + if (hasPred[i]) { + py = coef[0] + coef[1] * x[i]; + dy = y[i] - py; + *mse += dy * dy; + /* y[i] = py; */ + } + } + *mse /= nout; + return; +} + + +void regTree(double *x, double *y, int mdim, int nsample, int *lDaughter, + int *rDaughter, + double *upper, double *avnode, SMALL_INT *nodestatus, int nrnodes, + int *treeSize, int nthsize, int mtry, int *mbest, int *cat, + double *tgini, int *varUsed) { + int i, j, k, m, ncur; + static int *jdex, *nodestart, *nodepop; + int ndstart, ndend, ndendl, nodecnt, jstat, msplit; + double d, ss, av, decsplit, ubest, sumnode; + + if (in_regTree==-99){ + free(nodestart); + free(jdex); + free(nodepop); +// PRINTF("giving up mem in in_regTree\n"); + return; + } + + if (in_regTree==0){ + in_regTree=1; + nodestart = (int *) calloc(nrnodes, sizeof(int)); + nodepop = (int *) calloc(nrnodes, sizeof(int)); + jdex = (int *) calloc(nsample, sizeof(int)); + } + + /* initialize some arrays for the tree */ + zeroSMALLInt(nodestatus, nrnodes); + zeroInt(nodestart, nrnodes); + zeroInt(nodepop, nrnodes); + zeroDouble(avnode, nrnodes); + + for (i = 1; i <= nsample; ++i) jdex[i-1] = i; + + ncur = 0; + nodestart[0] = 0; + nodepop[0] = nsample; + nodestatus[0] = NODE_TOSPLIT; + + /* compute mean and sum of squares for Y */ + av = 0.0; + ss = 0.0; + for (i = 0; i < nsample; ++i) { + d = y[jdex[i] - 1]; + ss += i * (av - d) * (av - d) / (i + 1); + av = (i * av + d) / (i + 1); + } + avnode[0] = av; + + /* start main loop */ + for (k = 0; k < nrnodes - 2; ++k) { + if (k > ncur || ncur >= nrnodes - 2) break; + /* skip if the node is not to be split */ + if (nodestatus[k] != NODE_TOSPLIT) continue; + + /* initialize for next call to findbestsplit */ + ndstart = nodestart[k]; + ndend = ndstart + nodepop[k] - 1; + nodecnt = nodepop[k]; + sumnode = nodecnt * avnode[k]; + jstat = 0; + decsplit = 0.0; + + findBestSplit(x, jdex, y, mdim, nsample, ndstart, ndend, &msplit, + &decsplit, &ubest, &ndendl, &jstat, mtry, sumnode, + nodecnt, cat); + if (jstat == 1) { + /* Node is terminal: Mark it as such and move on to the next. */ + nodestatus[k] = NODE_TERMINAL; + continue; + } + /* Found the best split. */ + mbest[k] = msplit; + varUsed[msplit - 1] = 1; + upper[k] = ubest; + tgini[msplit - 1] += decsplit; + nodestatus[k] = NODE_INTERIOR; + + /* leftnode no.= ncur+1, rightnode no. = ncur+2. 
*/ + nodepop[ncur + 1] = ndendl - ndstart + 1; + nodepop[ncur + 2] = ndend - ndendl; + nodestart[ncur + 1] = ndstart; + nodestart[ncur + 2] = ndendl + 1; + + /* compute mean and sum of squares for the left daughter node */ + av = 0.0; + ss = 0.0; + for (j = ndstart; j <= ndendl; ++j) { + d = y[jdex[j]-1]; + m = j - ndstart; + ss += m * (av - d) * (av - d) / (m + 1); + av = (m * av + d) / (m+1); + } + avnode[ncur+1] = av; + nodestatus[ncur+1] = NODE_TOSPLIT; + if (nodepop[ncur + 1] <= nthsize) { + nodestatus[ncur + 1] = NODE_TERMINAL; + } + + /* compute mean and sum of squares for the right daughter node */ + av = 0.0; + ss = 0.0; + for (j = ndendl + 1; j <= ndend; ++j) { + d = y[jdex[j]-1]; + m = j - (ndendl + 1); + ss += m * (av - d) * (av - d) / (m + 1); + av = (m * av + d) / (m + 1); + } + avnode[ncur + 2] = av; + nodestatus[ncur + 2] = NODE_TOSPLIT; + if (nodepop[ncur + 2] <= nthsize) { + nodestatus[ncur + 2] = NODE_TERMINAL; + } + + /* map the daughter nodes */ + lDaughter[k] = ncur + 1 + 1; + rDaughter[k] = ncur + 2 + 1; + /* Augment the tree by two nodes. */ + ncur += 2; + } + *treeSize = nrnodes; + for (k = nrnodes - 1; k >= 0; --k) { + if (nodestatus[k] == 0) (*treeSize)--; + if (nodestatus[k] == NODE_TOSPLIT) { + nodestatus[k] = NODE_TERMINAL; + } + } + +} + +/*--------------------------------------------------------------*/ + +void findBestSplit(double *x, int *jdex, double *y, int mdim, int nsample, + int ndstart, int ndend, int *msplit, double *decsplit, + double *ubest, int *ndendl, int *jstat, int mtry, + double sumnode, int nodecnt, int *cat) { + int last, ncat[32], icat[32], lc, nl, nr, npopl, npopr; + int i, j, kv, l; + static int *mind, *ncase; + static double *xt, *ut, *v, *yl; + double sumcat[32], avcat[32], tavcat[32], ubestt; + double crit, critmax, critvar, suml, sumr, d, critParent; + + + if (in_findBestSplit==-99){ + free(ncase); + free(mind); //had to remove this so that it wont crash for when mdim=0, strangely happened for replace=0 + free(v); + free(yl); + free(xt); + free(ut); + // PRINTF("giving up mem in findBestSplit\n"); + return; + } + + if (in_findBestSplit==0){ + in_findBestSplit=1; + ut = (double *) calloc(nsample, sizeof(double)); + xt = (double *) calloc(nsample, sizeof(double)); + v = (double *) calloc(nsample, sizeof(double)); + yl = (double *) calloc(nsample, sizeof(double)); + mind = (int *) calloc(mdim+1, sizeof(int)); //seems that the sometimes i am asking for kv[10] and that causes problesmms + //so allocate 1 more. 
helps with not crashing in windows + ncase = (int *) calloc(nsample, sizeof(int)); + } + zeroDouble(ut, nsample); + zeroDouble(xt, nsample); + zeroDouble(v, nsample); + zeroDouble(yl, nsample); + zeroInt(mind, mdim); + zeroInt(ncase, nsample); + + zeroDouble(avcat, 32); + zeroDouble(tavcat, 32); + + /* START BIG LOOP */ + *msplit = -1; + *decsplit = 0.0; + critmax = 0.0; + ubestt = 0.0; + for (i=0; i < mdim; ++i) mind[i] = i; + + last = mdim - 1; + for (i = 0; i < mtry; ++i) { + critvar = 0.0; + j = (int) (unif_rand() * (last+1)); + //PRINTF("j=%d, last=%d mind[j]=%d\n", j, last, mind[j]);fflush(stdout); + kv = mind[j]; + //if(kv>100){ + // 1; + // getchar(); + //} + swapInt(mind[j], mind[last]); + /* mind[j] = mind[last]; + * mind[last] = kv; */ + last--; + + lc = cat[kv]; + if (lc == 1) { + /* numeric variable */ + for (j = ndstart; j <= ndend; ++j) { + xt[j] = x[kv + (jdex[j] - 1) * mdim]; + yl[j] = y[jdex[j] - 1]; + } + } else { + /* categorical variable */ + zeroInt(ncat, 32); + zeroDouble(sumcat, 32); + for (j = ndstart; j <= ndend; ++j) { + l = (int) x[kv + (jdex[j] - 1) * mdim]; + sumcat[l - 1] += y[jdex[j] - 1]; + ncat[l - 1] ++; + } + /* Compute means of Y by category. */ + for (j = 0; j < lc; ++j) { + avcat[j] = ncat[j] ? sumcat[j] / ncat[j] : 0.0; + } + /* Make the category mean the `pseudo' X data. */ + for (j = 0; j < nsample; ++j) { + xt[j] = avcat[(int) x[kv + (jdex[j] - 1) * mdim] - 1]; + yl[j] = y[jdex[j] - 1]; + } + } + /* copy the x data in this node. */ + for (j = ndstart; j <= ndend; ++j) v[j] = xt[j]; + for (j = 1; j <= nsample; ++j) ncase[j - 1] = j; + R_qsort_I(v, ncase, ndstart + 1, ndend + 1); + if (v[ndstart] >= v[ndend]) continue; + /* ncase(n)=case number of v nth from bottom */ + /* Start from the right and search to the left. */ + critParent = sumnode * sumnode / nodecnt; + suml = 0.0; + sumr = sumnode; + npopl = 0; + npopr = nodecnt; + crit = 0.0; + /* Search through the "gaps" in the x-variable. */ + for (j = ndstart; j <= ndend - 1; ++j) { + d = yl[ncase[j] - 1]; + suml += d; + sumr -= d; + npopl++; + npopr--; + if (v[j] < v[j+1]) { + crit = (suml * suml / npopl) + (sumr * sumr / npopr) - + critParent; + if (crit > critvar) { + ubestt = (v[j] + v[j+1]) / 2.0; + critvar = crit; + } + } + } + if (critvar > critmax) { + *ubest = ubestt; + *msplit = kv + 1; + critmax = critvar; + for (j = ndstart; j <= ndend; ++j) { + ut[j] = xt[j]; + } + if (cat[kv] > 1) { + for (j = 0; j < cat[kv]; ++j) tavcat[j] = avcat[j]; + } + } + } + *decsplit = critmax; + + /* If best split can not be found, set to terminal node and return. */ + if (*msplit != -1) { + nl = ndstart; + for (j = ndstart; j <= ndend; ++j) { + if (ut[j] <= *ubest) { + nl++; + ncase[nl-1] = jdex[j]; + } + } + *ndendl = imax2(nl - 1, ndstart); + nr = *ndendl + 1; + for (j = ndstart; j <= ndend; ++j) { + if (ut[j] > *ubest) { + if (nr >= nsample) break; + nr++; + ncase[nr - 1] = jdex[j]; + } + } + if (*ndendl >= ndend) *ndendl = ndend - 1; + for (j = ndstart; j <= ndend; ++j) jdex[j] = ncase[j]; + + lc = cat[*msplit - 1]; + if (lc > 1) { + for (j = 0; j < lc; ++j) { + icat[j] = (tavcat[j] < *ubest) ? 
1 : 0; + } + *ubest = pack(lc, icat); + } + } else *jstat = 1; + +} +/*====================================================================*/ +void predictRegTree(double *x, int nsample, int mdim, + int *lDaughter, int *rDaughter, SMALL_INT *nodestatus, + double *ypred, double *split, double *nodepred, + int *splitVar, int treeSize, int *cat, int maxcat, + int *nodex) { + int i, j, k, m, *cbestsplit; + unsigned int npack; + + /* decode the categorical splits */ + if (maxcat > 1) { + cbestsplit = (int *) calloc(maxcat * treeSize, sizeof(int)); + zeroInt(cbestsplit, maxcat * treeSize); + for (i = 0; i < treeSize; ++i) { + if (nodestatus[i] != NODE_TERMINAL && cat[splitVar[i] - 1] > 1) { + npack = (unsigned int) split[i]; + /* unpack `npack' into bits */ + for (j = 0; npack; npack >>= 1, ++j) { + cbestsplit[j + i*maxcat] = npack & 1; + } + } + } + } + + for (i = 0; i < nsample; ++i) { + k = 0; + while (nodestatus[k] != NODE_TERMINAL) { /* go down the tree */ + m = splitVar[k] - 1; + if (cat[m] == 1) { + k = (x[m + i*mdim] <= split[k]) ? + lDaughter[k] - 1 : rDaughter[k] - 1; + } else { + /* Split by a categorical predictor */ + k = cbestsplit[(int) x[m + i * mdim] - 1 + k * maxcat] ? + lDaughter[k] - 1 : rDaughter[k] - 1; + } + } + /* terminal node: assign prediction and move on to next */ + ypred[i] = nodepred[k]; + nodex[i] = k + 1; + } + if (maxcat > 1) free(cbestsplit); +} + + + +void print_regRF_params( int *xdim, int *sampsize, + int *nthsize, int *nrnodes, int *nTree, int *mtry, int *imp, + int *cat, int maxcat, int *jprint, int doProx, int oobprox, + int biasCorr, double *yptr, double *errimp, double *impmat, + double *impSD, double *prox, int *treeSize, SMALL_INT *nodestatus, + int *lDaughter, int *rDaughter, double *avnode, int *mbest, + double *upper, double *mse, int *keepf, int *replace, + int testdat, double *xts, int *nts, double *yts, int labelts, + double *yTestPred, double *proxts, double *msets, double *coef, + int *nout, int *inbag) { + PRINTF("n_size %d p_size %d\n", xdim[0], xdim[1]); + PRINTF("sampsize %d, nodesize %d nrnodes %d\n", *sampsize, *nthsize, *nrnodes); + PRINTF("ntree %d, mtry/nvar %d, impor %d, localimp %d, nPerm %d\n", *nTree, *mtry, imp[0], imp[1], imp[2]); + PRINTF("maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d\n", maxcat, *jprint, doProx, oobprox, biasCorr); + PRINTF("prox %f, keep.forest %d, keep.inbag %d\n", *prox, keepf[0], keepf[1]); + PRINTF("replace %d, labelts %d, proxts %f\n", *replace, labelts, *proxts); +} diff --git a/randomforest-matlab/RF_Reg_C/src/reg_RF.h b/randomforest-matlab/RF_Reg_C/src/reg_RF.h new file mode 100644 index 0000000..e38cb1b --- /dev/null +++ b/randomforest-matlab/RF_Reg_C/src/reg_RF.h @@ -0,0 +1,19 @@ +/************************************************************** + * mex interface to Andy Liaw et al.'s C code (used in R package randomForest) + * Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) + * License: GPLv2 + * Version: 0.02 + * + * Supporting file that has some declarations. 
+ *************************************************************/ + +#define uint32 unsigned long +#define SMALL_INT char + +#ifdef MATLAB +#define SMALL_INT_CLASS mxCHAR_CLASS //will be used to allocate memory t +#endif + +void seedMT(uint32 seed); +uint32 randomMT(void); + diff --git a/randomforest-matlab/RF_Reg_C/test_RegRF_extensively.m b/randomforest-matlab/RF_Reg_C/test_RegRF_extensively.m new file mode 100644 index 0000000..b764913 --- /dev/null +++ b/randomforest-matlab/RF_Reg_C/test_RegRF_extensively.m @@ -0,0 +1,48 @@ +%************************************************************** +%* Test of mex interface to Andy Liaw et al.'s C code (used in R package randomForest) +%* Added by Abhishek Jaiantilal ( abhishek.jaiantilal@colorado.edu ) +%* License: GPLv2 +%* Version: 0.02 +% +% This file runs tests about 10 times on the diabetes dataset +% +%************************************************************** + +%compile everything +if strcmpi(computer,'PCWIN') |strcmpi(computer,'PCWIN64') + compile_windows +else + compile_linux +end + +load data/diabetes + +%diabetes, N=442, D=10 +total_train_time=0; +total_test_time=0; +for i=1:10 + fprintf('%d,',i); + tic; + model=regRF_train(diabetes.x,diabetes.y,1000); + total_train_time=toc; + tic; + y_hat =regRF_predict(diabetes.x,model); + total_test_time=total_test_time+toc; +end +fprintf('\nnum_tree %d: Avg train time %d, test time %d\n',1000,total_train_time/100,total_test_time/100); + +%diabetes, N=442, D=64 +%this second version of the diabetes dataset has 64 dimensions rather than +%10 +total_train_time=0; +total_test_time=0; +for i=1:1 + fprintf('%d,',i); + tic; + model=regRF_train(diabetes.x2,diabetes.y,1000); + total_train_time=total_train_time+toc; + tic; + y_hat =regRF_predict(diabetes.x2,model); + total_test_time=total_test_time+toc; +end +fprintf('\nnum_tree %d: Avg train time %d, test time %d\n',1000,total_train_time/100,total_test_time/100); diff --git a/randomforest-matlab/RF_Reg_C/tutorial_RegRF.m b/randomforest-matlab/RF_Reg_C/tutorial_RegRF.m new file mode 100644 index 0000000..13abbab --- /dev/null +++ b/randomforest-matlab/RF_Reg_C/tutorial_RegRF.m @@ -0,0 +1,237 @@ +% A simple tutorial file to interface with RF +% Options copied from http://cran.r-project.org/web/packages/randomForest/randomForest.pdf + +%run plethora of tests +clc +close all + +%compile everything +if strcmpi(computer,'PCWIN') |strcmpi(computer,'PCWIN64') + compile_windows +else + compile_linux +end + +total_train_time=0; +total_test_time=0; + +%diabetes +load data/diabetes + +%modify so that training data is NxD and labels are Nx1, where N=#of +%examples, D=# of features + +X = diabetes.x; +Y = diabetes.y; + +[N D] =size(X); +%randomly split into 400 examples for training and 42 for testing +randvector = randperm(N); + +X_trn = X(randvector(1:400),:); +Y_trn = Y(randvector(1:400)); +X_tst = X(randvector(401:end),:); +Y_tst = Y(randvector(401:end)); + + + +% example 1: simply use with the defaults + model = regRF_train(X_trn,Y_trn); + Y_hat = regRF_predict(X_tst,model); + fprintf('\nexample 1: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); + +% % example 2: set to 100 trees +% model = regRF_train(X_trn,Y_trn, 100); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 2: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% % example 3: set to 100 trees, mtry = 2 +% model = regRF_train(X_trn,Y_trn, 100,2); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 3: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% % example 4: set to defaults trees and mtry by 
specifying values as 0 +% model = regRF_train(X_trn,Y_trn, 0, 0); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 4: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% % % example 5: set sampling without replacement (default is with replacement) +% extra_options.replace = 0 ; +% model = regRF_train(X_trn,Y_trn, 100, 4, extra_options); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 5: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% % example 6: sampsize example +% % extra_options.sampsize = Size(s) of sample to draw. For classification, +% % if sampsize is a vector of the length the number of strata, then sampling is stratified by strata, +% % and the elements of sampsize indicate the numbers to be drawn from the strata. +% clear extra_options +% extra_options.sampsize = size(X_trn,1)*2/3; +% +% model = regRF_train(X_trn,Y_trn, 100, 4, extra_options); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 6: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% % example 7: nodesize +% % extra_options.nodesize = Minimum size of terminal nodes. Setting this number larger causes smaller trees +% % to be grown (and thus take less time). Note that the default values are different +% % for classification (1) and regression (5). +% clear extra_options +% extra_options.nodesize = 7; +% +% model = regRF_train(X_trn,Y_trn, 100, 4, extra_options); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 7: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% +% % example 8: calculating importance +% clear extra_options +% extra_options.importance = 1; %(0 = (Default) Don't, 1=calculate) +% +% model = regRF_train(X_trn,Y_trn, 100, 4, extra_options); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 8: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% %model will have 3 variables for importance importanceSD and localImp +% %importance = a matrix with nclass + 2 (for classification) or two (for regression) columns. +% % For classification, the first nclass columns are the class-specific measures +% % computed as mean decrease in accuracy. The nclass + 1st column is the +% % mean decrease in accuracy over all classes. The last column is the mean decrease +% % in Gini index. For Regression, the first column is the mean decrease in +% % accuracy and the second the mean decrease in MSE. If importance=FALSE, +% % the last measure is still returned as a vector. +% figure('Name','Importance Plots') +% subplot(3,1,1); +% bar(model.importance(:,end-1));xlabel('feature');ylabel('magnitude'); +% title('Mean decrease in Accuracy'); +% +% subplot(3,1,2); +% bar(model.importance(:,end));xlabel('feature');ylabel('magnitude'); +% title('Mean decrease in Gini index'); +% +% +% %importanceSD = The ?standard errors? of the permutation-based importance measure. For classification, +% % a D by nclass + 1 matrix corresponding to the first nclass + 1 +% % columns of the importance matrix. For regression, a length p vector. +% model.importanceSD +% subplot(3,1,3); +% bar(model.importanceSD);xlabel('feature');ylabel('magnitude'); +% title('Std. errors of importance measure'); +% +% % example 9: calculating local importance +% % extra_options.localImp = Should casewise importance measure be computed? (Setting this to TRUE will +% % override importance.) +% %localImp = a D by N matrix containing the casewise importance measures, the [i,j] element +% % of which is the importance of i-th variable on the j-th case. NULL if +% % localImp=FALSE. 
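+% As a rough illustration (not part of the original tutorial), once the
+% example below has filled in model.localImp, the D x N matrix can be
+% summarized or visualized directly, e.g.:
+%   imagesc(model.localImp); xlabel('case'); ylabel('feature');
+%   case_avg = mean(model.localImp, 2);  % average importance of each feature over all cases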
+% clear extra_options +% extra_options.localImp = 1; %(0 = (Default) Don't, 1=calculate) +% +% model = regRF_train(X_trn,Y_trn, 100, 4, extra_options); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 9: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% model.localImp +% +% % example 10: calculating proximity +% % extra_options.proximity = Should proximity measure among the rows be calculated? +% clear extra_options +% extra_options.proximity = 1; %(0 = (Default) Don't, 1=calculate) +% +% model = regRF_train(X_trn,Y_trn, 100, 4, extra_options); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 10: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% model.proximity +% +% +% % example 11: use only OOB for proximity +% % extra_options.oob_prox = Should proximity be calculated only on 'out-of-bag' data? +% clear extra_options +% extra_options.proximity = 1; %(0 = (Default) Don't, 1=calculate) +% extra_options.oob_prox = 0; %(Default = 1 if proximity is enabled, Don't 0) +% +% model = regRF_train(X_trn,Y_trn, 100, 4, extra_options); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 11: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% +% % example 12: to see what is going on behind the scenes +% % extra_options.do_trace = If set to TRUE, give a more verbose output as randomForest is run. If set to +% % some integer, then running output is printed for every +% % do_trace trees. +% clear extra_options +% extra_options.do_trace = 1; %(Default = 0) +% +% model = regRF_train(X_trn,Y_trn, 100, 4, extra_options); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 12: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% % example 13: to see what is going on behind the scenes +% % extra_options.keep_inbag Should an n by ntree matrix be returned that keeps track of which samples are +% % 'in-bag' in which trees (but not how many times, if sampling with replacement) +% +% clear extra_options +% extra_options.keep_inbag = 1; %(Default = 0) +% +% model = regRF_train(X_trn,Y_trn, 100, 4, extra_options); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 13: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% model.inbag +% +% +% % example 14: getting the OOB MSE rate. model will have mse field +% model = regRF_train(X_trn,Y_trn); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 14: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% figure('Name','OOB error rate'); +% plot(model.mse); title('OOB MSE error rate'); xlabel('iteration (# trees)'); ylabel('OOB error rate'); +% +% % +% % example 15: nPerm +% % Number of times the OOB data are permuted per tree for assessing variable +% % importance. Number larger than 1 gives slightly more stable estimate, but not +% % very effective. Currently only implemented for regression. 
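+% Conceptually (an illustrative sketch under assumed helpers, not the
+% package internals), the permutation importance of a variable j is the
+% increase in out-of-bag error after shuffling that column; nPerm simply
+% repeats the shuffle to stabilize the estimate. With hypothetical OOB
+% data X_oob/Y_oob it would look like:
+%   base = mean((Y_oob - regRF_predict(X_oob, model)).^2);
+%   Xp = X_oob;  Xp(:, j) = Xp(randperm(size(Xp, 1)), j);   % permute variable j
+%   imp_j = mean((Y_oob - regRF_predict(Xp, model)).^2) - base;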
+% clear extra_options +% extra_options.importance=1; +% extra_options.nPerm = 1; %(Default = 0) +% model = regRF_train(X_trn,Y_trn,100,2,extra_options); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 15: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% figure('Name','Importance Plots nPerm=1') +% subplot(2,1,1); +% bar(model.importance(:,end-1));xlabel('feature');ylabel('magnitude'); +% title('Mean decrease in Accuracy'); +% +% subplot(2,1,2); +% bar(model.importance(:,end));xlabel('feature');ylabel('magnitude'); +% title('Mean decrease in Gini index'); +% +% %let's now run with nPerm=3 +% clear extra_options +% extra_options.importance=1; +% extra_options.nPerm = 3; %(Default = 0) +% model = regRF_train(X_trn,Y_trn,100,2,extra_options); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 15: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); +% +% figure('Name','Importance Plots nPerm=3') +% subplot(2,1,1); +% bar(model.importance(:,end-1));xlabel('feature');ylabel('magnitude'); +% title('Mean decrease in Accuracy'); +% +% subplot(2,1,2); +% bar(model.importance(:,end));xlabel('feature');ylabel('magnitude'); +% title('Mean decrease in Gini index'); +% +% % example 16: corr_bias (not recommended to use) +% clear extra_options +% extra_options.corr_bias=1; +% model = regRF_train(X_trn,Y_trn,100,2,extra_options); +% Y_hat = regRF_predict(X_tst,model); +% fprintf('\nexample 16: MSE rate %f\n', sum((Y_hat-Y_tst).^2)); + + \ No newline at end of file diff --git a/segmentation/APPgetSpStats.m b/segmentation/APPgetSpStats.m new file mode 100644 index 0000000..62dc6e9 --- /dev/null +++ b/segmentation/APPgetSpStats.m @@ -0,0 +1,83 @@ +function imsegs = APPgetSpStats(imsegs) +% imsegs = APPgetSpStats(imsegs) +% Gets basic information about the superpixels +% +% Copyright(C) Derek Hoiem, Carnegie Mellon University, 2005 +% Current Version: 1.0 09/30/2005 + +for ii = 1:length(imsegs) + + nseg = imsegs(ii).nseg; + segimage = double( imsegs(ii).segimage ); +% segimage = imsegs(ii).segimage; + + imh = size(segimage, 1); + + adjmat = eye([nseg nseg]); + + % get adjacency + dx = segimage ~= segimage(:,[2:end end]); + dy = segimage ~= segimage([2:end end], :); + + ind1 = find(dy); + ind2 = ind1 + 1; + s1 = segimage(ind1); + s2 = segimage(ind2); +% adjmat(s1 + nseg*(s2-1)) = 1; +% adjmat(s2 + nseg*(s1-1)) = 1; + adjmat(sub2ind([nseg, nseg], s1, s2)) = 1; + adjmat(sub2ind([nseg, nseg], s2, s1)) = 1; + + ind3 = find(dx); + ind4 = ind3 + imh; + s3 = segimage(ind3); + s4 = segimage(ind4); +% adjmat(s3 + nseg*(s4-1)) = 1; +% adjmat(s4 + nseg*(s3-1)) = 1; + adjmat(sub2ind([nseg, nseg], s3, s4)) = 1; + adjmat(sub2ind([nseg, nseg], s4, s3)) = 1; + + +% slower code +% [height, width] = size(segimage); +% +% for y = 1:height-1 +% for x = 1:width-1 +% s1 = segimage(y, x); +% s2 = segimage(y+1, x); +% s3 = segimage(y, x+1); +% if s1 > 0 +% npixels(s1) = npixels(s1) + 1; +% if s2 > 0 +% adjmat(s1, s2) = 1; +% adjmat(s2, s1) = 1; +% end +% if s3 > 0 +% adjmat(s1, s3) = 1; +% adjmat(s3, s1) = 1; +% end +% end +% end +% end +% +% x = width; +% for y = 1:height +% s1 = segimage(y, x); +% if s1 > 0 +% npixels(s1) = npixels(s1) + 1; +% end +% end +% +% y = height; +% for x = 1:width-1 +% s1 = segimage(y, x); +% if s1 > 0 +% npixels(s1) = npixels(s1) + 1; +% end +% end + + stats = regionprops(segimage, 'Area'); + imsegs(ii).npixels = vertcat(stats(:).Area); + imsegs(ii).adjmat = logical(adjmat); + +end diff --git a/segmentation/im2superpixels.m b/segmentation/im2superpixels.m new file mode 100644 index 
0000000..a546f6b --- /dev/null +++ b/segmentation/im2superpixels.m @@ -0,0 +1,52 @@ +function imsegs = im2superpixels(im, method, varargin ) + if nargin < 3 + % default parameters to generate superpixels + switch method + case 'pedro' + sigma = 0.8; + k = 100; + min_size = 150; + case 'SLIC' + num_superpixel = 200; + otherwise + error( 'unknown method to generate superpixels.' ); + end + else + switch method + case 'pedro' + para = varargin{1}; + sigma = para(1); + k = para(2); + min_size = para(3); + case 'SLIC' + num_superpixel = varargin{1}; + otherwise + error( 'unknown method to generate superpixels.' ); + end + end + +% prefix = num2str(floor(rand(1)*10000000)); +% fn1 = ['./tmpim' prefix '.ppm']; +% fn2 = ['./tmpimsp' prefix '.ppm']; +% segcmd = ['E:\playerkk\code\MATLAB\segment\segment ', num2str(seg_para(1)),... +% ' ', num2str(seg_para(2)), ' ', num2str(seg_para(3))]; +% +% imwrite(im, fn1); +% system([segcmd ' ' fn1 ' ' fn2]); + if isa(im, 'uint8') + im = double(im); + end + + if max(im(:)) < 10 + im = double(im * 255); + end + + switch method + case 'pedro' + segim = mexSegment(im, sigma, k, int32(min_size)); + case 'SLIC' + segim = uint8(mexSLIC(uint32(im), num_superpixel)); + otherwise + error( 'unknown method to generate superpixels.' ); + end + imsegs = processSuperpixelImage(segim); diff --git a/segmentation/mcmcGetSuperpixelBoundaries_fast.m b/segmentation/mcmcGetSuperpixelBoundaries_fast.m new file mode 100644 index 0000000..b124ada --- /dev/null +++ b/segmentation/mcmcGetSuperpixelBoundaries_fast.m @@ -0,0 +1,47 @@ +function [boundmap, perim] = mcmcGetSuperpixelBoundaries_fast(imsegs) +% boundmap{nimages}{nseg, nseg}(npixidx) - boundaries between pairs of segs +% perim{nimages}(nseg, nseg) - number of pixels in boundary +% nimages == 1, boundmap{nseg, nseg}(npixidx), perim(nseg, nseg) + +for f = 1:numel(imsegs) + + nseg = imsegs(f).nseg; + segimage = double(imsegs(f).segimage); + [imh, imw] = size(segimage); + + % get adjacency + dx = segimage ~= segimage(:,[2:end end]); + dy = segimage ~= segimage([2:end end], :); + + ind1 = find(dy); + ind2 = ind1 + 1; + s1 = segimage(ind1); + s2 = segimage(ind2); + ind3 = find(dx); + ind4 = ind3 + imh; + s3 = segimage(ind3); + s4 = segimage(ind4); + + % get boundaries + ind = [ind1 ; ind3]; + s12 = [[min([s1 s2], [], 2) max([s1 s2], [], 2)] ; ... + [min([s3 s4], [], 2) max([s3 s4], [], 2)]]; + perim{f} = zeros(nseg, nseg, 'uint16'); + s1 = s12(:, 1); s2 = s12(:, 2); + for k = 1:numel(ind) + perim{f}(s1(k), s2(k)) = perim{f}(s1(k), s2(k))+1; + end + bndind = find(perim{f}>0); + boundmap{f}= cell(nseg, nseg); + for k = bndind' + ts1 = mod(k-1, nseg)+1; + ts2 = floor((k-1)/nseg)+1; + boundmap{f}{k} = ind(s1==ts1 & s2==ts2); + end + +end + +if f==1 + boundmap = boundmap{1}; + perim = perim{1}; +end diff --git a/segmentation/processSuperpixelImage.m b/segmentation/processSuperpixelImage.m new file mode 100644 index 0000000..51cfd65 --- /dev/null +++ b/segmentation/processSuperpixelImage.m @@ -0,0 +1,33 @@ +function imsegs = processSuperpixelImage(fn) +% imsegs = processSuperpixelImage(fn) +% Creates the imsegs structure from a segmentation image +% +% INPUT: +% fn - filenames of segmentation images. Use '/' (not '\') to separate directories. +% Segments are denoted by different RGB colors. 
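+% Internally, each RGB triple is packed into a single integer,
+% r + g*256 + b*256^2, and grp2idx relabels the distinct packed values
+% as segment ids 1..nseg (e.g. a segment colored [12 3 0] packs to
+% 12 + 3*256 = 780 before relabeling).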
+% +% OUTPUT: +% imsegs - image segmentation data +% +% Copyright(C) Derek Hoiem, Carnegie Mellon University, 2006 +% Current Version: 1.0 04/24/2006 + +% if isstr(fn) +% fn = {fn}; +% end + +fn = {fn}; + +imsegs(length(fn)) = struct('imname', '', 'imsize', [0 0]); +for f = 1:length(fn) + im = double(fn{f}); + + imsegs(f).imname = 'empty'; + imsegs(f).imsize = size(im); + imsegs(f).imsize = imsegs(f).imsize(1:2); + im = im(:, :, 1) + im(:, :, 2)*256 + im(:, :, 3)*256^2; + [gid, gn] = grp2idx(im(:)); + imsegs(f).segimage = uint16(reshape(gid, imsegs(f).imsize)); + imsegs(f).nseg = length(gn); +end +imsegs = APPgetSpStats(imsegs); \ No newline at end of file diff --git a/segmentation/segment/COPYING b/segmentation/segment/COPYING new file mode 100644 index 0000000..82fa1da --- /dev/null +++ b/segmentation/segment/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. 
To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. 
These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. 
If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. 
+ +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/segmentation/segment/Makefile b/segmentation/segment/Makefile new file mode 100644 index 0000000..e23c688 --- /dev/null +++ b/segmentation/segment/Makefile @@ -0,0 +1,23 @@ +INCDIR = -I. +DBG = -g +OPT = -O3 +CPP = g++ +CFLAGS = $(DBG) $(OPT) $(INCDIR) +LINK = -lm + +.cpp.o: + $(CPP) $(CFLAGS) -c $< -o $@ + +all: segment + +segment: segment.cpp segment-image.h segment-graph.h disjoint-set.h + $(CPP) $(CFLAGS) -o segment segment.cpp $(LINK) + +clean: + /bin/rm -f segment *.o + +clean-all: clean + /bin/rm -f *~ + + + diff --git a/segmentation/segment/README b/segmentation/segment/README new file mode 100644 index 0000000..310c71f --- /dev/null +++ b/segmentation/segment/README @@ -0,0 +1,25 @@ + +Implementation of the segmentation algorithm described in: + +Efficient Graph-Based Image Segmentation +Pedro F. Felzenszwalb and Daniel P. Huttenlocher +International Journal of Computer Vision, 59(2) September 2004. + +The program takes a color image (PPM format) and produces a segmentation +with a random color assigned to each region. + +1) Type "make" to compile "segment". + +2) Run "segment sigma k min input output". + +The parameters are: (see the paper for details) + +sigma: Used to smooth the input image before segmenting it. +k: Value for the threshold function. +min: Minimum component size enforced by post-processing. +input: Input image. +output: Output image. + +Typical parameters are sigma = 0.5, k = 500, min = 20. +Larger values for k result in larger components in the result. + diff --git a/segmentation/segment/convolve.h b/segmentation/segment/convolve.h new file mode 100644 index 0000000..99a4870 --- /dev/null +++ b/segmentation/segment/convolve.h @@ -0,0 +1,69 @@ +/* +Copyright (C) 2006 Pedro Felzenszwalb + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* convolution */ + +#ifndef CONVOLVE_H +#define CONVOLVE_H + +#include +#include +#include +#include "image.h" + +/* convolve src with mask. dst is flipped! */ +static void convolve_even(image *src, image *dst, + std::vector &mask) { + int width = src->width(); + int height = src->height(); + int len = mask.size(); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + float sum = mask[0] * imRef(src, x, y); + for (int i = 1; i < len; i++) { + sum += mask[i] * + (imRef(src, std::max(x-i,0), y) + + imRef(src, std::min(x+i, width-1), y)); + } + imRef(dst, y, x) = sum; + } + } +} + +/* convolve src with mask. dst is flipped! */ +static void convolve_odd(image *src, image *dst, + std::vector &mask) { + int width = src->width(); + int height = src->height(); + int len = mask.size(); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + float sum = mask[0] * imRef(src, x, y); + for (int i = 1; i < len; i++) { + sum += mask[i] * + (imRef(src, std::max(x-i,0), y) - + imRef(src, std::min(x+i, width-1), y)); + } + imRef(dst, y, x) = sum; + } + } +} + +#endif diff --git a/segmentation/segment/disjoint-set.h b/segmentation/segment/disjoint-set.h new file mode 100644 index 0000000..061b68c --- /dev/null +++ b/segmentation/segment/disjoint-set.h @@ -0,0 +1,79 @@ +/* +Copyright (C) 2006 Pedro Felzenszwalb + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef DISJOINT_SET +#define DISJOINT_SET + +// disjoint-set forests using union-by-rank and path compression (sort of). 
+ +typedef struct { + int rank; + int p; + int size; +} uni_elt; + +class universe { +public: + universe(int elements); + ~universe(); + int find(int x); + void join(int x, int y); + int size(int x) const { return elts[x].size; } + int num_sets() const { return num; } + +private: + uni_elt *elts; + int num; +}; + +universe::universe(int elements) { + elts = new uni_elt[elements]; + num = elements; + for (int i = 0; i < elements; i++) { + elts[i].rank = 0; + elts[i].size = 1; + elts[i].p = i; + } +} + +universe::~universe() { + delete [] elts; +} + +int universe::find(int x) { + int y = x; + while (y != elts[y].p) + y = elts[y].p; + elts[x].p = y; + return y; +} + +void universe::join(int x, int y) { + if (elts[x].rank > elts[y].rank) { + elts[y].p = x; + elts[x].size += elts[y].size; + } else { + elts[x].p = y; + elts[y].size += elts[x].size; + if (elts[x].rank == elts[y].rank) + elts[y].rank++; + } + num--; +} + +#endif diff --git a/segmentation/segment/filter.h b/segmentation/segment/filter.h new file mode 100644 index 0000000..f3c3d30 --- /dev/null +++ b/segmentation/segment/filter.h @@ -0,0 +1,100 @@ +/* +Copyright (C) 2006 Pedro Felzenszwalb + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* simple filters */ + +#ifndef FILTER_H +#define FILTER_H + +#include +#include +#include "image.h" +#include "misc.h" +#include "convolve.h" +#include "imconv.h" + +#define WIDTH 4.0 + +/* normalize mask so it integrates to one */ +static void normalize(std::vector &mask) { + int len = mask.size(); + float sum = 0; + for (int i = 1; i < len; i++) { + sum += fabs(mask[i]); + } + sum = 2*sum + fabs(mask[0]); + for (int i = 0; i < len; i++) { + mask[i] /= sum; + } +} + +/* make filters */ +#define MAKE_FILTER(name, fun) \ +static std::vector make_ ## name (float sigma) { \ + sigma = std::max(sigma, 0.01F); \ + int len = (int)ceil(sigma * WIDTH) + 1; \ + std::vector mask(len); \ + for (int i = 0; i < len; i++) { \ + mask[i] = fun; \ + } \ + return mask; \ +} + +MAKE_FILTER(fgauss, exp(-0.5*square(i/sigma))); + +/* convolve image with gaussian filter */ +static image *smooth(image *src, float sigma) { + std::vector mask = make_fgauss(sigma); + normalize(mask); + + image *tmp = new image(src->height(), src->width(), false); + image *dst = new image(src->width(), src->height(), false); + convolve_even(src, tmp, mask); + convolve_even(tmp, dst, mask); + + delete tmp; + return dst; +} + +/* convolve image with gaussian filter */ +image *smooth(image *src, float sigma) { + image *tmp = imageUCHARtoFLOAT(src); + image *dst = smooth(tmp, sigma); + delete tmp; + return dst; +} + +/* compute laplacian */ +static image *laplacian(image *src) { + int width = src->width(); + int height = src->height(); + image *dst = new image(width, height); + + for (int y = 1; y < height-1; y++) { + for (int x = 1; x < width-1; x++) { + float d2x = imRef(src, x-1, y) + imRef(src, x+1, y) - + 
2*imRef(src, x, y); + float d2y = imRef(src, x, y-1) + imRef(src, x, y+1) - + 2*imRef(src, x, y); + imRef(dst, x, y) = d2x + d2y; + } + } + return dst; +} + +#endif diff --git a/segmentation/segment/image.h b/segmentation/segment/image.h new file mode 100644 index 0000000..ce5b217 --- /dev/null +++ b/segmentation/segment/image.h @@ -0,0 +1,101 @@ +/* +Copyright (C) 2006 Pedro Felzenszwalb + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* a simple image class */ + +#ifndef IMAGE_H +#define IMAGE_H + +#include + +template +class image { + public: + /* create an image */ + image(const int width, const int height, const bool init = true); + + /* delete an image */ + ~image(); + + /* init an image */ + void init(const T &val); + + /* copy an image */ + image *copy() const; + + /* get the width of an image. */ + int width() const { return w; } + + /* get the height of an image. */ + int height() const { return h; } + + /* image data. */ + T *data; + + /* row pointers. */ + T **access; + + private: + int w, h; +}; + +/* use imRef to access image data. */ +#define imRef(im, x, y) (im->access[y][x]) + +/* use imPtr to get pointer to image data. */ +#define imPtr(im, x, y) &(im->access[y][x]) + +template +image::image(const int width, const int height, const bool init) { + w = width; + h = height; + data = new T[w * h]; // allocate space for image data + access = new T*[h]; // allocate space for row pointers + + // initialize row pointers + for (int i = 0; i < h; i++) + access[i] = data + (i * w); + + if (init) + memset(data, 0, w * h * sizeof(T)); +} + +template +image::~image() { + delete [] data; + delete [] access; +} + +template +void image::init(const T &val) { + T *ptr = imPtr(this, 0, 0); + T *end = imPtr(this, w-1, h-1); + while (ptr <= end) + *ptr++ = val; +} + + +template +image *image::copy() const { + image *im = new image(w, h, false); + memcpy(im->data, data, w * h * sizeof(T)); + return im; +} + +#endif + diff --git a/segmentation/segment/imconv.h b/segmentation/segment/imconv.h new file mode 100644 index 0000000..f30be5e --- /dev/null +++ b/segmentation/segment/imconv.h @@ -0,0 +1,177 @@ +/* +Copyright (C) 2006 Pedro Felzenszwalb + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* image conversion */ + +#ifndef CONV_H +#define CONV_H + +#include +#include "image.h" +#include "imutil.h" +#include "misc.h" + +#define RED_WEIGHT 0.299 +#define GREEN_WEIGHT 0.587 +#define BLUE_WEIGHT 0.114 + +static image *imageRGBtoGRAY(image *input) { + int width = input->width(); + int height = input->height(); + image *output = new image(width, height, false); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + imRef(output, x, y) = (uchar) + (imRef(input, x, y).r * RED_WEIGHT + + imRef(input, x, y).g * GREEN_WEIGHT + + imRef(input, x, y).b * BLUE_WEIGHT); + } + } + return output; +} + +static image *imageGRAYtoRGB(image *input) { + int width = input->width(); + int height = input->height(); + image *output = new image(width, height, false); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + imRef(output, x, y).r = imRef(input, x, y); + imRef(output, x, y).g = imRef(input, x, y); + imRef(output, x, y).b = imRef(input, x, y); + } + } + return output; +} + +static image *imageUCHARtoFLOAT(image *input) { + int width = input->width(); + int height = input->height(); + image *output = new image(width, height, false); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + imRef(output, x, y) = imRef(input, x, y); + } + } + return output; +} + +static image *imageINTtoFLOAT(image *input) { + int width = input->width(); + int height = input->height(); + image *output = new image(width, height, false); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + imRef(output, x, y) = imRef(input, x, y); + } + } + return output; +} + +static image *imageFLOATtoUCHAR(image *input, + float min, float max) { + int width = input->width(); + int height = input->height(); + image *output = new image(width, height, false); + + if (max == min) + return output; + + float scale = UCHAR_MAX / (max - min); + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + uchar val = (uchar)((imRef(input, x, y) - min) * scale); + imRef(output, x, y) = bound(val, (uchar)0, (uchar)UCHAR_MAX); + } + } + return output; +} + +static image *imageFLOATtoUCHAR(image *input) { + float min, max; + min_max(input, &min, &max); + return imageFLOATtoUCHAR(input, min, max); +} + +static image *imageUCHARtoLONG(image *input) { + int width = input->width(); + int height = input->height(); + image *output = new image(width, height, false); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + imRef(output, x, y) = imRef(input, x, y); + } + } + return output; +} + +static image *imageLONGtoUCHAR(image *input, long min, long max) { + int width = input->width(); + int height = input->height(); + image *output = new image(width, height, false); + + if (max == min) + return output; + + float scale = UCHAR_MAX / (float)(max - min); + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + uchar val = (uchar)((imRef(input, x, y) - min) * scale); + imRef(output, x, y) = bound(val, (uchar)0, (uchar)UCHAR_MAX); + } + } + return output; +} + +static image *imageLONGtoUCHAR(image *input) { + long min, max; + min_max(input, &min, &max); + return imageLONGtoUCHAR(input, min, max); +} + +static image *imageSHORTtoUCHAR(image *input, + short min, short max) { + int width = input->width(); + 
int height = input->height(); + image *output = new image(width, height, false); + + if (max == min) + return output; + + float scale = UCHAR_MAX / (float)(max - min); + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + uchar val = (uchar)((imRef(input, x, y) - min) * scale); + imRef(output, x, y) = bound(val, (uchar)0, (uchar)UCHAR_MAX); + } + } + return output; +} + +static image *imageSHORTtoUCHAR(image *input) { + short min, max; + min_max(input, &min, &max); + return imageSHORTtoUCHAR(input, min, max); +} + +#endif diff --git a/segmentation/segment/imutil.h b/segmentation/segment/imutil.h new file mode 100644 index 0000000..f9e16fa --- /dev/null +++ b/segmentation/segment/imutil.h @@ -0,0 +1,66 @@ +/* +Copyright (C) 2006 Pedro Felzenszwalb + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* some image utilities */ + +#ifndef IMUTIL_H +#define IMUTIL_H + +#include "image.h" +#include "misc.h" + +/* compute minimum and maximum value in an image */ +template +void min_max(image *im, T *ret_min, T *ret_max) { + int width = im->width(); + int height = im->height(); + + T min = imRef(im, 0, 0); + T max = imRef(im, 0, 0); + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + T val = imRef(im, x, y); + if (min > val) + min = val; + if (max < val) + max = val; + } + } + + *ret_min = min; + *ret_max = max; +} + +/* threshold image */ +template +image *threshold(image *src, int t) { + int width = src->width(); + int height = src->height(); + image *dst = new image(width, height); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + imRef(dst, x, y) = (imRef(src, x, y) >= t); + } + } + + return dst; +} + +#endif + diff --git a/segmentation/segment/mexSegment.cpp b/segmentation/segment/mexSegment.cpp new file mode 100644 index 0000000..8620b15 --- /dev/null +++ b/segmentation/segment/mexSegment.cpp @@ -0,0 +1,67 @@ +#include +#include +#include +#include + +#include "image.h" +#include "misc.h" +#include "pnmfile.h" +#include "segment-image.h" +#include "filter.h" + +void mexFunction( int nlhs, mxArray *plhs[], int nrhs, + const mxArray *prhs[] ) +{ + float sigma, k; + int min_size; + + //r = mxGetPr( prhs[0] ); + //g = mxGetPr( prhs[1] ); + //b = mxGetPr( prhs[3] ); + double *image = mxGetPr( prhs[0] ); + const mwSize *dims = mxGetDimensions( prhs[0] ); + + sigma = mxGetScalar( prhs[1] ); + k = mxGetScalar( prhs[2] ); + min_size = mxGetScalar( prhs[3] ); + + //mexPrintf( "sigma: %.3f, k: %.3f, min_size: %d\n", sigma, k, min_size ); + + int height = dims[0]; + int width = dims[1]; + int c = dims[2]; + + typedef unsigned char uchar; + imageRGB *input = new imageRGB(width, height); + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + int index = height*x + y; + imRef(input, x, y).r = static_cast( image[index] ); + imRef(input, x, y).g = static_cast( image[width*height + 
index] ); + imRef(input, x, y).b = static_cast( image[width*height*2 + index] ); + } + } + + int num_ccs; + imageRGB *seg = segment_image(input, sigma, k, min_size, &num_ccs); + mexPrintf( "number of regions: %d\n", num_ccs ); + + plhs[0] = mxCreateNumericArray( 3, dims, mxUINT8_CLASS, mxREAL ); + uchar *output = static_cast( mxGetData(plhs[0]) ); + + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + int index = height*x + y; + output[index] = imRef(seg, x, y).r; + output[width*height + index] = imRef(seg, x, y).g; + output[2*width*height + index] = imRef(seg, x, y).b; + } + } + + delete input; + delete seg; +} \ No newline at end of file diff --git a/segmentation/segment/mexSegment.mexa64 b/segmentation/segment/mexSegment.mexa64 new file mode 100644 index 0000000..7e32c00 Binary files /dev/null and b/segmentation/segment/mexSegment.mexa64 differ diff --git a/segmentation/segment/mexSegment.mexw64 b/segmentation/segment/mexSegment.mexw64 new file mode 100644 index 0000000..f52f521 Binary files /dev/null and b/segmentation/segment/mexSegment.mexw64 differ diff --git a/segmentation/segment/misc.h b/segmentation/segment/misc.h new file mode 100644 index 0000000..27f6013 --- /dev/null +++ b/segmentation/segment/misc.h @@ -0,0 +1,65 @@ +/* +Copyright (C) 2006 Pedro Felzenszwalb + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* random stuff */ + +#ifndef MISC_H +#define MISC_H + +#include + +#ifndef M_PI +#define M_PI 3.141592653589793 +#endif + +typedef unsigned char uchar; + +typedef struct { uchar r, g, b; } rgb; + +inline bool operator==(const rgb &a, const rgb &b) { + return ((a.r == b.r) && (a.g == b.g) && (a.b == b.b)); +} + +template +inline T abs(const T &x) { return (x > 0 ? x : -x); }; + +template +inline int sign(const T &x) { return (x >= 0 ? 1 : -1); }; + +template +inline T square(const T &x) { return x*x; }; + +template +inline T bound(const T &x, const T &min, const T &max) { + return (x < min ? min : (x > max ? max : x)); +} + +template +inline bool check_bound(const T &x, const T&min, const T &max) { + return ((x < min) || (x > max)); +} + +inline int vlib_round(float x) { return (int)(x + 0.5F); } + +inline int vlib_round(double x) { return (int)(x + 0.5); } + +inline double gaussian(double val, double sigma) { + return exp(-square(val/sigma)/2)/(sqrt(2*M_PI)*sigma); +} + +#endif diff --git a/segmentation/segment/pnmfile.h b/segmentation/segment/pnmfile.h new file mode 100644 index 0000000..2f19731 --- /dev/null +++ b/segmentation/segment/pnmfile.h @@ -0,0 +1,211 @@ +/* +Copyright (C) 2006 Pedro Felzenszwalb + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* basic image I/O */ + +#ifndef PNM_FILE_H +#define PNM_FILE_H + +#include +#include +#include +#include +#include "image.h" +#include "misc.h" + +#define BUF_SIZE 256 + +class pnm_error { }; + +static void read_packed(unsigned char *data, int size, std::ifstream &f) { + unsigned char c = 0; + + int bitshift = -1; + for (int pos = 0; pos < size; pos++) { + if (bitshift == -1) { + c = f.get(); + bitshift = 7; + } + data[pos] = (c >> bitshift) & 1; + bitshift--; + } +} + +static void write_packed(unsigned char *data, int size, std::ofstream &f) { + unsigned char c = 0; + + int bitshift = 7; + for (int pos = 0; pos < size; pos++) { + c = c | (data[pos] << bitshift); + bitshift--; + if ((bitshift == -1) || (pos == size-1)) { + f.put(c); + bitshift = 7; + c = 0; + } + } +} + +/* read PNM field, skipping comments */ +static void pnm_read(std::ifstream &file, char *buf) { + char doc[BUF_SIZE]; + char c; + + file >> c; + while (c == '#') { + file.getline(doc, BUF_SIZE); + file >> c; + } + file.putback(c); + + file.width(BUF_SIZE); + file >> buf; + file.ignore(); +} + +static image *loadPBM(const char *name) { + char buf[BUF_SIZE]; + + /* read header */ + std::ifstream file(name, std::ios::in | std::ios::binary); + pnm_read(file, buf); + if (strncmp(buf, "P4", 2)) + throw pnm_error(); + + pnm_read(file, buf); + int width = atoi(buf); + pnm_read(file, buf); + int height = atoi(buf); + + /* read data */ + image *im = new image(width, height); + for (int i = 0; i < height; i++) + read_packed(imPtr(im, 0, i), width, file); + + return im; +} + +static void savePBM(image *im, const char *name) { + int width = im->width(); + int height = im->height(); + std::ofstream file(name, std::ios::out | std::ios::binary); + + file << "P4\n" << width << " " << height << "\n"; + for (int i = 0; i < height; i++) + write_packed(imPtr(im, 0, i), width, file); +} + +static image *loadPGM(const char *name) { + char buf[BUF_SIZE]; + + /* read header */ + std::ifstream file(name, std::ios::in | std::ios::binary); + pnm_read(file, buf); + if (strncmp(buf, "P5", 2)) + throw pnm_error(); + + pnm_read(file, buf); + int width = atoi(buf); + pnm_read(file, buf); + int height = atoi(buf); + + pnm_read(file, buf); + if (atoi(buf) > UCHAR_MAX) + throw pnm_error(); + + /* read data */ + image *im = new image(width, height); + file.read((char *)imPtr(im, 0, 0), width * height * sizeof(uchar)); + + return im; +} + +static void savePGM(image *im, const char *name) { + int width = im->width(); + int height = im->height(); + std::ofstream file(name, std::ios::out | std::ios::binary); + + file << "P5\n" << width << " " << height << "\n" << UCHAR_MAX << "\n"; + file.write((char *)imPtr(im, 0, 0), width * height * sizeof(uchar)); +} + +static image *loadPPM(const char *name) { + char buf[BUF_SIZE], doc[BUF_SIZE]; + + /* read header */ + std::ifstream file(name, std::ios::in | std::ios::binary); + pnm_read(file, buf); + if (strncmp(buf, "P6", 2)) + throw pnm_error(); + + pnm_read(file, buf); + int width = atoi(buf); + pnm_read(file, buf); + int height = atoi(buf); + + pnm_read(file, buf); + if 
(atoi(buf) > UCHAR_MAX) + throw pnm_error(); + + /* read data */ + image *im = new image(width, height); + file.read((char *)imPtr(im, 0, 0), width * height * sizeof(rgb)); + + return im; +} + +static void savePPM(image *im, const char *name) { + int width = im->width(); + int height = im->height(); + std::ofstream file(name, std::ios::out | std::ios::binary); + + file << "P6\n" << width << " " << height << "\n" << UCHAR_MAX << "\n"; + file.write((char *)imPtr(im, 0, 0), width * height * sizeof(rgb)); +} + +template +void load_image(image **im, const char *name) { + char buf[BUF_SIZE]; + + /* read header */ + std::ifstream file(name, std::ios::in | std::ios::binary); + pnm_read(file, buf); + if (strncmp(buf, "VLIB", 9)) + throw pnm_error(); + + pnm_read(file, buf); + int width = atoi(buf); + pnm_read(file, buf); + int height = atoi(buf); + + /* read data */ + *im = new image(width, height); + file.read((char *)imPtr((*im), 0, 0), width * height * sizeof(T)); +} + +template +void save_image(image *im, const char *name) { + int width = im->width(); + int height = im->height(); + std::ofstream file(name, std::ios::out | std::ios::binary); + + file << "VLIB\n" << width << " " << height << "\n"; + file.write((char *)imPtr(im, 0, 0), width * height * sizeof(T)); +} + +#endif diff --git a/segmentation/segment/segment-graph.h b/segmentation/segment/segment-graph.h new file mode 100644 index 0000000..0768552 --- /dev/null +++ b/segmentation/segment/segment-graph.h @@ -0,0 +1,83 @@ +/* +Copyright (C) 2006 Pedro Felzenszwalb + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef SEGMENT_GRAPH +#define SEGMENT_GRAPH + +#include +#include +#include "disjoint-set.h" + +// threshold function +#define THRESHOLD(size, c) (c/size) + +typedef struct { + float w; + int a, b; +} edge; + +bool operator<(const edge &a, const edge &b) { + return a.w < b.w; +} + +/* + * Segment a graph + * + * Returns a disjoint-set forest representing the segmentation. + * + * num_vertices: number of vertices in graph. + * num_edges: number of edges in graph + * edges: array of edges. + * c: constant for treshold function. + */ +universe *segment_graph(int num_vertices, int num_edges, edge *edges, + float c) { + // sort edges by weight + std::sort(edges, edges + num_edges); + + // make a disjoint-set forest + universe *u = new universe(num_vertices); + + // init thresholds + float *threshold = new float[num_vertices]; + for (int i = 0; i < num_vertices; i++) + threshold[i] = THRESHOLD(1,c); + + // for each edge, in non-decreasing weight order... 
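+  // Merging rule (Felzenszwalb & Huttenlocher, "Efficient Graph-Based Image
+  // Segmentation", IJCV 2004): components a and b are joined iff the weight of the
+  // edge between them is no larger than both Int(a) + tau(a) and Int(b) + tau(b),
+  // where tau(C) = c / |C|.  threshold[x] caches Int(x) + tau(x) for each component
+  // representative x; after a merge it becomes the current edge weight (the new
+  // internal difference, since edges arrive in non-decreasing order) plus c / size.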
+  for (int i = 0; i < num_edges; i++) {
+    edge *pedge = &edges[i];
+
+    // components connected by this edge
+    int a = u->find(pedge->a);
+    int b = u->find(pedge->b);
+    if (a != b) {
+      if ((pedge->w <= threshold[a]) &&
+          (pedge->w <= threshold[b])) {
+        u->join(a, b);
+        a = u->find(a);
+        threshold[a] = pedge->w + THRESHOLD(u->size(a), c);
+      }
+    }
+  }
+
+  // free up
+  delete [] threshold;
+  return u;
+}
+
+#endif
diff --git a/segmentation/segment/segment-image.h b/segmentation/segment/segment-image.h
new file mode 100644
index 0000000..b072abf
--- /dev/null
+++ b/segmentation/segment/segment-image.h
@@ -0,0 +1,157 @@
+/*
+Copyright (C) 2006 Pedro Felzenszwalb
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#ifndef SEGMENT_IMAGE
+#define SEGMENT_IMAGE
+
+#include <cstdlib>
+#include "image.h"
+#include "misc.h"
+#include "filter.h"
+#include "segment-graph.h"
+
+typedef image<rgb> imageRGB;
+typedef image<float> imageFloat;
+
+// random color
+rgb random_rgb(){
+  rgb c;
+  double r;
+
+  c.r = (uchar)rand();
+  c.g = (uchar)rand();
+  c.b = (uchar)rand();
+
+  return c;
+}
+
+// dissimilarity measure between pixels
+static inline float diff(image<float> *r, image<float> *g, image<float> *b,
+                         int x1, int y1, int x2, int y2) {
+  return sqrt(square(imRef(r, x1, y1)-imRef(r, x2, y2)) +
+              square(imRef(g, x1, y1)-imRef(g, x2, y2)) +
+              square(imRef(b, x1, y1)-imRef(b, x2, y2)));
+}
+
+/*
+ * Segment an image
+ *
+ * Returns a color image representing the segmentation.
+ *
+ * im: image to segment.
+ * sigma: to smooth the image.
+ * c: constant for threshold function.
+ * min_size: minimum component size (enforced by post-processing stage).
+ * num_ccs: number of connected components in the segmentation.
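+ * The graph is the 8-connected pixel grid: edge weights are Euclidean distances
+ * between the Gaussian-smoothed R, G and B values of neighboring pixels (see
+ * diff() above). Components smaller than min_size are merged in a post-processing
+ * pass, and each remaining component is drawn with a random color in the output.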
+ */ +image *segment_image(image *im, float sigma, float c, int min_size, + int *num_ccs) { + int width = im->width(); + int height = im->height(); + + image *r = new image(width, height); + image *g = new image(width, height); + image *b = new image(width, height); + + // smooth each color channel + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + imRef(r, x, y) = imRef(im, x, y).r; + imRef(g, x, y) = imRef(im, x, y).g; + imRef(b, x, y) = imRef(im, x, y).b; + } + } + image *smooth_r = smooth(r, sigma); + image *smooth_g = smooth(g, sigma); + image *smooth_b = smooth(b, sigma); + delete r; + delete g; + delete b; + + // build graph + edge *edges = new edge[width*height*4]; + int num = 0; + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + if (x < width-1) { + edges[num].a = y * width + x; + edges[num].b = y * width + (x+1); + edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y); + num++; + } + + if (y < height-1) { + edges[num].a = y * width + x; + edges[num].b = (y+1) * width + x; + edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x, y+1); + num++; + } + + if ((x < width-1) && (y < height-1)) { + edges[num].a = y * width + x; + edges[num].b = (y+1) * width + (x+1); + edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y+1); + num++; + } + + if ((x < width-1) && (y > 0)) { + edges[num].a = y * width + x; + edges[num].b = (y-1) * width + (x+1); + edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y-1); + num++; + } + } + } + delete smooth_r; + delete smooth_g; + delete smooth_b; + + // segment + universe *u = segment_graph(width*height, num, edges, c); + + // post process small components + for (int i = 0; i < num; i++) { + int a = u->find(edges[i].a); + int b = u->find(edges[i].b); + if ((a != b) && ((u->size(a) < min_size) || (u->size(b) < min_size))) + u->join(a, b); + } + delete [] edges; + *num_ccs = u->num_sets(); + + image *output = new image(width, height); + + // pick random colors for each component + rgb *colors = new rgb[width*height]; + for (int i = 0; i < width*height; i++) + colors[i] = random_rgb(); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + int comp = u->find(y * width + x); + imRef(output, x, y) = colors[comp]; + } + } + + delete [] colors; + delete u; + + return output; +} + +#endif diff --git a/segmentation/segment/segment.cpp b/segmentation/segment/segment.cpp new file mode 100644 index 0000000..c747d0e --- /dev/null +++ b/segmentation/segment/segment.cpp @@ -0,0 +1,49 @@ +/* +Copyright (C) 2006 Pedro Felzenszwalb + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include +#include +#include "segment-image.h" + +int main(int argc, char **argv) { + if (argc != 6) { + fprintf(stderr, "usage: %s sigma k min input(ppm) output(ppm)\n", argv[0]); + return 1; + } + + float sigma = atof(argv[1]); + float k = atof(argv[2]); + int min_size = atoi(argv[3]); + + printf("loading input image.\n"); + image *input = loadPPM(argv[4]); + + printf("processing\n"); + int num_ccs; + image *seg = segment_image(input, sigma, k, min_size, &num_ccs); + savePPM(seg, argv[5]); + + printf("got %d components\n", num_ccs); + printf("done! uff...thats hard work.\n"); + + return 0; +} + diff --git a/train/GetMergedImsegs.m b/train/GetMergedImsegs.m new file mode 100644 index 0000000..baf64d7 --- /dev/null +++ b/train/GetMergedImsegs.m @@ -0,0 +1,29 @@ +function merged_imsegs = GetMergedImsegs( imsegs, splabel ) + assert( imsegs.nseg == length(splabel) ); + trans_splabel = TransformLabelRange( splabel ); + + segimage = zeros( size(imsegs.segimage) ); + + spstats = regionprops( imsegs.segimage, 'PixelIdxList' ); + + for ix = 1 : length(spstats) + segimage(spstats(ix).PixelIdxList) = trans_splabel(ix); + end + + merged_imsegs.segimage = segimage; + merged_imsegs.nseg = length(unique(splabel)); + + merged_imsegs = APPgetSpStats( merged_imsegs ); +end + +function out_array = TransformLabelRange( in_array ) + % transform in_array to out_array, where the elements of in_array are + % in the range [1 length(in_array)] + elem = unique(in_array); + nelem = length(elem); + + out_array = in_array; + for ix = 1 : nelem + out_array(in_array == elem(ix)) = ix; + end +end \ No newline at end of file diff --git a/train/balanceData.m b/train/balanceData.m new file mode 100644 index 0000000..623138e --- /dev/null +++ b/train/balanceData.m @@ -0,0 +1,20 @@ +function [outdata outlab] = balanceData( indata, inlab, neg_lab ) + if nargin == 2 + neg_lab = 0; + end + + pos_ind = find(inlab == 1); + neg_ind = find(inlab == neg_lab); + + alpha = 1.2; + + if length(pos_ind) < length(neg_ind) + x = [indata(pos_ind,:); indata(neg_ind(1:length(pos_ind)*alpha), :)]; + y = [inlab(pos_ind); inlab(neg_ind(1:length(pos_ind)*alpha))]; + else + x = [indata(pos_ind(1:length(neg_ind)*alpha), :); indata(neg_ind, :)]; + y = [inlab(pos_ind(1:length(neg_ind)*alpha)); inlab(neg_ind)]; + end + + [outdata outlab] = randomize( x, y ); +end \ No newline at end of file diff --git a/train/calibrateBdtClassifier.m b/train/calibrateBdtClassifier.m new file mode 100644 index 0000000..301ebb3 --- /dev/null +++ b/train/calibrateBdtClassifier.m @@ -0,0 +1,82 @@ +function [eparams, errors] = calibrateBdtClassifier(data, eclassifier, lab, ncv) +% [eparams, errors] = calibrateEdgeClassifier(efeatures, adjlist, imsegs, +% eclassifier, ncv) +lab(lab == -1 ) = 0; +nfeat = size(data, 1); +for k = 1:ncv + if ncv > 1 + testind = [(k-1)*nfeat/ncv+1:k*nfeat/ncv]; + trainind = setdiff([1:nfeat], testind); + else + trainind = (1:nfeat); + end + edata{k} = data(trainind,:); + elab{k} = lab(trainind); + econf{k} = test_boosted_dt_mc(eclassifier, edata{k}); +% econf{k} = 1 ./ (1+exp(-econf{k})); +end + +for k = 1:ncv + disp(['iter: ' num2str(k)]) + if ncv>1 + traink = setdiff([1:ncv], k); + else + traink = k; + end + eparams{k} = fminunc(@(x) objective(x, cat(1, econf{traink}), cat(1, elab{traink})), [-1 0], 
optimset('TolFun', 0.001)); +end + +for k = 1:ncv + econf{k} = 1 ./ (1+exp(eparams{k}(1)*econf{k}+eparams{k}(2))); +end + +elab = cat(1, elab{:}); +econf = cat(1, econf{:}); + +eerror = mean((econf>0.5)~=elab); + +econf2 = 1-abs(elab-econf); + +ind1 = find(elab==0); +ind2 = find(elab==1); +px = [0.025:0.05:0.975]; +f1 = ksdensity(econf(ind1), px, 'support', [0 1]); +f2 = ksdensity(econf(ind2), px, 'support', [0 1]); +fc = ksdensity(econf2, px, 'support', [0 1]); +%fc = fc; + +errors.err = eerror; +errors.pneg = f1; +errors.ppos = f2; +errors.conf = fc; +errors.px = px; + +medFS = 18; +bigFS = 20; + +% figure(1), hold on, plot(px, fc, 'y', 'LineWidth', 2); +% %axis([0 1 0 1]) +% xlabel('Confidence in True Label', 'FontSize', medFS) +% ylabel('Frequency', 'FontSize', medFS) +% title('Same Label Confidence', 'FontSize', bigFS) +% set(gca, 'FontSize', medFS) +% +% figure(2), hold on, plot(px, f2 ./ (f1+f2), 'y', 'LineWidth', 2) +% hold on, plot(px, px, '--k') +% axis([0 1 0 1]) +% xlabel('Estimated Probability', 'FontSize', medFS) +% ylabel('Empirical Probability', 'FontSize', medFS) +% %title('Same Label Confidence', 'FontSize', bigFS) +% set(gca, 'FontSize', medFS) + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +function err = objective(param, econf, elab) +econf = 1./(1+exp(param(1)*econf+param(2))); +px = [0.025:0.05:0.975]; +f1 = ksdensity(econf(elab==0), px, 'support', [0 1+eps])+eps; +f2 = ksdensity(econf(elab==1), px, 'support', [0 1+eps])+eps; +f1 = f1 / sum(f1+f2); +f2 = f2 / sum(f1+f2); +err = sum((f1+f2).*(px - f2./(f1+f2)).^2); +disp(num2str([sum((f1+f2).*abs(px - f2./(f1+f2))) param])) \ No newline at end of file diff --git a/train/drfiGetSegmentSaliencyLabel.m b/train/drfiGetSegmentSaliencyLabel.m new file mode 100644 index 0000000..073086a --- /dev/null +++ b/train/drfiGetSegmentSaliencyLabel.m @@ -0,0 +1,34 @@ +function lab = drfiGetSegmentSaliencyLabel( gtmask, imsegs ) + % gtmask ground truth of the PIXEL_WISE annotation + % imdata generated by the getImData + % splabel the label of the superpixels, coming from multi segmentation + % segment segment = unique(splabel) + % lab {-1, 0, 1} -1 is background, 1 is the salient object, and 0 is uncertain + thresh = 0.75; + + gtmask(gtmask >= 128) = 255; + gtmask(gtmask < 128) = 0; + gtmask = double(gtmask); + + spstats = regionprops( imsegs.segimage, 'PixelIdxList' ); + + nseg = imsegs.nseg; + + lab = zeros( nseg, 1 ); + + for ix = 1 : nseg + pixels = spstats(ix).PixelIdxList; + [saliency occurence] = mode( gtmask(pixels) ); + if occurence / max(length(pixels), eps) < thresh + lab(ix) = 0; + else + if saliency == 255 + lab(ix) = 1; + elseif saliency == 0 + lab(ix) = -1; + else + error( 'error in gtmask' ); + end + end + end +end \ No newline at end of file diff --git a/train/drfiGetSuperpixelIsSameLabel.m b/train/drfiGetSuperpixelIsSameLabel.m new file mode 100644 index 0000000..7c15179 --- /dev/null +++ b/train/drfiGetSuperpixelIsSameLabel.m @@ -0,0 +1,23 @@ +function same_lab = drfiGetSuperpixelIsSameLabel( gtmask, imdata ) + % gtmask ground truth of the PIXEL_WISE annotation + % imdata generated by the getImData + + adjlist = imdata.adjlist; + + nadj = size(adjlist, 1); + spstats = imdata.spstats; + + gtmask = double(gtmask); + + same_lab = zeros(nadj, 1); + + for k = 1:nadj + s1 = adjlist(k, 1); + s2 = adjlist(k, 2); + + same_lab(k) = (mode(gtmask(spstats(s1).PixelIdxList))... 
+ == mode(gtmask(spstats(s2).PixelIdxList)) ); + end + + same_lab(same_lab == 0) = -1; +end \ No newline at end of file diff --git a/train/drfiLearnSaliencyFusionWeight.m b/train/drfiLearnSaliencyFusionWeight.m new file mode 100644 index 0000000..ba4d5e2 --- /dev/null +++ b/train/drfiLearnSaliencyFusionWeight.m @@ -0,0 +1,134 @@ +function w = drfiLearnSaliencyFusionWeight( train_dir, gt_dir, num_segmentation, is_resize ) + % Assume that all training images are placed under train_dir. + % The saliency maps of i-th segmentation are under the folder "i" (e.g., + % saliency maps of 3rd segmentation are under the folder "3"). + % Detailed introduction on learning the saliency fusion weight can be + % found in our supplementary material. + + M = num_segmentation; + + % Resize all training images to a fixed size 200*200 + sub_dir_list = dir(fullfile(train_dir, '*')); + + ind = []; + for m = 1 : length(sub_dir_list) + if strcmp(sub_dir_list(m).name, '.') || strcmp(sub_dir_list(m).name, '..') + ind = [ind, m]; + continue; + end + end + + % Remove '.' and '..' + sub_dir_list(ind) = []; + + normh = 200; + normw = 200; + + % Resize + if is_resize + for m = 1 : length(sub_dir_list) + image_list = dir(fullfile(train_dir, sub_dir_list(m).name, '*.png')); + sub_dir_name = sub_dir_list(m).name; + + parfor n = 1 : length(image_list) + image = imread(fullfile(train_dir, sub_dir_name, image_list(n).name)); + + image = imresize(image, [normh, normw]); + + imwrite(image, fullfile(train_dir, sub_dir_name, image_list(n).name)); + +% if mod(jx, 500) == 0 +% fprintf( 'sub_dir: %s, jx: %d\n', sub_dir_name, jx ); +% end + end + + fprintf( '%d / %d\n', m, length(sub_dir_list) ); + end + end + + image_list = dir(fullfile(train_dir, sub_dir_list(end).name, '*.png')); + num_image = length(image_list); + + % prepare H and f + H = zeros(M, M); + f = zeros(M, 1); + + for ii = 1 : M * M + [m, n] = ind2sub([M, M], ii); + sub_dir_name_n = sub_dir_list(n).name; + sub_dir_name_m = sub_dir_list(m).name; + if m >= n + temp = zeros(1, num_image); + parfor k = 1 : num_image + image_name = image_list(k).name; + Akm = im2double(imread(fullfile(train_dir, sub_dir_name_m, image_name))); + Akn = im2double(imread(fullfile(train_dir, sub_dir_name_n, image_name))); + + if size(Akm, 3) > 1 + Akm = rgb2gray( Akm ); + end + + if size(Akn, 3) > 1 + Akn = rgb2gray( Akn ); + end + + Nk = 1;%size(Akm, 1) * size(Akm, 2); + temp(k) = sum(sum(Akm .* Akn)) / Nk; + % fprintf( 'ix: %d, jx: %d, n: %d\n', ix, jx, n ); + end + H(m, n) = 2 * sum( temp ); + else + H(m, n) = H(n, m); + end + + fprintf( 'Computing H, m: %d, n: %d\n', m, n ); + end + H = H / num_image; + save( 'H.mat', 'H' ); +% load( 'H.mat' ); + + for m = 1 : M + temp = zeros(1, num_image); + sub_dir_name = sub_dir_list(m).name; + parfor k = 1 : num_image + image_name = image_list(k).name; + Akm = im2double(imread(fullfile(train_dir, sub_dir_name, image_name))); + if size(Akm, 3) > 1 + Akm = rgb2gray(Akm); + end + + A = imread(fullfile(gt_dir, image_name)); + A = imresize(A, [normh, normw]); + A = im2double( A ); + if size(A, 3) > 1 + A = rgb2gray(A); + end + + A( A > 0.5 ) = 1.0; + A( A < 0.5 ) = 0; + + % f(ix) = f(ix) - 2 * sum(sum(A .* Ani)); + % temp = temp - 2 * sum(sum(A .* Ani)); + Nk = 1;%size(A, 1) * size(A, 2); + temp(k) = - 2 * sum(sum(A .* Akm)) / Nk; + end + f(m) = sum( temp ); + fprintf( 'comupting f, m: %d\n', m ); + end + f = f / num_image; + save( 'f.mat', 'f' ); + + % Solve the quadratic programming problem + w_init = ones(M, 1) / M; + + Aeq = ones(1, M); + beq = 1; + + lb = 
zeros(M, 1); + ub = ones(M, 1); + + opt = optimset( 'Algorithm', 'interior-point-convex' ); + w = quadprog(H, f, [], [], Aeq, beq, lb, ub, w_init, opt ); + + w( w < 1e-6 ) = 0; +end \ No newline at end of file diff --git a/train/fGetStrList.m b/train/fGetStrList.m new file mode 100644 index 0000000..8b85794 --- /dev/null +++ b/train/fGetStrList.m @@ -0,0 +1,14 @@ +function [ strList ] = fGetStrList( fileName ) + fid = fopen(fileName); + count = 0; + tline = fgetl(fid); + strList = cell(1, 10000); + while ischar(tline) + count = count+1; + strList{count} = tline; + tline = fgetl(fid); + end + fclose(fid); + strList = strList(1:count); +end + diff --git a/train/learnFusionWeight.m b/train/learnFusionWeight.m new file mode 100644 index 0000000..974a74c --- /dev/null +++ b/train/learnFusionWeight.m @@ -0,0 +1,71 @@ +clc; + +if ~exist('./trained_classifiers/seg_para.mat', 'file') + sigma = [0.8 : 0.1 : 1.0]; + k = [200, 300, 500]; + min_size = [150 200 300]; + + [ss kk] = meshgrid( sigma, k ); + seg_para = zeros( length(sigma) * length(k) * length(min_size), 3 ); + + ind = 1; + for ix = 1 : length(ss(:)) + for jx = 1 : length(min_size) + seg_para(ind, :) = [ss(ix) kk(ix) min_size(jx)]; + ind = ind + 1; + end + end + + save( './trained_classifiers/seg_para.mat', 'seg_para' ); +end + +load( './trained_classifiers/seg_para.mat', 'seg_para' ); + +if ~exist('./trained_classifiers/fusion_weight.mat', 'file') + % training folder + base_dir = 'D:\LearningSaliency\Data'; + imdir = fullfile(base_dir, 'MSRA'); + % imdir = '../../Data/MSRA'; + %image_list = dir(fullfile(imdir, '*.jpg')); + %image_list(201:end) = []; + % image_list = fGetStrList('../../Data/train.txt'); + image_list = fGetStrList(fullfile(base_dir, 'train.txt')); + % image_list = image_list(1:50); %randsample(length(image_list), 200) + + work_dir = '../../WorkingData/MSRA/'; + gt_dir = fullfile(base_dir, 'MSRA_gt'); + % gt_dir = '../../Data/MSRA'; + trn_dir = fullfile(work_dir, 'fusion'); + + if ~exist(trn_dir, 'dir') + mkdir(trn_dir); + end + + regressor = load( 'trained_classifiers\segment_saliency_regressor_200_15_rf.mat' ); + segment_saliency_regessor = regressor.segment_saliency_regressor; + + for s = 1 : size(seg_para, 1) + sigma = seg_para(s, 1); + k = seg_para(s, 2); + min_size = seg_para(s, 3); + + if ~exist(fullfile(trn_dir, num2str(s)), 'dir') + mkdir(fullfile(trn_dir, num2str(s))); + end + + for ix = 1 : length(image_list) + image_name = image_list{ix}; + image_name = [image_name(1:end-3), 'png']; + image = imread(fullfile(imdir, image_name)); + + smap = drfiGetSaliencyMapSingleLevel(image, segment_saliency_regessor, sigma, k, min_size); + + imwrite(smap, fullfile(trn_dir, num2str(s), image_name)); + end + + fprintf( 'segmentation: %d / %d\n', s, size(seg_para, 1) ); + end + + w = drfiLearnSaliencyFusionWeight(trn_dir, gt_dir, size(seg_para, 1), true); + save( './trained_classifiers/fusiong_weight.mat', 'w' ); +end \ No newline at end of file diff --git a/train/randomize.m b/train/randomize.m new file mode 100644 index 0000000..e2bfaa0 --- /dev/null +++ b/train/randomize.m @@ -0,0 +1,7 @@ +function [outdata outlab] = randomize(indata, inlab) + nsample = length(inlab); + ind = randperm( nsample ); + + outdata = indata(ind, :); + outlab = inlab(ind); +end \ No newline at end of file diff --git a/train/trainSameLabelClassifier.m b/train/trainSameLabelClassifier.m new file mode 100644 index 0000000..acafae1 --- /dev/null +++ b/train/trainSameLabelClassifier.m @@ -0,0 +1,117 @@ +clc; +% addpath(genpath('../')); + +if 
~exist('trn_same_label_data.mat', 'file') + % training folder + base_dir = 'D:\LearningSaliency\Data'; + imdir = fullfile(base_dir, 'MSRA'); + % imdir = '../../Data/MSRA'; + %image_list = dir(fullfile(imdir, '*.jpg')); + %image_list(201:end) = []; + % image_list = fGetStrList('../../Data/train.txt'); + image_list = fGetStrList(fullfile(base_dir, 'train.txt')); + % image_list = image_list(1:200); %randsample(length(image_list), 200) + + work_dir = '../../WorkingData/MSRA/'; + gt_dir = fullfile(base_dir, 'MSRA_gt'); + % gt_dir = '../../Data/MSRA'; + + if ~exist(fullfile(work_dir, 'imsegs'), 'dir') + mkdir(fullfile(work_dir, 'imsegs')); + end + + if ~exist(fullfile(work_dir, 'same_label'), 'dir') + mkdir(fullfile(work_dir, 'same_label')); + end + + if ~exist(fullfile(work_dir, 'adjlist'), 'dir') + mkdir(fullfile(work_dir, 'adjlist')); + end + + NumImgs = length(image_list); + + trn_edata_cell = cell(NumImgs, 1); + trn_elab_cell = cell(NumImgs, 1); + imgData = cell(NumImgs, 1); + + parfor ix = 1 : NumImgs + image_name = image_list{ix}; + mat_name = [image_name(1:end-4), '.mat']; + + image = imread(fullfile(imdir, [image_name(1:end-3) 'png'])); + gt = imread(fullfile(gt_dir, image_name)); + + % computing features + imsegs = im2superpixels(image, 'pedro'); + imdata = drfiGetImageData(image); + spdata = drfiGetSuperpixelData(imdata, imsegs); + pbgdata = drfiGetPbgFeat(imdata); + [edgedata, imdata] = drfiGetSameLabelFeat(imsegs, spdata, pbgdata, imdata); + edgelab = drfiGetSuperpixelIsSameLabel(gt, imdata); + adjlist = imdata.adjlist; + + trn_edata_cell{ix} = edgedata; + trn_elab_cell{ix} = edgelab; + + imgData{ix}.mat_name = mat_name; + imgData{ix}.imsegs = imsegs; + imgData{ix}.edgedata = edgedata; + imgData{ix}.edgelab = edgelab; + imgData{ix}.adjlist = adjlist; + + fprintf( '%d / %d\n', ix, NumImgs); + end + + % cache + for ix = 1:NumImgs + mat_name = imgData{ix}.mat_name; + imsegs = imgData{ix}.imsegs; + edgedata = imgData{ix}.edgedata; + edgelab = imgData{ix}.edgelab; + adjlist = imgData{ix}.adjlist; + save(fullfile(work_dir, 'imsegs', mat_name), 'imsegs'); + save(fullfile(work_dir, 'same_label', mat_name), 'edgedata', 'edgelab'); + save(fullfile(work_dir, 'adjlist', mat_name), 'adjlist'); + end + + + % train the same label classifier + trn_edata = cell2mat(trn_edata_cell); + trn_elab = cell2mat(trn_elab_cell); + + save( 'trn_same_label_data.mat', 'trn_edata', 'trn_elab' ); +else + load( 'trn_same_label_data.mat' ); +end + +[trn_edata, trn_elab] = randomize(trn_edata, trn_elab); + +ind = ceil(length(trn_elab) * 0.8); +val_edata = trn_edata(ind : end, :); +val_elab = trn_elab(ind : end); +trn_edata(ind : end, :) = []; +trn_elab(ind : end) = []; + + +clf_type = 'bdt'; +if ~exist('trained_classifiers', 'dir') + mkdir('trained_classifiers'); +end + +if strcmp(clf_type, 'bdt') + same_label_classifier = train_boosted_dt_2c(trn_edata, [], trn_elab, 200, 20); + + % calibration + ecal = calibrateBdtClassifier(val_edata, same_label_classifier, val_elab, 1); + ecal = ecal{1}; + + save( './trained_classifiers/same_label_classifier_200_20_bdt.mat', 'same_label_classifier', 'ecal' ); +elseif strcmp(clf_type, 'rf') + opt.importance = 1; + regressor = regRF_train( feat, lab, 200, 12, opt ); + same_label_classifier = compressRegModel( regressor ); + save( './trained_classifiers/same_label_clf_200_12_rf.mat', 'same_label_classifier' ); +else + error( 'Not supported classifier.' 
); +end + diff --git a/train/trainSegmentSaliencyRegressor.m b/train/trainSegmentSaliencyRegressor.m new file mode 100644 index 0000000..ae36678 --- /dev/null +++ b/train/trainSegmentSaliencyRegressor.m @@ -0,0 +1,120 @@ +clc; + +if ~exist('trn_segment_saliency_data.mat', 'file') + % training folder + base_dir = 'D:\LearningSaliency\Data'; + imdir = fullfile(base_dir, 'MSRA'); + % imdir = '../../Data/MSRA'; + %image_list = dir(fullfile(imdir, '*.jpg')); + %image_list(201:end) = []; + % image_list = fGetStrList('../../Data/train.txt'); + image_list = fGetStrList(fullfile(base_dir, 'train.txt')); + % image_list = image_list(1:200); %randsample(length(image_list), 200) + + work_dir = '../../WorkingData/MSRA/'; + gt_dir = fullfile(base_dir, 'MSRA_gt'); + % gt_dir = '../../Data/MSRA'; + + if ~exist(fullfile(work_dir, 'saliency'), 'dir') + mkdir(fullfile(work_dir, 'saliency')); + end + + load( './trained_classifiers/same_label_classifier_200_20_bdt.mat' ); + + trn_sal_data_cell = cell(length(image_list), 1); + trn_sal_lab_cell = cell(length(image_list), 1); + + NumImgs = length(image_list); + for ix = 1 : NumImgs + image_name = image_list{ix}; + mat_name = [image_name(1:end-4), '.mat']; + + image = imread(fullfile(imdir, [image_name(1:end-3) 'png'])); + gt = imread(fullfile(gt_dir, image_name)); + + edata = load(fullfile(work_dir, 'same_label', mat_name), 'edgedata'); + sdata = load(fullfile(work_dir, 'imsegs', mat_name), 'imsegs'); + adata = load(fullfile(work_dir, 'adjlist', mat_name), 'adjlist'); + + imsegs = sdata.imsegs; + + % generate supervised multiple segmentations + same_label_likelihood = test_boosted_dt_mc( same_label_classifier, edata.edgedata ); + same_label_likelihood = 1 ./ (1+exp(ecal(1)*same_label_likelihood+ecal(2))); + + % generate multiple segmentations + t = [5:5:35 40:10:120 150:30:600 660:60:1200 1300:100:1800];%0 : 20 : 1000; + nSuperpixel = max( imsegs.segimage(:) ); + multi_segmentations = mexMergeAdjRegs_Felzenszwalb( adata.adjlist, same_label_likelihood, nSuperpixel, t, imsegs.npixels ); + nsegment = size(multi_segmentations, 2); + + sal_data_per_image_cell = cell(nsegment, 1); + sal_lab_per_image_cell = cell(nsegment, 1); + + imdata = drfiGetImageData(image); + pbgdata = drfiGetPbgFeat(imdata); + + for s = 1 : nsegment + spLabel = multi_segmentations(:, s); + + merged_imsegs = GetMergedImsegs( imsegs, spLabel ); + + if merged_imsegs.nseg / sdata.imsegs.nseg > 0.5 % too fine + continue; + end + + spdata = drfiGetSuperpixelData(imdata, merged_imsegs); + + sal_data_one_scale = drfiGetRegionSaliencyFeature(merged_imsegs, spdata, imdata, pbgdata); + sal_lab_one_scale = drfiGetSegmentSaliencyLabel(gt, merged_imsegs); + + assert(size(sal_data_one_scale, 1) == size(sal_lab_one_scale, 1)); + + sal_data_per_image_cell{s} = sal_data_one_scale; + sal_lab_per_image_cell{s} = sal_lab_one_scale; + +% fprintf( '%d / %d, %d / %d\n', s, nsegment, ix, length(image_list) ); + end + + % cache +% save(fullfile(work_dir, 'saliency', mat_name), 'sal_data_per_image_cell', 'sal_lab_per_image_cell' ); + + sal_data = cell2mat(sal_data_per_image_cell); + sal_lab = cell2mat(sal_lab_per_image_cell); + + trn_sal_data_cell{ix} = sal_data; + trn_sal_lab_cell{ix} = sal_lab; + + fprintf( '%d / %d\n', ix, NumImgs); + end + + trn_sal_data = cell2mat(trn_sal_data_cell); + trn_sal_lab = cell2mat(trn_sal_lab_cell); + + ind = trn_sal_lab == 0; + trn_sal_lab(ind) = []; + trn_sal_data(ind, :) = []; + + ind = trn_sal_lab == -1; + trn_sal_lab(ind) = 0; + + save('trn_segment_saliency_data.mat', 'trn_sal_data', 
'trn_sal_lab'); +else + load('trn_segment_saliency_data.mat'); +end + +[trn_sal_data, trn_sal_lab] = balanceData(trn_sal_data, trn_sal_lab); + +opt.importance = 0; +opt.do_trace = 1; + +num_tree = 200; +mtry = 15; + +model = regRF_train( trn_sal_data, trn_sal_lab, num_tree, 15, opt ); +segment_saliency_regressor = compressRegModel(model); +% model = regRF_train( valid_feat, valid_lab, num_tree, mtry, opt ); +% importance = model.importance; + +save( './trained_classifiers/segment_saliency_regressor_200_15_rf.mat', 'segment_saliency_regressor', '-v7.3' ); +% save( './trained_classifiers/importance.mat', 'model.importance' ); \ No newline at end of file diff --git a/trainAll.m b/trainAll.m new file mode 100644 index 0000000..89580b8 --- /dev/null +++ b/trainAll.m @@ -0,0 +1,50 @@ +clear all; +clc; + +addpath(genpath('.')) + +trainSameLabelClassifier; +trainSegmentSaliencyRegressor; + +% it is very time consuming to learn the fusiong weight +% in practice, we found the uniform weight performs well +% learnFusionWeight; + +% let's simply set the fusion weight to 1 +seg_para = [0.8000 100.0000 150.0000; + 0.8000 400.0000 300.0000; + 0.9000 200.0000 200.0000; + 0.9000 100.0000 200.0000; + 0.8000 300.0000 150.0000; + 1.0000 200.0000 150.0000; + 0.9000 300.0000 300.0000; + 1.0000 100.0000 150.0000; + 1.0000 500.0000 300.0000; + 0.9000 200.0000 300.0000; + 0.8000 600.0000 200.0000; + 1.0000 600.0000 300.0000; + 0.8000 200.0000 150.0000; + 1.0000 500.0000 200.0000; + 0.8000 400.0000 150.0000; + 1.0000 300.0000 150.0000; + 0.8000 100.0000 200.0000; + 0.8000 100.0000 300.0000; + 0.8000 200.0000 200.0000; + 0.8000 200.0000 300.0000; + 0.8000 300.0000 200.0000; + 0.8000 300.0000 300.0000; + 0.8000 400.0000 200.0000; + 0.8000 500.0000 150.0000; + 0.8000 500.0000 200.0000]; + +w = ones(size(seg_para, 1), 1); + +regressor = load( 'trained_classifiers\segment_saliency_regressor_200_15_rf.mat' ); +segment_saliency_regressor = regressor.segment_saliency_regressor; +para = seg_para; + +if ~exist('model', 'dir') + mkdir( 'model' ); +end + +save( 'model/drfiModelMatlab.mat', 'segment_saliency_regressor', 'w', 'para' ); \ No newline at end of file
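For reference, a minimal test-time sketch of how the pieces trained above fit together: per-level saliency maps are computed with drfiGetSaliencyMapSingleLevel (the same call used in learnFusionWeight.m) and fused as a weighted sum with the weights saved in model/drfiModelMatlab.mat. This assumes drfiGetSaliencyMapSingleLevel returns a map of the same size as the input with values in [0, 1]; the released code may expose a different top-level entry point (e.g., drfiGetSaliencyMap), so treat this as an illustration rather than the shipped API. The file name 'example.jpg' is hypothetical.

% Sketch: fuse single-level saliency maps with the learned (here uniform) weights.
load('model/drfiModelMatlab.mat', 'segment_saliency_regressor', 'w', 'para');

image = imread('example.jpg');               % hypothetical test image
smap  = zeros(size(image, 1), size(image, 2));

for s = 1 : size(para, 1)
    sigma    = para(s, 1);
    k        = para(s, 2);
    min_size = para(s, 3);

    % per-segmentation-level saliency, as in learnFusionWeight.m
    level_map = drfiGetSaliencyMapSingleLevel(image, segment_saliency_regressor, ...
        sigma, k, min_size);
    smap = smap + w(s) * double(level_map);
end

% normalize the fused map to [0, 1] and save it
smap = (smap - min(smap(:))) / max(max(smap(:)) - min(smap(:)), eps);
imwrite(smap, 'example_saliency.png');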