Skip to content

Commit

Permalink
Upstreamed a bunch of changes to RTBKit
Browse files Browse the repository at this point in the history
  • Loading branch information
Mathieu Stefani committed Sep 25, 2015
1 parent a1ca988 commit 8d3f4b8
Show file tree
Hide file tree
Showing 151 changed files with 2,514 additions and 4,465 deletions.
3 changes: 1 addition & 2 deletions jml/arch/exception.h
Expand Up @@ -25,7 +25,6 @@

#include <string>
#include <exception>
#include "jml/compiler/compiler.h"
#include "stdarg.h"
#include "exception_handler.h"

Expand All @@ -34,7 +33,7 @@ namespace ML {
class Exception : public std::exception {
public:
Exception(const std::string & msg);
Exception(const char * msg, ...) JML_FORMAT_STRING(2, 3);
Exception(const char * msg, ...);
Exception(const char * msg, va_list ap);
Exception(int errnum, const std::string & msg, const char * function = 0);
virtual ~Exception() throw();
Expand Down
12 changes: 1 addition & 11 deletions jml/arch/exception_handler.cc
Expand Up @@ -4,7 +4,6 @@
*/

#include <sys/utsname.h>
#include <cxxabi.h>
#include <cstring>
#include <fstream>
Expand Down Expand Up @@ -137,14 +136,6 @@ void default_exception_tracer(void * object, const std::type_info * tinfo)
heapDemangled = char_demangle(tinfo->name());
demangled = heapDemangled;
}
const char *nodeName = "<unknown>";
struct utsname utsName;
if (::uname(&utsName) == 0) {
nodeName = utsName.nodename;
}
else {
cerr << "error calling uname\n";
}
auto pid = getpid();
auto tid = gettid();

Expand All @@ -154,9 +145,8 @@ void default_exception_tracer(void * object, const std::type_info * tinfo)
"---------------------------\n"
"time: %s\n"
"type: %s\n"
"node: %s\n"
"pid: %d; tid: %d\n",
datetime, demangled, nodeName, pid, tid);
datetime, demangled, pid, tid);
if (heapDemangled) {
free(heapDemangled);
}
Expand Down
2 changes: 0 additions & 2 deletions jml/arch/node_exception_tracing.cc
Expand Up @@ -15,14 +15,12 @@ __thread BacktraceInfo * current_backtrace = nullptr;

namespace {

#if 0
void cleanup_current_backtrace(void * arg)
{
BacktraceInfo * p = (BacktraceInfo *)arg;
delete p;
p = nullptr;
}
#endif

void ensure_current_backtrace()
{
Expand Down
2 changes: 0 additions & 2 deletions jml/boosting/boosted_stumps.cc
Expand Up @@ -515,7 +515,6 @@ namespace {
static const std::string BOOSTED_STUMPS_MAGIC = "BOOSTED_STUMPS";
static const compact_size_t BOOSTED_STUMPS_VERSION = 4;

#if 0
void serialize_dist(const distribution<float> & dist,
DB::Store_Writer & store)
{
Expand Down Expand Up @@ -558,7 +557,6 @@ void reconstitute_dist(distribution<float> & dist,
store >> dist[i];
}
}
#endif

} // file scope

Expand Down
46 changes: 7 additions & 39 deletions jml/boosting/decision_tree_generator.cc
Expand Up @@ -21,7 +21,6 @@
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_int.hpp>
#include <boost/random/variate_generator.hpp>
#include <mutex>


using namespace std;
Expand Down Expand Up @@ -306,23 +305,6 @@ struct Tree_Accum {

if (z < best_z) {

if (!isfinite(arg)) { // will never happen
static std::mutex mutex;
std::unique_lock<std::mutex> guard(mutex);

cerr << "Best arg had non-finite split" << endl;
cerr << "feature = " << fs.print(feature) << endl;
cerr << "info = " << fs.info(feature) << endl;
cerr << "z = " << z << endl;
cerr << "arg = " << arg << endl;

cerr << "Beating previous best" << endl;
cerr << "feature = " << fs.print(best_feature) << endl;
cerr << "info = " << fs.info(best_feature) << endl;
cerr << "z = " << best_z << endl;
cerr << "arg = " << best_arg << endl;
}

if (tracer || print_feat)
tracer("tree accum", 4) << w.print() << endl;
// A better one. This replaces whatever we had accumulated so
Expand All @@ -339,18 +321,6 @@ struct Tree_Accum {

/** Offer a candidate split at threshold `arg` for `feature`.
 *
 *  Non-finite thresholds are refused up front by returning Z::none.
 *  Rationale carried over from the original note: when the decision tree
 *  generator is having a hard time separating the classes on a bucketed
 *  feature, numerical issues may make it send back -INFINITY for arg
 *  (meaning "split on missing or not missing") even if there is no missing
 *  feature. Since the decision tree can't handle a split point of
 *  -INFINITY, the candidate is declined so that the search continues
 *  looking for something better.
 *
 *  For a finite threshold, the score from calc_z.non_missing(w, missing)
 *  is handed to add_z() and that call's result is returned.
 */
float add(const Feature & feature, const W & w, float arg, double missing)
{
    if (isfinite(arg)) {
        float score = calc_z.non_missing(w, missing);
        return add_z(feature, w, arg, score);
    }

    return Z::none;
}
Expand Down Expand Up @@ -904,19 +874,17 @@ train_recursive(Thread_Context & context,
}

if (split.feature() == MISSING_FEATURE) {
//cerr << "in_class = " << in_class << endl;
//cerr << "weights = " << endl;
//for (unsigned i = 0; i < weights.size(); ++i)
// cerr << weights[i][0] << " " << weights[i][1] << endl;
cerr << "in_class.total() = " << in_class.total() << endl;
cerr << "in_class non zero = " << (in_class != 0.0).count() << endl;
cerr << "in_class.size() = " << in_class.size() << endl;

/*
cerr << "in_class = " << in_class << endl;
cerr << "weights = " << endl;
for (unsigned i = 0; i < weights.size(); ++i)
cerr << weights[i][0] << " " << weights[i][1] << endl;
cerr << "example count = " << data.example_count() << endl;
cerr << "class_weights = " << class_weights << endl;
cerr << "total_weight = " << total_weight << endl;

*/
cerr << "WARNING: no feature found in decision tree split" << endl;
cerr << "warning : didn't print a sometimes awfully long print in decision_tree_generator.cc" << endl;
Tree::Leaf * result = tree.new_leaf();
*result = leaf;
return result;
Expand Down
2 changes: 0 additions & 2 deletions jml/boosting/dense_features.cc
Expand Up @@ -806,7 +806,6 @@ get_sizes(Parse_Context & context,
return boost::make_tuple(row_count, var_count, header);
}

#if 0
boost::tuple<size_t, size_t, string>
get_sizes(const std::string & filename,
vector<unsigned> & row_start_ofs)
Expand All @@ -815,7 +814,6 @@ get_sizes(const std::string & filename,

return get_sizes(context, row_start_ofs);
}
#endif

} // file scope

Expand Down
2 changes: 1 addition & 1 deletion jml/boosting/feature_info.cc
Expand Up @@ -290,7 +290,7 @@ void Mutable_Feature_Info::parse(Parse_Context & context)
set_categorical(make_sp(new Mutable_Categorical_Info()), STRING);
else if (context.match_literal("REAL"))
type_ = REAL;
else if (context.match_literal("INUTILE"))
else if (context.match_literal("UNUTILE"))
type_ = INUTILE;
else context.exception("Feature_Info::parse(): unknown type");
break;
Expand Down
42 changes: 21 additions & 21 deletions jml/boosting/stump_training_core.h
Expand Up @@ -719,10 +719,10 @@ struct Stump_Trainer {

double missing;
if (!results.start(feature, w, missing)) return Z::worst;
float Zvalue = results.add(feature, w, 0.5, missing);
float Z = results.add(feature, w, 0.5, missing);
results.finish(feature);

return Zvalue;
return Z;
}

/** Test a presence variable. */
Expand Down Expand Up @@ -751,10 +751,10 @@ struct Stump_Trainer {
double missing;

if (!results.start(feature, w, missing)) return Z::worst;
float Zvalue = results.add_presence(feature, w, 0.5, missing);
float Z = results.add_presence(feature, w, 0.5, missing);
results.finish(feature);

return Zvalue;
return Z;
}

template<class Results, class Weights, class ExampleWeights>
Expand Down Expand Up @@ -815,14 +815,14 @@ struct Stump_Trainer {

/* One candidate split point is -INF, which lets us split only based
upon missing or not. */
float Zvalue = Z::worst;
float Z = Z::worst;

if (i != 0) {
Zvalue = results.add(feature, w, -INFINITY, missing);
Z = results.add(feature, w, -INFINITY, missing);

if (debug)
cerr << "added split " << -INFINITY << " with " << missing
<< " missing and score " << Zvalue << endl;
<< " missing and score " << Z << endl;
}

float prev = index[i].value();
Expand Down Expand Up @@ -853,15 +853,15 @@ struct Stump_Trainer {
/* Add this split point. */
float arg = prev;
float new_Z = results.add(feature, w, arg, missing);
Zvalue = std::min(Zvalue, new_Z);
Z = std::min(Z, new_Z);

if (debug && new_Z == Zvalue) {
if (debug && new_Z == Z) {
cerr << "i = " << i << endl;
cerr << "added split "
<< data.feature_space()->print(feature, arg)
<< " with " << missing
<< " missing and score " << new_Z
<< (new_Z == Zvalue ? " *** BEST ***" : "")
<< (new_Z == Z ? " *** BEST ***" : "")
<< endl;

cerr << "nex = " << nex << endl;
Expand All @@ -878,7 +878,7 @@ struct Stump_Trainer {

results.finish(feature);

return Zvalue;
return Z;
}

template<class Results, class Weights, class ExampleWeights>
Expand Down Expand Up @@ -953,11 +953,11 @@ struct Stump_Trainer {
}

// TODO: not missing
float Zvalue = Z::worst;
float Z = Z::worst;
#if 0
/* One candidate split point is -INF, which lets us split only based
upon missing or not. */
float Zvalue = results.add(feature, w, -INFINITY, missing);
float Z = results.add(feature, w, -INFINITY, missing);
#endif

float prev = index[i].value();
Expand Down Expand Up @@ -1020,14 +1020,14 @@ struct Stump_Trainer {
prev, i1, index[i].value(), i2, dist)
<< endl;
}
Zvalue = std::min(Zvalue, new_Z);
Z = std::min(Z, new_Z);

prev = index[i].value();
}

results.finish(feature);

return Zvalue;
return Z;
}

template<class Results, class Weights, class ExampleWeights>
Expand Down Expand Up @@ -1103,7 +1103,7 @@ struct Stump_Trainer {

/* One candidate split point is -INF, which lets us split only based
upon missing or not. */
float Zvalue = Z::worst;
float Z = Z::worst;

if (missing > 0.0)
results.add(feature, w, -INFINITY, missing);
Expand All @@ -1113,7 +1113,7 @@ struct Stump_Trainer {
cerr << "missing = " << missing << endl;
cerr << "nb = " << nb << endl;
cerr << "added default split " << -INFINITY << " with "
<< missing << " missing and score " << Zvalue
<< missing << " missing and score " << Z
<< endl;
}

Expand Down Expand Up @@ -1151,12 +1151,12 @@ struct Stump_Trainer {
cerr << "i = " << i << endl;
cerr << "added split " << arg << " with " << missing
<< " missing and score " << new_Z
<< (new_Z < Zvalue ? " *** BEST ***" : "")
<< (new_Z < Z ? " *** BEST ***" : "")
<< endl;
if (new_Z < Zvalue) best_arg = arg;
if (new_Z < Z) best_arg = arg;
}

Zvalue = std::min(Zvalue, new_Z);
Z = std::min(Z, new_Z);

}

Expand All @@ -1165,7 +1165,7 @@ struct Stump_Trainer {
}

results.finish(feature);
return Zvalue;
return Z;
}
};

Expand Down
4 changes: 2 additions & 2 deletions jml/boosting/training_data.cc
Expand Up @@ -311,13 +311,13 @@ preindex(const Feature & label, const std::vector<Feature> & features)
void Training_Data::preindex(const Feature & label)
{
return;
throw Exception("STUB: %s", __PRETTY_FUNCTION__);
throw Exception("STUB", __PRETTY_FUNCTION__);
}

void Training_Data::preindex_features()
{
return;
throw Exception("STUB: %s", __PRETTY_FUNCTION__);
throw Exception("STUB", __PRETTY_FUNCTION__);
}

vector<std::shared_ptr<Training_Data> >
Expand Down
1 change: 0 additions & 1 deletion jml/jml-build
Submodule jml-build deleted from 6b111d
3 changes: 1 addition & 2 deletions jml/judy/judy.mk
Expand Up @@ -25,7 +25,6 @@ LIBJUDY_SOURCES := \

LIBJUDY_LINK :=

# gcc 4.9 compilation requirements
$(eval $(call set_compile_option,$(LIBJUDY_SOURCES),-fno-strict-aliasing -Wno-array-bounds))
$(eval $(call set_compile_option,$(LIBJUDY_SOURCES),-fno-strict-aliasing))

$(eval $(call library,judy,$(LIBJUDY_SOURCES),$(LIBJUDY_LINK)))
10 changes: 8 additions & 2 deletions jml/utils/exc_assert.cc
Expand Up @@ -16,13 +16,19 @@ Assertion_Failure(const std::string & msg)
{
}

/** Construct an assertion failure from a C-string message.
 *
 *  Only `msg` is handed to the Exception base class. The trailing variadic
 *  arguments are never read in this constructor (there is no va_start), so
 *  they are not substituted into the message.
 *
 *  NOTE(review): if Exception(msg) resolves to a printf-style overload, a
 *  `msg` containing conversion specifiers would be formatted with no
 *  arguments supplied — confirm which overload is selected, and consider
 *  formatting the message before constructing the base.
 */
Assertion_Failure::
Assertion_Failure(const char * msg, ...)
: Exception(msg)
{
}

Assertion_Failure::
Assertion_Failure(const char * assertion,
const char * function,
const char * file,
int line)
: Exception("assertion failure: %s at %s:%d in %s",
assertion, file, line, function)
: Exception(format("assertion failure: %s at %s:%d in %s",
assertion, file, line, function))
{
}

Expand Down
8 changes: 1 addition & 7 deletions jml/utils/exc_assert.h
Expand Up @@ -17,13 +17,7 @@ namespace ML {

struct Assertion_Failure: public Exception {
Assertion_Failure(const std::string & msg);

template<typename... Args>
Assertion_Failure(const char * msg, Args &&... args)
: Exception(msg, std::forward<Args>(args)...)
{
}

Assertion_Failure(const char * msg, ...);
Assertion_Failure(const char * assertion,
const char * function,
const char * file,
Expand Down

0 comments on commit 8d3f4b8

Please sign in to comment.