Upstreamed a bunch of changes to RTBKit

rtbkit · Sep 25, 2015 · 8d3f4b8
1 parent a1ca988
commit 8d3f4b8
Show file tree

Hide file tree

Showing 151 changed files with 2,514 additions and 4,465 deletions.
diff --git a/jml/arch/exception.h b/jml/arch/exception.h
@@ -25,7 +25,6 @@
 
 #include <string>
 #include <exception>
-#include "jml/compiler/compiler.h"
 #include "stdarg.h"
 #include "exception_handler.h"
 
@@ -34,7 +33,7 @@ namespace ML {
 class Exception : public std::exception {
 public:
     Exception(const std::string & msg);
-    Exception(const char * msg, ...) JML_FORMAT_STRING(2, 3);
+    Exception(const char * msg, ...);
     Exception(const char * msg, va_list ap);
     Exception(int errnum, const std::string & msg, const char * function = 0);
     virtual ~Exception() throw();

diff --git a/jml/arch/exception_handler.cc b/jml/arch/exception_handler.cc
@@ -4,7 +4,6 @@
 
 */
 
-#include <sys/utsname.h>
 #include <cxxabi.h>
 #include <cstring>
 #include <fstream>
@@ -137,14 +136,6 @@ void default_exception_tracer(void * object, const std::type_info * tinfo)
         heapDemangled = char_demangle(tinfo->name());
         demangled = heapDemangled;
     }
-    const char *nodeName = "<unknown>";
-    struct utsname utsName;
-    if (::uname(&utsName) == 0) {
-        nodeName = utsName.nodename;
-    }
-    else {
-        cerr << "error calling uname\n";
-    }
     auto pid = getpid();
     auto tid = gettid();
 
@@ -154,9 +145,8 @@ void default_exception_tracer(void * object, const std::type_info * tinfo)
                          "---------------------------\n"
                          "time:   %s\n"
                          "type:   %s\n"
-                         "node:   %s\n"
                          "pid:    %d; tid: %d\n",
-                         datetime, demangled, nodeName, pid, tid);
+                         datetime, demangled, pid, tid);
     if (heapDemangled) {
         free(heapDemangled);
     }

diff --git a/jml/arch/node_exception_tracing.cc b/jml/arch/node_exception_tracing.cc
@@ -15,14 +15,12 @@ __thread BacktraceInfo * current_backtrace = nullptr;
 
 namespace {
 
-#if 0
 void cleanup_current_backtrace(void * arg)
 {
     BacktraceInfo * p = (BacktraceInfo *)arg;
     delete p;
     p = nullptr;
 }
-#endif
 
 void ensure_current_backtrace()
 {

diff --git a/jml/boosting/boosted_stumps.cc b/jml/boosting/boosted_stumps.cc
@@ -515,7 +515,6 @@ namespace {
 static const std::string BOOSTED_STUMPS_MAGIC = "BOOSTED_STUMPS";
 static const compact_size_t BOOSTED_STUMPS_VERSION = 4;
 
-#if 0
 void serialize_dist(const distribution<float> & dist,
                     DB::Store_Writer & store)
 {
@@ -558,7 +557,6 @@ void reconstitute_dist(distribution<float> & dist,
             store >> dist[i];
     }
 }
-#endif
 
 } // file scope
 

diff --git a/jml/boosting/decision_tree_generator.cc b/jml/boosting/decision_tree_generator.cc
@@ -21,7 +21,6 @@
 #include <boost/random/mersenne_twister.hpp>
 #include <boost/random/uniform_int.hpp>
 #include <boost/random/variate_generator.hpp>
-#include <mutex>
 
 
 using namespace std;
@@ -306,23 +305,6 @@ struct Tree_Accum {
 
             if (z < best_z) {
 
-                if (!isfinite(arg)) {  // will never happen
-                    static std::mutex mutex;
-                    std::unique_lock<std::mutex> guard(mutex);
-
-                    cerr << "Best arg had non-finite split" << endl;
-                    cerr << "feature = " << fs.print(feature) << endl;
-                    cerr << "info = " << fs.info(feature) << endl;
-                    cerr << "z = " << z << endl;
-                    cerr << "arg = " << arg << endl;
-
-                    cerr << "Beating previous best" << endl;
-                    cerr << "feature = " << fs.print(best_feature) << endl;
-                    cerr << "info = " << fs.info(best_feature) << endl;
-                    cerr << "z = " << best_z << endl;
-                    cerr << "arg = " << best_arg << endl;
-                }
-
                 if (tracer || print_feat)
                     tracer("tree accum", 4) << w.print() << endl;
                 // A better one.  This replaces whatever we had accumulated so
@@ -339,18 +321,6 @@ struct Tree_Accum {
 
     float add(const Feature & feature, const W & w, float arg, double missing)
     {
-        // If the decision tree generator is having a really tough time
-        // separating the classes, and it's a bucketed feature, than it
-        // may send back a -INF for arg (which means split on missing or
-        // not missing), even if there is no missing feature, due to
-        // numerical issues.  Since the decision tree can't handle a split
-        // point of -INIFINITY, we return that we don't want this split
-        // point so that it will continue looking for something better.
-
-        if (!isfinite(arg)) {
-            return Z::none;
-        }
-
         float z = calc_z.non_missing(w, missing);
         return add_z(feature, w, arg, z);
     }
@@ -904,19 +874,17 @@ train_recursive(Thread_Context & context,
     }
 
     if (split.feature() == MISSING_FEATURE) {
-        //cerr << "in_class = " << in_class << endl;
-        //cerr << "weights = " << endl;
-        //for (unsigned i = 0;  i < weights.size();  ++i)
-        //    cerr << weights[i][0] << " " << weights[i][1] << endl;
-        cerr << "in_class.total() = " << in_class.total() << endl;
-        cerr << "in_class non zero = " << (in_class != 0.0).count() << endl;
-        cerr << "in_class.size() = " << in_class.size() << endl;
-
+/*
+        cerr << "in_class = " << in_class << endl;
+        cerr << "weights = " << endl;
+        for (unsigned i = 0;  i < weights.size();  ++i)
+            cerr << weights[i][0] << " " << weights[i][1] << endl;
         cerr << "example count = " << data.example_count() << endl;
         cerr << "class_weights = " << class_weights << endl;
         cerr << "total_weight = " << total_weight << endl;
-
+ */       
         cerr << "WARNING: no feature found in decision tree split" << endl;
+        cerr << "warning : didn't print a sometimes awfully long print in decision_tree_generator.cc" << endl;
         Tree::Leaf * result = tree.new_leaf();
         *result = leaf;
         return result;

diff --git a/jml/boosting/dense_features.cc b/jml/boosting/dense_features.cc
@@ -806,7 +806,6 @@ get_sizes(Parse_Context & context,
     return boost::make_tuple(row_count, var_count, header);
 }
 
-#if 0
 boost::tuple<size_t, size_t, string>
 get_sizes(const std::string & filename,
           vector<unsigned> & row_start_ofs)
@@ -815,7 +814,6 @@ get_sizes(const std::string & filename,
 
     return get_sizes(context, row_start_ofs);
 }
-#endif
 
 } // file scope
 

diff --git a/jml/boosting/feature_info.cc b/jml/boosting/feature_info.cc
@@ -290,7 +290,7 @@ void Mutable_Feature_Info::parse(Parse_Context & context)
                 set_categorical(make_sp(new Mutable_Categorical_Info()), STRING);
             else if (context.match_literal("REAL"))
                 type_ = REAL;
-            else if (context.match_literal("INUTILE"))
+            else if (context.match_literal("UNUTILE"))
                 type_ = INUTILE;
             else context.exception("Feature_Info::parse(): unknown type");
             break;

diff --git a/jml/boosting/stump_training_core.h b/jml/boosting/stump_training_core.h
@@ -719,10 +719,10 @@ struct Stump_Trainer {
 
         double missing;
         if (!results.start(feature, w, missing)) return Z::worst;
-        float Zvalue = results.add(feature, w, 0.5, missing);
+        float Z = results.add(feature, w, 0.5, missing);
         results.finish(feature);
 
-        return Zvalue;
+        return Z;
     }
 
     /** Test a presence variable. */
@@ -751,10 +751,10 @@ struct Stump_Trainer {
         double missing;
 
         if (!results.start(feature, w, missing)) return Z::worst;
-        float Zvalue = results.add_presence(feature, w, 0.5, missing);
+        float Z = results.add_presence(feature, w, 0.5, missing);
         results.finish(feature);
 
-        return Zvalue;
+        return Z;
     }
 
     template<class Results, class Weights, class ExampleWeights>
@@ -815,14 +815,14 @@ struct Stump_Trainer {
 
         /* One candidate split point is -INF, which lets us split only based
            upon missing or not. */
-        float Zvalue = Z::worst;
+        float Z = Z::worst;
 
         if (i != 0) {
-            Zvalue = results.add(feature, w, -INFINITY, missing);
+            Z = results.add(feature, w, -INFINITY, missing);
 
             if (debug)
                 cerr << "added split " << -INFINITY << " with " << missing
-                     << " missing and score " << Zvalue << endl;
+                     << " missing and score " << Z << endl;
         }
 
         float prev = index[i].value();
@@ -853,15 +853,15 @@ struct Stump_Trainer {
             /* Add this split point. */
             float arg = prev;
             float new_Z = results.add(feature, w, arg, missing);
-            Zvalue = std::min(Zvalue, new_Z);
+            Z = std::min(Z, new_Z);
 
-            if (debug && new_Z == Zvalue) {
+            if (debug && new_Z == Z) {
                 cerr << "i = " << i << endl;
                 cerr << "added split "
                      << data.feature_space()->print(feature, arg)
                      << " with " << missing
                      << " missing and score " << new_Z
-                     << (new_Z == Zvalue ? " *** BEST ***" : "")
+                     << (new_Z == Z ? " *** BEST ***" : "")
                      << endl;
 
                 cerr << "nex = " << nex << endl;
@@ -878,7 +878,7 @@ struct Stump_Trainer {
 
         results.finish(feature);
 
-        return Zvalue;
+        return Z;
     }
 
     template<class Results, class Weights, class ExampleWeights>
@@ -953,11 +953,11 @@ struct Stump_Trainer {
         }
 
         // TODO: not missing
-        float Zvalue = Z::worst;
+        float Z = Z::worst;
 #if 0
         /* One candidate split point is -INF, which lets us split only based
            upon missing or not. */
-        float Zvalue = results.add(feature, w, -INFINITY, missing);
+        float Z = results.add(feature, w, -INFINITY, missing);
 #endif
 
         float prev = index[i].value();
@@ -1020,14 +1020,14 @@ struct Stump_Trainer {
                                prev, i1, index[i].value(), i2, dist)
                      << endl;
             }
-            Zvalue = std::min(Zvalue, new_Z);
+            Z = std::min(Z, new_Z);
 
             prev = index[i].value();
         }
 
         results.finish(feature);
 
-        return Zvalue;
+        return Z;
     }
 
     template<class Results, class Weights, class ExampleWeights>
@@ -1103,7 +1103,7 @@ struct Stump_Trainer {
 
         /* One candidate split point is -INF, which lets us split only based
            upon missing or not. */
-        float Zvalue = Z::worst;
+        float Z = Z::worst;
 
         if (missing > 0.0)
             results.add(feature, w, -INFINITY, missing);
@@ -1113,7 +1113,7 @@ struct Stump_Trainer {
             cerr << "missing = " << missing << endl;
             cerr << "nb = " << nb << endl;
             cerr << "added default split " << -INFINITY << " with "
-                 << missing << " missing and score " << Zvalue
+                 << missing << " missing and score " << Z
                  << endl;
         }
 
@@ -1151,12 +1151,12 @@ struct Stump_Trainer {
                 cerr << "i = " << i << endl;
                 cerr << "added split " << arg << " with " << missing
                      << " missing and score " << new_Z
-                     << (new_Z < Zvalue ? " *** BEST ***" : "")
+                     << (new_Z < Z ? " *** BEST ***" : "")
                      << endl;
-                if (new_Z < Zvalue) best_arg = arg;
+                if (new_Z < Z) best_arg = arg;
             }
 
-            Zvalue = std::min(Zvalue, new_Z);
+            Z = std::min(Z, new_Z);
 
         }
 
@@ -1165,7 +1165,7 @@ struct Stump_Trainer {
         }
 
         results.finish(feature);
-        return Zvalue;
+        return Z;
     }
 };
 

diff --git a/jml/boosting/training_data.cc b/jml/boosting/training_data.cc
@@ -311,13 +311,13 @@ preindex(const Feature & label, const std::vector<Feature> & features)
 void Training_Data::preindex(const Feature & label)
 {
     return;
-    throw Exception("STUB: %s", __PRETTY_FUNCTION__);
+    throw Exception("STUB", __PRETTY_FUNCTION__);
 }
 
 void Training_Data::preindex_features()
 {
     return;
-    throw Exception("STUB: %s", __PRETTY_FUNCTION__);
+    throw Exception("STUB", __PRETTY_FUNCTION__);
 }
 
 vector<std::shared_ptr<Training_Data> >

diff --git a/jml/jml-build b/jml/jml-build
diff --git a/jml/judy/judy.mk b/jml/judy/judy.mk
@@ -25,7 +25,6 @@ LIBJUDY_SOURCES := \
 
 LIBJUDY_LINK :=
 
-# gcc 4.9 compilation requirements
-$(eval $(call set_compile_option,$(LIBJUDY_SOURCES),-fno-strict-aliasing -Wno-array-bounds))
+$(eval $(call set_compile_option,$(LIBJUDY_SOURCES),-fno-strict-aliasing))
 
 $(eval $(call library,judy,$(LIBJUDY_SOURCES),$(LIBJUDY_LINK)))
diff --git a/jml/utils/exc_assert.cc b/jml/utils/exc_assert.cc
@@ -16,13 +16,19 @@ Assertion_Failure(const std::string & msg)
 {
 }
 
+Assertion_Failure::
+Assertion_Failure(const char * msg, ...)
+    : Exception(msg)
+{
+}
+
 Assertion_Failure::
 Assertion_Failure(const char * assertion,
                   const char * function,
                   const char * file,
                   int line)
-    : Exception("assertion failure: %s at %s:%d in %s",
-                assertion, file, line, function)
+    : Exception(format("assertion failure: %s at %s:%d in %s",
+                    assertion, file, line, function))
 {
 }
 

diff --git a/jml/utils/exc_assert.h b/jml/utils/exc_assert.h
@@ -17,13 +17,7 @@ namespace ML {
 
 struct Assertion_Failure: public Exception {
     Assertion_Failure(const std::string & msg);
-
-    template<typename... Args>
-    Assertion_Failure(const char * msg, Args &&... args)
-        : Exception(msg, std::forward<Args>(args)...)
-    {
-    }
-
+    Assertion_Failure(const char * msg, ...);
     Assertion_Failure(const char * assertion,
                       const char * function,
                       const char * file,