root-project · lmoneta · Oct 9, 2020 · Oct 2, 2020 · Oct 6, 2020 · Oct 8, 2020
@@ -76,14 +76,17 @@ namespace TMVA {
       /// Get the Keras backend (can be: TensorFlow, Theano or CNTK)
       EBackendType GetKerasBackend();
       TString GetKerasBackendName();
+      /// Flag indicating whether the Keras shipped with TensorFlow 2 (tf.keras) is used
+      Bool_t UseTFKeras() const { return fUseTFKeras; }
 
-    private:
+   private:
 
       TString fFilenameModel; // Filename of the previously exported Keras model
       UInt_t fBatchSize {0}; // Training batch size
       UInt_t fNumEpochs {0}; // Number of training epochs
       Int_t fNumThreads {0}; // Number of CPU threads (if 0 uses default values)
       Int_t fVerbose; // Keras verbosity during training
+      Bool_t fUseTFKeras { kFALSE};   // use the Keras shipped with TensorFlow (tf.keras) instead of standalone Keras; false by default
       Bool_t fContinueTraining; // Load weights from previous training
       Bool_t fSaveBestOnly; // Store only weights with smallest validation loss
       Int_t fTriesEarlyStopping; // Stop training if validation loss is not decreasing for several epochs
@@ -92,6 +95,7 @@ namespace TMVA {
       TString fNumValidationString;  // option string defining the number of validation events
       TString fGpuOptions;    // GPU options (for Tensorflow to set in session_config.gpu_options)
       TString fUserCodeName; // filename of an optional user script that will be executed before loading the Keras model
+      TString fKerasString;  // string identifying keras or tf.keras
 
       bool fModelIsSetup = false; // flag whether model is loaded, needed for getMvaValue during evaluation
       float* fVals = nullptr; // variables array used for GetMvaValue

@@ -15,6 +15,7 @@
 #include "TMVA/VariableTransformBase.h"
 #include "TMVA/Tools.h"
 #include "TMVA/Timer.h"
+#include "TSystem.h"
 
 using namespace TMVA;
 
@@ -80,6 +81,7 @@ void MethodPyKeras::DeclareOptions() {
    DeclareOptionRef(fNumEpochs, "NumEpochs", "Number of training epochs");
    DeclareOptionRef(fNumThreads, "NumThreads", "Number of CPU threads (only for Tensorflow backend)");
    DeclareOptionRef(fGpuOptions, "GpuOptions", "GPU options for tensorflow, such as allow_growth");
+   DeclareOptionRef(fUseTFKeras, "tf.keras", "Use tensorflow from Keras");
    DeclareOptionRef(fVerbose, "Verbose", "Keras verbosity during training");
    DeclareOptionRef(fContinueTraining, "ContinueTraining", "Load weights from previous training");
    DeclareOptionRef(fSaveBestOnly, "SaveBestOnly", "Store only weights with smallest validation loss");
@@ -154,51 +156,135 @@ UInt_t TMVA::MethodPyKeras::GetNumValidationSamples()
    return nValidationSamples;
 }
 
+/// Process the options: set the default trained-model filename if none was given and set up the Keras model.
+/// This is called only when creating the method before training, not when
+/// reading from an XML file. Called from MethodBase::ProcessSetup,
+/// which is called from Factory::BookMethod.
 void MethodPyKeras::ProcessOptions() {
+
    // Set default filename for trained model if option is not used
    if (fFilenameTrainedModel.IsNull()) {
       fFilenameTrainedModel = GetWeightFileDir() + "/TrainedModel_" + GetName() + ".h5";
    }
 
-   // set here some specific options for Tensorflow backend
-   //  -  when using tensorflow gpu set option to allow memory growth to avoid allocating all memory
-   //  -  set up number of threads for CPU if NumThreads option was specified
+   // Setup model, either the initial model from `fFilenameModel` or
+   // the trained model from `fFilenameTrainedModel`
+   if (fContinueTraining) Log() << kINFO << "Continue training with trained model" << Endl;
+   SetupKerasModel(fContinueTraining);
+}
 
-   // check first if using tensorflow backend
-   if (GetKerasBackend() == kTensorFlow) {
-      Log() << kINFO << "Using TensorFlow backend - setting special configuration options "  << Endl;
-      PyRunString("import tensorflow as tf","Error importing tensorflow");
-      PyRunString("from keras.backend import tensorflow_backend as K");
-      // run these above lines also in global namespace to make them visible overall
-      PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fGlobalNS);
-      PyRun_String("from keras.backend import tensorflow_backend as K", Py_single_input, fGlobalNS, fGlobalNS);
+void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
+
+   // Initialize Keras first. This is done only here, once the class has
+   // all its state variables set from the options or read from the XML file.
+   // Import Keras
+
+   if (fUseTFKeras)
+      Log() << kINFO << "Setting up tf.keras" << Endl;
+   else
+      Log() << kINFO << "Setting up keras with " << gSystem->Getenv("KERAS_BACKEND") << " backend" << Endl;
+
+   bool useTFBackend = kFALSE;
+   bool kerasIsCompatible = kTRUE;
+   bool kerasIsPresent = kFALSE;
+
+   if (!fUseTFKeras) {
+      auto ret  = PyRun_String("import keras", Py_single_input, fGlobalNS, fLocalNS);
+      // need importing also in global namespace
+      if (ret != nullptr) ret = PyRun_String("import keras", Py_single_input, fGlobalNS, fGlobalNS);
+      if (ret != nullptr)
+         kerasIsPresent = kTRUE;
+      if (kerasIsPresent) {
+         // check compatibility with tensorflow
+         if (GetKerasBackend() == kTensorFlow ) {
+            useTFBackend = kTRUE;
+
+            PyRunString("keras_major_version = int(keras.__version__.split('.')[0])");
+            PyRunString("keras_minor_version = int(keras.__version__.split('.')[1])");
+            PyObject *pyKerasMajorVersion = PyDict_GetItemString(fLocalNS, "keras_major_version");
+            PyObject *pyKerasMinorVersion = PyDict_GetItemString(fLocalNS, "keras_minor_version");
+            int kerasMajorVersion = PyLong_AsLong(pyKerasMajorVersion);
+            int kerasMinorVersion = PyLong_AsLong(pyKerasMinorVersion);
+            Log() << kINFO << "Using Keras version " << kerasMajorVersion << "." << kerasMinorVersion << Endl;
+            kerasIsCompatible = (kerasMajorVersion >= 2 && kerasMinorVersion >= 3);
 
+         }
+      } else {
+         // Keras was not found: try to use tf.keras instead
+         Log() << kINFO << "Keras is not found. Trying using tf.keras" << Endl;
+         fUseTFKeras = 1;
+      }
+   }
+
+   // import TensorFlow (if requested or because it is the Keras backend)
+   if (fUseTFKeras || useTFBackend) {
+      auto ret = PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fLocalNS);
+      if (ret != nullptr) ret = PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fGlobalNS);
+      if (ret == nullptr) {
+         Log() << kFATAL << "Importing TensorFlow failed" << Endl;
+      }
       // check tensorflow version
       PyRunString("tf_major_version = int(tf.__version__.split('.')[0])");
       PyObject *pyTfVersion = PyDict_GetItemString(fLocalNS, "tf_major_version");
       int tfVersion = PyLong_AsLong(pyTfVersion);
-      Log() << kINFO << "Using Tensorflow version " << tfVersion << Endl;
+      Log() << kINFO << "Using TensorFlow version " << tfVersion << Endl;
+
+      if (tfVersion < 2) {
+         if (fUseTFKeras == 1) {
+            Log() << kWARNING << "Using TensorFlow version 1.x which does not contain tf.keras - use then TensorFlow as Keras backend" << Endl;
+            fUseTFKeras = kFALSE;
+            // case when Keras was not found
+            if (!kerasIsPresent) {
+               Log() << kFATAL << "Keras is not present and not a suitable TensorFlow version is found " << Endl;
+               return;
+            }
+         }
+      }
+      else {
+         // using TensorFlow version 2.0 or later - can use tf.keras
+         if (!kerasIsCompatible) {
+            Log() << kWARNING << "The Keras version is not compatible with TensorFlow 2. Use instead tf.keras" << Endl;
+            fUseTFKeras = 1;
+         }
+      }
+
+      // if keras 2.3 and tensorflow 2 are found. Use tf.keras or keras ?
+      // at the moment default is tf.keras=false to keep compatibility
+      // but this might change in future releases
+      if (fUseTFKeras) {
+         Log() << kINFO << "Use Keras version from TensorFlow : tf.keras" << Endl;
+         fKerasString = "tf.keras";
+         PyRunString("K = tf.keras.backend");
+         PyRun_String("K = tf.keras.backend", Py_single_input, fGlobalNS, fGlobalNS);
+      }
+      else {
+         Log() << kINFO << "Use TensorFlow as Keras backend" << Endl;
+         fKerasString = "keras";
+         PyRunString("from keras.backend import tensorflow_backend as K");
+         PyRun_String("from keras.backend import tensorflow_backend as K", Py_single_input, fGlobalNS, fGlobalNS);
+      }
 
+      // extra options for tensorflow
       // use different naming in tf2 for ConfigProto and Session
       TString configProto = (tfVersion >= 2) ? "tf.compat.v1.ConfigProto" : "tf.ConfigProto";
       TString session = (tfVersion >= 2) ? "tf.compat.v1.Session" : "tf.Session";
 
       // in case specify number of threads
       int num_threads = fNumThreads;
       if (num_threads > 0) {
-         Log() << kINFO << "Setting the CPU number of threads =  "  << num_threads << Endl;
+         Log() << kINFO << "Setting the CPU number of threads =  " << num_threads << Endl;
 
-         PyRunString(TString::Format("session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
-                                        configProto.Data(), num_threads,num_threads));
-      }
-      else
-         PyRunString(TString::Format("session_conf = %s()",configProto.Data()));
+         PyRunString(
+            TString::Format("session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
+                            configProto.Data(), num_threads, num_threads));
+      } else
+         PyRunString(TString::Format("session_conf = %s()", configProto.Data()));
 
       // applying GPU options such as allow_growth=True to avoid allocating all memory on GPU
       // that prevents running later TMVA-GPU
       // Also new Nvidia RTX cards (e.g. RTX 2070)  require this option
-      if (!fGpuOptions.IsNull() ) {
-         TObjArray * optlist = fGpuOptions.Tokenize(",");
+      if (!fGpuOptions.IsNull()) {
+         TObjArray *optlist = fGpuOptions.Tokenize(",");
          for (int item = 0; item < optlist->GetEntries(); ++item) {
             Log() << kINFO << "Applying GPU option:  gpu_options." << optlist->At(item)->GetName() << Endl;
             PyRunString(TString::Format("session_conf.gpu_options.%s", optlist->At(item)->GetName()));
@@ -212,32 +298,28 @@ void MethodPyKeras::ProcessOptions() {
          PyRunString("tf.compat.v1.keras.backend.set_session(sess)");
       }
    }
+   // case not using a Tensorflow backend
    else {
+      fKerasString = "keras";
       if (fNumThreads > 0)
-         Log() << kWARNING << "Cannot set the given " << fNumThreads << " threads when not using tensorflow as  backend"  << Endl;
-      if (!fGpuOptions.IsNull() ) {
-         Log() << kWARNING << "Cannot set the given GPU option " << fGpuOptions << " when not using tensorflow as  backend"  << Endl;
+         Log() << kWARNING << "Cannot set the given " << fNumThreads << " threads when not using tensorflow as  backend"
+               << Endl;
+      if (!fGpuOptions.IsNull()) {
+         Log() << kWARNING << "Cannot set the given GPU option " << fGpuOptions
+               << " when not using tensorflow as  backend" << Endl;
       }
    }
 
-   // Setup model, either the initial model from `fFilenameModel` or
-   // the trained model from `fFilenameTrainedModel`
-   if (fContinueTraining) Log() << kINFO << "Continue training with trained model" << Endl;
-   SetupKerasModel(fContinueTraining);
-}
-
-void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
    /*
     * Load Keras model from file
     */
 
-   Log() << kINFO << " Setup Keras Model " << Endl;
+   Log() << kINFO << " Loading Keras Model " << Endl;
 
    PyRunString("load_model_custom_objects=None");
 
 
 
-
    if (!fUserCodeName.IsNull()) {
       Log() << kINFO << " Executing user initialization code from  " << fUserCodeName << Endl;
 
@@ -250,9 +332,6 @@ void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
       PyRunString("print('custom objects for loading model : ',load_model_custom_objects)");
    }
 
-
-
-
    // Load initial model or already trained model
    TString filenameLoadModel;
    if (loadTrainedModel) {
@@ -262,10 +341,9 @@ void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
       filenameLoadModel = fFilenameModel;
    }
 
-   Log() << kINFO << " Loading Keras Model " << Endl;
+   PyRunString("model = " + fKerasString + ".models.load_model('" + filenameLoadModel +
+                     "', custom_objects=load_model_custom_objects)", "Failed to load Keras model from file: " + filenameLoadModel);
 
-   PyRunString("model = keras.models.load_model('"+filenameLoadModel+"', custom_objects=load_model_custom_objects)",
-               "Failed to load Keras model from file: "+filenameLoadModel);
    Log() << kINFO << "Loaded model from file: " << filenameLoadModel << Endl;
 
 
@@ -294,6 +372,9 @@ void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
    fModelIsSetup = true;
 }
 
+/// Initialization function called from MethodBase::SetupMethod()
+/// Note that the option strings are not yet filled with their values.
+/// This is done before the ProcessOptions method or after reading from the XML file
 void MethodPyKeras::Init() {
 
    TMVA::Internal::PyGILRAII raii;
@@ -303,20 +384,15 @@ void MethodPyKeras::Init() {
    }
    _import_array(); // required to use numpy arrays
 
-   // Import Keras
    // NOTE: sys.argv has to be cleared because otherwise TensorFlow breaks
    PyRunString("import sys; sys.argv = ['']", "Set sys.argv failed");
-   PyRunString("import keras", "Import Keras failed");
-   // do import also in global namespace
-   auto ret = PyRun_String("import keras", Py_single_input, fGlobalNS, fGlobalNS);
-   if (!ret)
-      Log() << kFATAL << "Import Keras in global namespace failed " << Endl;
 
    // Set flag that model is not setup
    fModelIsSetup = false;
 }
 
 void MethodPyKeras::Train() {
+
    if(!fModelIsSetup) Log() << kFATAL << "Model is not setup for training" << Endl;
 
    /*
@@ -435,15 +511,15 @@ void MethodPyKeras::Train() {
 
    // Callback: Save only weights with smallest validation loss
    if (fSaveBestOnly) {
-      PyRunString("callbacks.append(keras.callbacks.ModelCheckpoint('"+fFilenameTrainedModel+"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))", "Failed to setup training callback: SaveBestOnly");
+      PyRunString("callbacks.append(" + fKerasString +".callbacks.ModelCheckpoint('"+fFilenameTrainedModel+"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))", "Failed to setup training callback: SaveBestOnly");
       Log() << kINFO << "Option SaveBestOnly: Only model weights with smallest validation loss will be stored" << Endl;
    }
 
    // Callback: Stop training early if no improvement in validation loss is observed
    if (fTriesEarlyStopping>=0) {
       TString tries;
       tries.Form("%i", fTriesEarlyStopping);
-      PyRunString("callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss', patience="+tries+", verbose=verbose, mode='auto'))", "Failed to setup training callback: TriesEarlyStopping");
+      PyRunString("callbacks.append(" + fKerasString + ".callbacks.EarlyStopping(monitor='val_loss', patience="+tries+", verbose=verbose, mode='auto'))", "Failed to setup training callback: TriesEarlyStopping");
       Log() << kINFO << "Option TriesEarlyStopping: Training will stop after " << tries << " number of epochs with no improvement of validation loss" << Endl;
    }
 
@@ -464,7 +540,7 @@ void MethodPyKeras::Train() {
                   "Failed to setup scheduler function with string: "+fLearningRateSchedule,
                   Py_file_input);
       // Setup callback
-      PyRunString("callbacks.append(keras.callbacks.LearningRateScheduler(schedule))",
+      PyRunString("callbacks.append(" + fKerasString + ".callbacks.LearningRateScheduler(schedule))",
                   "Failed to setup training callback: LearningRateSchedule");
       Log() << kINFO << "Option LearningRateSchedule: Set learning rate during training: " << fLearningRateSchedule << Endl;
    }
@@ -473,7 +549,7 @@ void MethodPyKeras::Train() {
    if (fTensorBoard != "") {
       TString logdir = TString("'") + fTensorBoard + TString("'");
       PyRunString(
-         "callbacks.append(keras.callbacks.TensorBoard(log_dir=" + logdir +
+         "callbacks.append(" + fKerasString + ".callbacks.TensorBoard(log_dir=" + logdir +
             ", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))",
          "Failed to setup training callback: TensorBoard");
       Log() << kINFO << "Option TensorBoard: Log files for training monitoring are stored in: " << logdir << Endl;
@@ -691,6 +767,10 @@ void MethodPyKeras::GetHelpMessage() const {
 
 MethodPyKeras::EBackendType MethodPyKeras::GetKerasBackend()  {
    // get the keras backend
+
+   // when tf.keras is used, the backend is always TensorFlow
+   if (UseTFKeras())  return kTensorFlow;
+
    // check first if using tensorflow backend
    PyRunString("keras_backend_is_set =  keras.backend.backend() == \"tensorflow\"");
    PyObject * keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");