diff --git a/tmva/pymva/inc/TMVA/MethodPyKeras.h b/tmva/pymva/inc/TMVA/MethodPyKeras.h
index bf2c1ba4717e8..7188da5787a9d 100644
--- a/tmva/pymva/inc/TMVA/MethodPyKeras.h
+++ b/tmva/pymva/inc/TMVA/MethodPyKeras.h
@@ -76,14 +76,17 @@ namespace TMVA {
       /// Get the Keras backend (can be: TensorFlow, Theano or CNTK)
       EBackendType GetKerasBackend();
       TString GetKerasBackendName();
+      /// Flag indicating that the Keras shipped with TensorFlow 2 (tf.keras) is used
+      Bool_t UseTFKeras() const { return fUseTFKeras; }
 
-      private:
+    private:
 
       TString fFilenameModel; // Filename of the previously exported Keras model
       UInt_t fBatchSize {0}; // Training batch size
       UInt_t fNumEpochs {0}; // Number of training epochs
       Int_t fNumThreads {0}; // Number of CPU threads (if 0 uses default values)
       Int_t fVerbose; // Keras verbosity during training
+      Bool_t fUseTFKeras { kFALSE}; // use the Keras shipped with TensorFlow (tf.keras); default is false
       Bool_t fContinueTraining; // Load weights from previous training
       Bool_t fSaveBestOnly; // Store only weights with smallest validation loss
       Int_t fTriesEarlyStopping; // Stop training if validation loss is not decreasing for several epochs
@@ -92,6 +95,7 @@ namespace TMVA {
       TString fNumValidationString; // option string defining the number of validation events
       TString fGpuOptions; // GPU options (for Tensorflow to set in session_config.gpu_options)
       TString fUserCodeName; // filename of an optional user script that will be executed before loading the Keras model
+      TString fKerasString; // Python prefix used to reach the Keras API: "keras" or "tf.keras"
 
       bool fModelIsSetup = false; // flag whether model is loaded, needed for getMvaValue during evaluation
       float* fVals = nullptr; // variables array used for GetMvaValue
diff --git a/tmva/pymva/src/MethodPyKeras.cxx b/tmva/pymva/src/MethodPyKeras.cxx
index 0e7fdb00f8924..a0aaa14f9428e 100644
--- a/tmva/pymva/src/MethodPyKeras.cxx
+++ b/tmva/pymva/src/MethodPyKeras.cxx
@@ -15,6 +15,7 @@
 #include "TMVA/VariableTransformBase.h"
 #include "TMVA/Tools.h"
 #include "TMVA/Timer.h"
+#include "TSystem.h"
 
 using namespace TMVA;
 
@@ -80,6 +81,7 @@ void MethodPyKeras::DeclareOptions() {
    DeclareOptionRef(fNumEpochs, "NumEpochs", "Number of training epochs");
    DeclareOptionRef(fNumThreads, "NumThreads", "Number of CPU threads (only for Tensorflow backend)");
    DeclareOptionRef(fGpuOptions, "GpuOptions", "GPU options for tensorflow, such as allow_growth");
+   DeclareOptionRef(fUseTFKeras, "tf.keras", "Use Keras from TensorFlow (tf.keras)");
    DeclareOptionRef(fVerbose, "Verbose", "Keras verbosity during training");
    DeclareOptionRef(fContinueTraining, "ContinueTraining", "Load weights from previous training");
    DeclareOptionRef(fSaveBestOnly, "SaveBestOnly", "Store only weights with smallest validation loss");
@@ -154,31 +156,115 @@ UInt_t TMVA::MethodPyKeras::GetNumValidationSamples()
    return nValidationSamples;
 }
 
+/// Function processing the options.
+/// This is called only when creating the method before training, not when
+/// reading from an XML file. Called from MethodBase::ProcessSetup,
+/// which is called from Factory::BookMethod.
 void MethodPyKeras::ProcessOptions() {
+
 
    // Set default filename for trained model if option is not used
    if (fFilenameTrainedModel.IsNull()) {
       fFilenameTrainedModel = GetWeightFileDir() + "/TrainedModel_" + GetName() + ".h5";
    }
 
-   // set here some specific options for Tensorflow backend
-   // - when using tensorflow gpu set option to allow memory growth to avoid allocating all memory
-   // - set up number of threads for CPU if NumThreads option was specified
+   // Setup model, either the initial model from `fFilenameModel` or
+   // the trained model from `fFilenameTrainedModel`
+   if (fContinueTraining) Log() << kINFO << "Continue training with trained model" << Endl;
+   SetupKerasModel(fContinueTraining);
+}
 
-   // check first if using tensorflow backend
-   if (GetKerasBackend() == kTensorFlow) {
-      Log() << kINFO << "Using TensorFlow backend - setting special configuration options " << Endl;
-      PyRunString("import tensorflow as tf","Error importing tensorflow");
-      PyRunString("from keras.backend import tensorflow_backend as K");
-      // run these above lines also in global namespace to make them visible overall
-      PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fGlobalNS);
-      PyRun_String("from keras.backend import tensorflow_backend as K", Py_single_input, fGlobalNS, fGlobalNS);
+void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
+
+   // Initialize Keras first. This is done only here, when the class has
+   // all its state variables set from the options or read from the XML file.
+   // KERAS_BACKEND may be unset: a null C string must not be streamed into the logger.
+   const char *kerasBackendEnv = gSystem->Getenv("KERAS_BACKEND");
+   if (fUseTFKeras)
+      Log() << kINFO << "Setting up tf.keras" << Endl;
+   else
+      Log() << kINFO << "Setting up keras with " << (kerasBackendEnv ? kerasBackendEnv : "default") << " backend" << Endl;
+
+   bool useTFBackend = kFALSE;
+   bool kerasIsCompatible = kTRUE;
+   bool kerasIsPresent = kFALSE;
+
+   if (!fUseTFKeras) {
+      auto ret = PyRun_String("import keras", Py_single_input, fGlobalNS, fLocalNS);
+      // need importing also in global namespace
+      if (ret != nullptr) ret = PyRun_String("import keras", Py_single_input, fGlobalNS, fGlobalNS);
+      if (ret != nullptr)
+         kerasIsPresent = kTRUE;
+      if (kerasIsPresent) {
+         // check compatibility with tensorflow
+         if (GetKerasBackend() == kTensorFlow ) {
+            useTFBackend = kTRUE;
+
+            PyRunString("keras_major_version = int(keras.__version__.split('.')[0])");
+            PyRunString("keras_minor_version = int(keras.__version__.split('.')[1])");
+            PyObject *pyKerasMajorVersion = PyDict_GetItemString(fLocalNS, "keras_major_version");
+            PyObject *pyKerasMinorVersion = PyDict_GetItemString(fLocalNS, "keras_minor_version");
+            int kerasMajorVersion = PyLong_AsLong(pyKerasMajorVersion);
+            int kerasMinorVersion = PyLong_AsLong(pyKerasMinorVersion);
+            Log() << kINFO << "Using Keras version " << kerasMajorVersion << "." << kerasMinorVersion << Endl;
+            kerasIsCompatible = (kerasMajorVersion > 2 || (kerasMajorVersion == 2 && kerasMinorVersion >= 3)); // version >= 2.3
+         }
+      } else {
+         // Keras is not found: try to use tf.keras
+         Log() << kINFO << "Keras is not found. Trying using tf.keras" << Endl;
+         fUseTFKeras = kTRUE;
+      }
+   }
+
+   // import TensorFlow (if requested, or because it is the Keras backend)
+   if (fUseTFKeras || useTFBackend) {
+      auto ret = PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fLocalNS);
+      if (ret != nullptr) ret = PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fGlobalNS);
+      if (ret == nullptr) {
+         Log() << kFATAL << "Importing TensorFlow failed" << Endl;
+      }
       // check tensorflow version
       PyRunString("tf_major_version = int(tf.__version__.split('.')[0])");
       PyObject *pyTfVersion = PyDict_GetItemString(fLocalNS, "tf_major_version");
       int tfVersion = PyLong_AsLong(pyTfVersion);
-      Log() << kINFO << "Using Tensorflow version " << tfVersion << Endl;
+      Log() << kINFO << "Using TensorFlow version " << tfVersion << Endl;
+
+      if (tfVersion < 2) {
+         if (fUseTFKeras) {
+            Log() << kWARNING << "Using TensorFlow version 1.x which does not contain tf.keras - use then TensorFlow as Keras backend" << Endl;
+            fUseTFKeras = kFALSE;
+            // case when Keras was not found
+            if (!kerasIsPresent) {
+               Log() << kFATAL << "Keras is not present and not a suitable TensorFlow version is found " << Endl;
+               return;
+            }
+         }
+      }
+      else {
+         // using TensorFlow version 2 or larger - can use tf.keras
+         if (!kerasIsCompatible) {
+            Log() << kWARNING << "The Keras version is not compatible with TensorFlow 2. Use instead tf.keras" << Endl;
+            fUseTFKeras = kTRUE;
+         }
+      }
+
+      // If both Keras >= 2.3 and TensorFlow 2 are found, should tf.keras or keras be used?
+      // At the moment the default is tf.keras=false to keep compatibility,
+      // but this might change in future releases.
+      if (fUseTFKeras) {
+         Log() << kINFO << "Use Keras version from TensorFlow : tf.keras" << Endl;
+         fKerasString = "tf.keras";
+         PyRunString("K = tf.keras.backend");
+         PyRun_String("K = tf.keras.backend", Py_single_input, fGlobalNS, fGlobalNS);
+      }
+      else {
+         Log() << kINFO << "Use TensorFlow as Keras backend" << Endl;
+         fKerasString = "keras";
+         PyRunString("from keras.backend import tensorflow_backend as K");
+         PyRun_String("from keras.backend import tensorflow_backend as K", Py_single_input, fGlobalNS, fGlobalNS);
+      }
 
+      // extra options for tensorflow
       // use different naming in tf2 for ConfigProto and Session
       TString configProto = (tfVersion >= 2) ? "tf.compat.v1.ConfigProto" : "tf.ConfigProto";
       TString session = (tfVersion >= 2) ? "tf.compat.v1.Session" : "tf.Session";
@@ -186,19 +272,19 @@ void MethodPyKeras::ProcessOptions() {
       // in case specify number of threads
       int num_threads = fNumThreads;
       if (num_threads > 0) {
-          Log() << kINFO << "Setting the CPU number of threads = " << num_threads << Endl;
+         Log() << kINFO << "Setting the CPU number of threads = " << num_threads << Endl;
 
-         PyRunString(TString::Format("session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
-                                     configProto.Data(), num_threads,num_threads));
-      }
-      else
-         PyRunString(TString::Format("session_conf = %s()",configProto.Data()));
+         PyRunString(
+            TString::Format("session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
+                            configProto.Data(), num_threads, num_threads));
+      } else
+         PyRunString(TString::Format("session_conf = %s()", configProto.Data()));
 
       // applying GPU options such as allow_growth=True to avoid allocating all memory on GPU
       // that prevents running later TMVA-GPU
       // Also new Nvidia RTX cards (e.g. RTX 2070) require this option
-      if (!fGpuOptions.IsNull() ) {
-         TObjArray * optlist = fGpuOptions.Tokenize(",");
+      if (!fGpuOptions.IsNull()) {
+         TObjArray *optlist = fGpuOptions.Tokenize(",");
          for (int item = 0; item < optlist->GetEntries(); ++item) {
             Log() << kINFO << "Applying GPU option: gpu_options." << optlist->At(item)->GetName() << Endl;
             PyRunString(TString::Format("session_conf.gpu_options.%s", optlist->At(item)->GetName()));
@@ -212,32 +298,28 @@ void MethodPyKeras::ProcessOptions() {
          PyRunString("tf.compat.v1.keras.backend.set_session(sess)");
       }
    }
+   // case not using a Tensorflow backend
    else {
+      fKerasString = "keras";
       if (fNumThreads > 0)
-         Log() << kWARNING << "Cannot set the given " << fNumThreads << " threads when not using tensorflow as backend" << Endl;
-      if (!fGpuOptions.IsNull() ) {
-         Log() << kWARNING << "Cannot set the given GPU option " << fGpuOptions << " when not using tensorflow as backend" << Endl;
+         Log() << kWARNING << "Cannot set the given " << fNumThreads << " threads when not using tensorflow as backend"
+               << Endl;
+      if (!fGpuOptions.IsNull()) {
+         Log() << kWARNING << "Cannot set the given GPU option " << fGpuOptions
+               << " when not using tensorflow as backend" << Endl;
       }
    }
 
-   // Setup model, either the initial model from `fFilenameModel` or
-   // the trained model from `fFilenameTrainedModel`
-   if (fContinueTraining) Log() << kINFO << "Continue training with trained model" << Endl;
-   SetupKerasModel(fContinueTraining);
-}
-
-void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
    /*
     * Load Keras model from file
     */
 
-   Log() << kINFO << " Setup Keras Model " << Endl;
+   Log() << kINFO << " Loading Keras Model " << Endl;
 
    PyRunString("load_model_custom_objects=None");
 
 
 
-
    if (!fUserCodeName.IsNull()) {
       Log() << kINFO << " Executing user initialization code from " << fUserCodeName << Endl;
 
@@ -250,9 +332,6 @@ void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
       PyRunString("print('custom objects for loading model : ',load_model_custom_objects)");
    }
 
-
-
-
    // Load initial model or already trained model
    TString filenameLoadModel;
    if (loadTrainedModel) {
@@ -262,10 +341,9 @@ void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
       filenameLoadModel = fFilenameModel;
    }
 
-   Log() << kINFO << " Loading Keras Model " << Endl;
+   PyRunString("model = " + fKerasString + ".models.load_model('" + filenameLoadModel +
+               "', custom_objects=load_model_custom_objects)", "Failed to load Keras model from file: " + filenameLoadModel);
 
-   PyRunString("model = keras.models.load_model('"+filenameLoadModel+"', custom_objects=load_model_custom_objects)",
-               "Failed to load Keras model from file: "+filenameLoadModel);
 
    Log() << kINFO << "Loaded model from file: " << filenameLoadModel << Endl;
 
 
@@ -294,6 +372,9 @@ void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
    fModelIsSetup = true;
 }
 
+/// Initialization function called from MethodBase::SetupMethod().
+/// Note that the option strings are not yet filled with their values.
+/// This is done before the ProcessOptions method or after reading from the XML file.
 void MethodPyKeras::Init() {
 
    TMVA::Internal::PyGILRAII raii;
@@ -303,20 +384,15 @@ void MethodPyKeras::Init() {
    }
    _import_array(); // required to use numpy arrays
 
-   // Import Keras
    // NOTE: sys.argv has to be cleared because otherwise TensorFlow breaks
    PyRunString("import sys; sys.argv = ['']", "Set sys.argv failed");
-   PyRunString("import keras", "Import Keras failed");
-   // do import also in global namespace
-   auto ret = PyRun_String("import keras", Py_single_input, fGlobalNS, fGlobalNS);
-   if (!ret)
-      Log() << kFATAL << "Import Keras in global namespace failed " << Endl;
 
    // Set flag that model is not setup
    fModelIsSetup = false;
 }
 
 void MethodPyKeras::Train() {
+
    if(!fModelIsSetup) Log() << kFATAL << "Model is not setup for training" << Endl;
 
    /*
@@ -435,7 +511,7 @@ void MethodPyKeras::Train() {
 
    // Callback: Save only weights with smallest validation loss
    if (fSaveBestOnly) {
-      PyRunString("callbacks.append(keras.callbacks.ModelCheckpoint('"+fFilenameTrainedModel+"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))", "Failed to setup training callback: SaveBestOnly");
+      PyRunString("callbacks.append(" + fKerasString +".callbacks.ModelCheckpoint('"+fFilenameTrainedModel+"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))", "Failed to setup training callback: SaveBestOnly");
       Log() << kINFO << "Option SaveBestOnly: Only model weights with smallest validation loss will be stored" << Endl;
    }
 
@@ -443,7 +519,7 @@ void MethodPyKeras::Train() {
    if (fTriesEarlyStopping>=0) {
       TString tries;
       tries.Form("%i", fTriesEarlyStopping);
-      PyRunString("callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss', patience="+tries+", verbose=verbose, mode='auto'))", "Failed to setup training callback: TriesEarlyStopping");
+      PyRunString("callbacks.append(" + fKerasString + ".callbacks.EarlyStopping(monitor='val_loss', patience="+tries+", verbose=verbose, mode='auto'))", "Failed to setup training callback: TriesEarlyStopping");
       Log() << kINFO << "Option TriesEarlyStopping: Training will stop after " << tries << " number of epochs with no improvement of validation loss" << Endl;
    }
 
@@ -464,7 +540,7 @@ void MethodPyKeras::Train() {
                   "Failed to setup scheduler function with string: "+fLearningRateSchedule,
                   Py_file_input);
       // Setup callback
-      PyRunString("callbacks.append(keras.callbacks.LearningRateScheduler(schedule))",
+      PyRunString("callbacks.append(" + fKerasString + ".callbacks.LearningRateScheduler(schedule))",
                   "Failed to setup training callback: LearningRateSchedule");
       Log() << kINFO << "Option LearningRateSchedule: Set learning rate during training: " << fLearningRateSchedule << Endl;
    }
@@ -473,7 +549,7 @@ void MethodPyKeras::Train() {
    if (fTensorBoard != "") {
       TString logdir = TString("'") + fTensorBoard + TString("'");
       PyRunString(
-         "callbacks.append(keras.callbacks.TensorBoard(log_dir=" + logdir +
+         "callbacks.append(" + fKerasString + ".callbacks.TensorBoard(log_dir=" + logdir +
             ", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))",
          "Failed to setup training callback: TensorBoard");
       Log() << kINFO << "Option TensorBoard: Log files for training monitoring are stored in: " << logdir << Endl;
@@ -691,6 +767,10 @@ void MethodPyKeras::GetHelpMessage() const {
 
 MethodPyKeras::EBackendType MethodPyKeras::GetKerasBackend() {
    // get the keras backend
+
+   // when tf.keras is used, the backend is always TensorFlow
+   if (UseTFKeras()) return kTensorFlow;
+
    // check first if using tensorflow backend
    PyRunString("keras_backend_is_set = keras.backend.backend() == \"tensorflow\"");
    PyObject * keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");