Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TMVA] Add support for tf.keras in MethodPyKeras #6568

Merged
merged 3 commits into from
Oct 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 5 additions & 1 deletion tmva/pymva/inc/TMVA/MethodPyKeras.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,17 @@ namespace TMVA {
/// Get the Keras backend (can be: TensorFlow, Theano or CNTK)
EBackendType GetKerasBackend();
TString GetKerasBackendName();
// Returns the current value of fUseTFKeras: true when the Keras implementation
// shipped inside TensorFlow (tf.keras) is used instead of the standalone keras package.
Bool_t UseTFKeras() const { return fUseTFKeras; }

private:
private:

TString fFilenameModel; // Filename of the previously exported Keras model
UInt_t fBatchSize {0}; // Training batch size
UInt_t fNumEpochs {0}; // Number of training epochs
Int_t fNumThreads {0}; // Number of CPU threads (if 0 uses default values)
Int_t fVerbose; // Keras verbosity during training
Bool_t fUseTFKeras { kFALSE}; // use the Keras shipped with TensorFlow (tf.keras); false by default
stwunsch marked this conversation as resolved.
Show resolved Hide resolved
Bool_t fContinueTraining; // Load weights from previous training
Bool_t fSaveBestOnly; // Store only weights with smallest validation loss
Int_t fTriesEarlyStopping; // Stop training if validation loss is not decreasing for several epochs
Expand All @@ -92,6 +95,7 @@ namespace TMVA {
TString fNumValidationString; // option string defining the number of validation events
TString fGpuOptions; // GPU options (for Tensorflow to set in session_config.gpu_options)
TString fUserCodeName; // filename of an optional user script that will be executed before loading the Keras model
TString fKerasString; // string identifying keras or tf.keras

bool fModelIsSetup = false; // flag whether model is loaded, needed for getMvaValue during evaluation
float* fVals = nullptr; // variables array used for GetMvaValue
Expand Down
176 changes: 128 additions & 48 deletions tmva/pymva/src/MethodPyKeras.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "TMVA/VariableTransformBase.h"
#include "TMVA/Tools.h"
#include "TMVA/Timer.h"
#include "TSystem.h"

using namespace TMVA;

Expand Down Expand Up @@ -80,6 +81,7 @@ void MethodPyKeras::DeclareOptions() {
DeclareOptionRef(fNumEpochs, "NumEpochs", "Number of training epochs");
DeclareOptionRef(fNumThreads, "NumThreads", "Number of CPU threads (only for Tensorflow backend)");
DeclareOptionRef(fGpuOptions, "GpuOptions", "GPU options for tensorflow, such as allow_growth");
DeclareOptionRef(fUseTFKeras, "tf.keras", "Use tensorflow from Keras");
DeclareOptionRef(fVerbose, "Verbose", "Keras verbosity during training");
DeclareOptionRef(fContinueTraining, "ContinueTraining", "Load weights from previous training");
DeclareOptionRef(fSaveBestOnly, "SaveBestOnly", "Store only weights with smallest validation loss");
Expand Down Expand Up @@ -154,51 +156,135 @@ UInt_t TMVA::MethodPyKeras::GetNumValidationSamples()
return nValidationSamples;
}

/// Function processing the options.
/// This is called only when creating the method before training, not when
/// reading from an XML file. Called from MethodBase::ProcessSetup,
/// which is called from Factory::BookMethod.
void MethodPyKeras::ProcessOptions() {

// Set default filename for trained model if option is not used:
// <weight file dir>/TrainedModel_<method name>.h5
if (fFilenameTrainedModel.IsNull()) {
fFilenameTrainedModel = GetWeightFileDir() + "/TrainedModel_" + GetName() + ".h5";
}

// NOTE(review): TensorFlow-specific settings (GPU options such as allowing
// memory growth, number of CPU threads from the NumThreads option) are not
// applied in this function body; it only delegates to SetupKerasModel below.
// Setup model, either the initial model from `fFilenameModel` or
// the trained model from `fFilenameTrainedModel`
if (fContinueTraining) Log() << kINFO << "Continue training with trained model" << Endl;
SetupKerasModel(fContinueTraining);
}

// check first if using tensorflow backend
if (GetKerasBackend() == kTensorFlow) {
Log() << kINFO << "Using TensorFlow backend - setting special configuration options " << Endl;
PyRunString("import tensorflow as tf","Error importing tensorflow");
PyRunString("from keras.backend import tensorflow_backend as K");
// run these above lines also in global namespace to make them visible overall
PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fGlobalNS);
PyRun_String("from keras.backend import tensorflow_backend as K", Py_single_input, fGlobalNS, fGlobalNS);
void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {

// initialize first Keras. This is done only here when class has
// all state variable set from options or read from XML file
// Import Keras

if (fUseTFKeras)
Log() << kINFO << "Setting up tf.keras" << Endl;
else
Log() << kINFO << "Setting up keras with " << gSystem->Getenv("KERAS_BACKEND") << " backend" << Endl;

bool useTFBackend = kFALSE;
bool kerasIsCompatible = kTRUE;
bool kerasIsPresent = kFALSE;

if (!fUseTFKeras) {
auto ret = PyRun_String("import keras", Py_single_input, fGlobalNS, fLocalNS);
// need importing also in global namespace
if (ret != nullptr) ret = PyRun_String("import keras", Py_single_input, fGlobalNS, fGlobalNS);
if (ret != nullptr)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here you have two times if(ret != nullptr), you can merge them.

kerasIsPresent = kTRUE;
if (kerasIsPresent) {
// check compatibility with tensorflow
if (GetKerasBackend() == kTensorFlow ) {
useTFBackend = kTRUE;

PyRunString("keras_major_version = int(keras.__version__.split('.')[0])");
PyRunString("keras_minor_version = int(keras.__version__.split('.')[1])");
PyObject *pyKerasMajorVersion = PyDict_GetItemString(fLocalNS, "keras_major_version");
PyObject *pyKerasMinorVersion = PyDict_GetItemString(fLocalNS, "keras_minor_version");
int kerasMajorVersion = PyLong_AsLong(pyKerasMajorVersion);
int kerasMinorVersion = PyLong_AsLong(pyKerasMinorVersion);
Log() << kINFO << "Using Keras version " << kerasMajorVersion << "." << kerasMinorVersion << Endl;
kerasIsCompatible = (kerasMajorVersion >= 2 && kerasMinorVersion >= 3);

}
} else {
// Keras is not found; try to use tf.keras instead
Log() << kINFO << "Keras is not found. Trying using tf.keras" << Endl;
fUseTFKeras = 1;
}
}

// import TensorFlow (if requested or because it is the Keras backend)
if (fUseTFKeras || useTFBackend) {
auto ret = PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fLocalNS);
if (ret != nullptr) ret = PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fGlobalNS);
if (ret == nullptr) {
stwunsch marked this conversation as resolved.
Show resolved Hide resolved
Log() << kFATAL << "Importing TensorFlow failed" << Endl;
}
// check tensorflow version
PyRunString("tf_major_version = int(tf.__version__.split('.')[0])");
PyObject *pyTfVersion = PyDict_GetItemString(fLocalNS, "tf_major_version");
int tfVersion = PyLong_AsLong(pyTfVersion);
Log() << kINFO << "Using Tensorflow version " << tfVersion << Endl;
Log() << kINFO << "Using TensorFlow version " << tfVersion << Endl;

if (tfVersion < 2) {
if (fUseTFKeras == 1) {
stwunsch marked this conversation as resolved.
Show resolved Hide resolved
Log() << kWARNING << "Using TensorFlow version 1.x which does not contain tf.keras - use then TensorFlow as Keras backend" << Endl;
fUseTFKeras = kFALSE;
// case when Keras was not found
if (!kerasIsPresent) {
Log() << kFATAL << "Keras is not present and not a suitable TensorFlow version is found " << Endl;
return;
}
}
}
else {
// using version larger than 2.0 - can use tf.keras
if (!kerasIsCompatible) {
Log() << kWARNING << "The Keras version is not compatible with TensorFlow 2. Use instead tf.keras" << Endl;
fUseTFKeras = 1;
}
}

// if keras 2.3 and tensorflow 2 are found. Use tf.keras or keras ?
// at the moment default is tf.keras=false to keep compatibility
// but this might change in future releases
if (fUseTFKeras) {
Log() << kINFO << "Use Keras version from TensorFlow : tf.keras" << Endl;
fKerasString = "tf.keras";
PyRunString("K = tf.keras.backend");
PyRun_String("K = tf.keras.backend", Py_single_input, fGlobalNS, fGlobalNS);
}
else {
Log() << kINFO << "Use TensorFlow as Keras backend" << Endl;
fKerasString = "keras";
PyRunString("from keras.backend import tensorflow_backend as K");
PyRun_String("from keras.backend import tensorflow_backend as K", Py_single_input, fGlobalNS, fGlobalNS);
}

// extra options for tensorflow
// use different naming in tf2 for ConfigProto and Session
TString configProto = (tfVersion >= 2) ? "tf.compat.v1.ConfigProto" : "tf.ConfigProto";
TString session = (tfVersion >= 2) ? "tf.compat.v1.Session" : "tf.Session";

// in case specify number of threads
int num_threads = fNumThreads;
if (num_threads > 0) {
Log() << kINFO << "Setting the CPU number of threads = " << num_threads << Endl;
Log() << kINFO << "Setting the CPU number of threads = " << num_threads << Endl;

PyRunString(TString::Format("session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
configProto.Data(), num_threads,num_threads));
}
else
PyRunString(TString::Format("session_conf = %s()",configProto.Data()));
PyRunString(
TString::Format("session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
configProto.Data(), num_threads, num_threads));
} else
PyRunString(TString::Format("session_conf = %s()", configProto.Data()));

// applying GPU options such as allow_growth=True to avoid allocating all memory on GPU
// that prevents running later TMVA-GPU
// Also new Nvidia RTX cards (e.g. RTX 2070) require this option
if (!fGpuOptions.IsNull() ) {
TObjArray * optlist = fGpuOptions.Tokenize(",");
if (!fGpuOptions.IsNull()) {
TObjArray *optlist = fGpuOptions.Tokenize(",");
for (int item = 0; item < optlist->GetEntries(); ++item) {
Log() << kINFO << "Applying GPU option: gpu_options." << optlist->At(item)->GetName() << Endl;
PyRunString(TString::Format("session_conf.gpu_options.%s", optlist->At(item)->GetName()));
Expand All @@ -212,32 +298,28 @@ void MethodPyKeras::ProcessOptions() {
PyRunString("tf.compat.v1.keras.backend.set_session(sess)");
}
}
// case not using a Tensorflow backend
else {
fKerasString = "keras";
if (fNumThreads > 0)
Log() << kWARNING << "Cannot set the given " << fNumThreads << " threads when not using tensorflow as backend" << Endl;
if (!fGpuOptions.IsNull() ) {
Log() << kWARNING << "Cannot set the given GPU option " << fGpuOptions << " when not using tensorflow as backend" << Endl;
Log() << kWARNING << "Cannot set the given " << fNumThreads << " threads when not using tensorflow as backend"
<< Endl;
if (!fGpuOptions.IsNull()) {
Log() << kWARNING << "Cannot set the given GPU option " << fGpuOptions
stwunsch marked this conversation as resolved.
Show resolved Hide resolved
<< " when not using tensorflow as backend" << Endl;
}
}

// Setup model, either the initial model from `fFilenameModel` or
// the trained model from `fFilenameTrainedModel`
if (fContinueTraining) Log() << kINFO << "Continue training with trained model" << Endl;
SetupKerasModel(fContinueTraining);
}

void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
/*
* Load Keras model from file
*/

Log() << kINFO << " Setup Keras Model " << Endl;
Log() << kINFO << " Loading Keras Model " << Endl;

PyRunString("load_model_custom_objects=None");




if (!fUserCodeName.IsNull()) {
Log() << kINFO << " Executing user initialization code from " << fUserCodeName << Endl;

Expand All @@ -250,9 +332,6 @@ void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
PyRunString("print('custom objects for loading model : ',load_model_custom_objects)");
}




// Load initial model or already trained model
TString filenameLoadModel;
if (loadTrainedModel) {
Expand All @@ -262,10 +341,9 @@ void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
filenameLoadModel = fFilenameModel;
}

Log() << kINFO << " Loading Keras Model " << Endl;
PyRunString("model = " + fKerasString + ".models.load_model('" + filenameLoadModel +
"', custom_objects=load_model_custom_objects)", "Failed to load Keras model from file: " + filenameLoadModel);

PyRunString("model = keras.models.load_model('"+filenameLoadModel+"', custom_objects=load_model_custom_objects)",
"Failed to load Keras model from file: "+filenameLoadModel);
Log() << kINFO << "Loaded model from file: " << filenameLoadModel << Endl;


Expand Down Expand Up @@ -294,6 +372,9 @@ void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
fModelIsSetup = true;
}

/// Initialization function called from MethodBase::SetupMethod()
/// Note that option string are not yet filled with their values.
/// This is done before ProcessOption method or after reading from XML file
void MethodPyKeras::Init() {

TMVA::Internal::PyGILRAII raii;
Expand All @@ -303,20 +384,15 @@ void MethodPyKeras::Init() {
}
_import_array(); // required to use numpy arrays

// Import Keras
// NOTE: sys.argv has to be cleared because otherwise TensorFlow breaks
PyRunString("import sys; sys.argv = ['']", "Set sys.argv failed");
PyRunString("import keras", "Import Keras failed");
// do import also in global namespace
auto ret = PyRun_String("import keras", Py_single_input, fGlobalNS, fGlobalNS);
if (!ret)
Log() << kFATAL << "Import Keras in global namespace failed " << Endl;

// Set flag that model is not setup
fModelIsSetup = false;
}

void MethodPyKeras::Train() {

if(!fModelIsSetup) Log() << kFATAL << "Model is not setup for training" << Endl;

/*
Expand Down Expand Up @@ -435,15 +511,15 @@ void MethodPyKeras::Train() {

// Callback: Save only weights with smallest validation loss
if (fSaveBestOnly) {
PyRunString("callbacks.append(keras.callbacks.ModelCheckpoint('"+fFilenameTrainedModel+"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))", "Failed to setup training callback: SaveBestOnly");
PyRunString("callbacks.append(" + fKerasString +".callbacks.ModelCheckpoint('"+fFilenameTrainedModel+"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))", "Failed to setup training callback: SaveBestOnly");
Log() << kINFO << "Option SaveBestOnly: Only model weights with smallest validation loss will be stored" << Endl;
}

// Callback: Stop training early if no improvement in validation loss is observed
if (fTriesEarlyStopping>=0) {
TString tries;
tries.Form("%i", fTriesEarlyStopping);
PyRunString("callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss', patience="+tries+", verbose=verbose, mode='auto'))", "Failed to setup training callback: TriesEarlyStopping");
PyRunString("callbacks.append(" + fKerasString + ".callbacks.EarlyStopping(monitor='val_loss', patience="+tries+", verbose=verbose, mode='auto'))", "Failed to setup training callback: TriesEarlyStopping");
Log() << kINFO << "Option TriesEarlyStopping: Training will stop after " << tries << " number of epochs with no improvement of validation loss" << Endl;
}

Expand All @@ -464,7 +540,7 @@ void MethodPyKeras::Train() {
"Failed to setup scheduler function with string: "+fLearningRateSchedule,
Py_file_input);
// Setup callback
PyRunString("callbacks.append(keras.callbacks.LearningRateScheduler(schedule))",
PyRunString("callbacks.append(" + fKerasString + ".callbacks.LearningRateScheduler(schedule))",
"Failed to setup training callback: LearningRateSchedule");
Log() << kINFO << "Option LearningRateSchedule: Set learning rate during training: " << fLearningRateSchedule << Endl;
}
Expand All @@ -473,7 +549,7 @@ void MethodPyKeras::Train() {
if (fTensorBoard != "") {
TString logdir = TString("'") + fTensorBoard + TString("'");
PyRunString(
"callbacks.append(keras.callbacks.TensorBoard(log_dir=" + logdir +
"callbacks.append(" + fKerasString + ".callbacks.TensorBoard(log_dir=" + logdir +
", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))",
"Failed to setup training callback: TensorBoard");
Log() << kINFO << "Option TensorBoard: Log files for training monitoring are stored in: " << logdir << Endl;
Expand Down Expand Up @@ -691,6 +767,10 @@ void MethodPyKeras::GetHelpMessage() const {

MethodPyKeras::EBackendType MethodPyKeras::GetKerasBackend() {
// get the keras backend

// in case we use tf.keras backend is tensorflow
if (UseTFKeras()) return kTensorFlow;

// check first if using tensorflow backend
PyRunString("keras_backend_is_set = keras.backend.backend() == \"tensorflow\"");
PyObject * keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
Expand Down