Getting error when using Tensorboard #128

Closed
NamTran838P opened this issue Apr 12, 2020 · 2 comments

I am trying to train a seq2seq-attention model using 2 GPUs and would like to use Tensorboard for visualization. I followed the nmt-keras tutorial for Tensorboard but still got an error. Here is the code that I used:

def start_training(use_gpu):

    ds = Dataset('tutorial_dataset', 'tutorial', silence=False)
    PATH = ""
    ds.setOutput(PATH + "train_correct.txt",
                 'train',
                 type='text',
                 id='target_text',
                 tokenization='tokenize_basic',
                 build_vocabulary=True,
                 pad_on_batch=True,
                 sample_weights=True,
                 max_text_len=100,
                 max_words=50000,
                 min_occ=1)

    ds.setOutput(PATH + "validation_correct.txt",
                 'val',
                 type='text',
                 id='target_text',
                 pad_on_batch=True,
                 tokenization='tokenize_basic',
                 sample_weights=True,
                 max_text_len=100,
                 max_words=0)

    ds.setInput(PATH + "train_error.txt",
                'train',
                type='text',
                id='source_text',
                pad_on_batch=True,
                tokenization='tokenize_basic',
                build_vocabulary=True,
                fill='end',
                max_text_len=100,
                max_words=50000,
                min_occ=1)

    ds.setInput(PATH + "validation_error.txt",
                'val',
                type='text',
                id='source_text',
                pad_on_batch=True,
                tokenization='tokenize_basic',
                fill='end',
                max_text_len=100,
                min_occ=1)

    """...and for the 'state_below' data. Note that: 1) The offset flat is set to 1, which means that the text will be shifted to the right 1 position. 2) During sampling time, we won't have this input. Hence, we 'hack' the dataset model by inserting an artificial input, of type 'ghost' for the validation split."""

    ds.setInput(PATH + "train_correct.txt",
                'train',
                type='text',
                id='state_below',
                required=False,
                tokenization='tokenize_basic',
                pad_on_batch=True,
                build_vocabulary='target_text',
                offset=1,
                fill='end',
                max_text_len=100,
                max_words=50000)
    ds.setInput(None,
                'val',
                type='ghost',
                id='state_below',
                required=False)

    """We can also keep the literal source words (for replacing unknown words)."""

    for split, input_text_filename in zip(['train', 'val'], [PATH + "train_error.txt", PATH + "validation_error.txt"]):
        ds.setRawInput(input_text_filename,
                      split,
                      type='file-name',
                      id='raw_source_text',
                      overwrite_split=True)

    """We also need to match the references with the inputs. Since we only have one reference per input sample, we set `repeat=1`."""

    keep_n_captions(ds, repeat=1, n=1, set_names=['val'])

    """Finally, we can save our dataset instance for using in other experiments:"""

    saveDataset(ds, PATH + "dataset")

    """## 2. Creating and training a Neural Translation Model
    Now, we'll create and train a Neural Machine Translation (NMT) model. Since there are a significant number of hyperparameters, we'll use the default ones specified in the `config.py` file. Note that almost every hardcoded parameter is automatically set from the config if we run `main.py`.

    We'll create an `'AttentionRNNEncoderDecoder'` (an LSTM encoder-decoder with an attention mechanism). Refer to the [`model_zoo.py`](https://github.com/lvapeab/nmt-keras/blob/master/nmt_keras/model_zoo.py) file for other models (e.g. the Transformer).

    So first, let's import the model and the hyperparameters. We'll also load the dataset we stored in the previous section (not necessary as it is in memory, but as a demonstration):
    """

    params = load_parameters()
    dataset = loadDataset(PATH + "dataset/Dataset_tutorial_dataset.pkl")

    """Since the number of words in the dataset may be unknown beforehand, we must update the params information according to the dataset instance:"""

    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len['source_text']
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len['target_text']
    params['USE_CUDNN'] = use_gpu
    params['EARLY_STOP'] = True
    params['PATIENCE'] = 10
    params['SAVE_EACH_EVALUATION'] = True
    params['STORE_PATH'] = PATH + "model/"
    params['ATTENTION_MODE'] = "add"
    params['N_LAYERS_ENCODER'] = 2
    params['N_LAYERS_DECODER'] = 2
    params['SOURCE_TEXT_EMBEDDING_SIZE'] = 128
    params['TARGET_TEXT_EMBEDDING_SIZE'] = 128
    params['SKIP_VECTORS_HIDDEN_SIZE'] = 128
    params['ATTENTION_SIZE'] = 128
    params['ENCODER_HIDDEN_SIZE'] = 128
    params['DECODER_HIDDEN_SIZE'] = 128
    params['ENCODER_RNN_TYPE'] = "GRU"
    params['DECODER_RNN_TYPE'] = "ConditionalGRU"
    params['METRICS'] = ['sacrebleu']
    params['STOP_METRIC'] = 'sacrebleu'
    params['APPLY_DETOKENIZATION'] = True
    params['LENGTH_PENALTY'] = True
    params['LENGTH_NORM_FACTOR'] = 1.0
    params['TENSORBOARD'] = True
    params['LOG_DIR'] = 'tensorboard_logs'
    params['EMBEDDING_FREQ'] = 1
    params['WRITE_GRAPH'] = True
    params['WRITE_GRADS'] = True
    params['WRITE_IMAGES'] = True
    params['EMBEDDING_LAYER_NAMES'] = ["source_word_embedding", "target_word_embedding"]
    params['LABEL_WORD_EMBEDDINGS_WITH_VOCAB'] = True
    params['WORD_EMBEDDINGS_LABELS'] = ['source_text', 'target_text']
    nmt_model = TranslationModel(params,
                                 model_type='AttentionRNNEncoderDecoder',
                                 model_name='tutorial_model',
                                 vocabularies=dataset.vocabulary,
                                 store_path=params['STORE_PATH'],
                                 verbose=True)

    inputMapping = dict()
    for i, id_in in enumerate(params['INPUTS_IDS_DATASET']):
        pos_source = dataset.ids_inputs.index(id_in)
        id_dest = nmt_model.ids_inputs[i]
        inputMapping[id_dest] = pos_source
    nmt_model.setInputsMapping(inputMapping)

    outputMapping = dict()
    for i, id_out in enumerate(params['OUTPUTS_IDS_DATASET']):
        pos_target = dataset.ids_outputs.index(id_out)
        id_dest = nmt_model.ids_outputs[i]
        outputMapping[id_dest] = pos_target
    nmt_model.setOutputsMapping(outputMapping)

    """We can add some callbacks for controlling the training (e.g. Sampling each N updates, early stop, learning rate annealing...). For instance, let's build a sampling callback. After each epoch, it will compute the BLEU scores on the development set using the sacreBLEU package. We need to pass some configuration variables to the callback (in the extra_vars dictionary):"""

    search_params = {
        'language': 'en',
        'tokenize_f': eval('dataset.' + 'tokenize_basic'),
        'beam_size': 1,
        'optimized_search': True,
        'n_gpus' : 2,
        'model_inputs': params['INPUTS_IDS_MODEL'],
        'model_outputs': params['OUTPUTS_IDS_MODEL'],
        'dataset_inputs':  params['INPUTS_IDS_DATASET'],
        'dataset_outputs':  params['OUTPUTS_IDS_DATASET'],
        'n_parallel_loaders': 1,
        'maxlen': 100,
        'normalize': True,
        'pos_unk': True,
        'heuristic': 0,
        'state_below_maxlen': 1,
        'val': {'references': dataset.extra_variables['val']['target_text']}
      }

    vocab = dataset.vocabulary['target_text']['idx2words']
    callbacks = []
    input_text_id = params['INPUTS_IDS_DATASET'][0]

    callbacks.append(PrintPerformanceMetricOnEpochEndOrEachNUpdates(nmt_model,
                                                                    dataset,
                                                                    gt_id='target_text',
                                                                    metric_name=['sacrebleu'],
                                                                    set_name=['val'],
                                                                    batch_size=256,
                                                                    each_n_epochs=1,
                                                                    extra_vars=search_params,
                                                                    reload_epoch=0,
                                                                    is_text=True,
                                                                    input_text_id=input_text_id,
                                                                    index2word_y=vocab,
                                                                    sampling_type='max_likelihood',
                                                                    beam_search=True,
                                                                    save_path=nmt_model.model_path,
                                                                    start_eval_on_epoch=0,
                                                                    write_samples=True,
                                                                    write_type='list',
                                                                    verbose=True))

    """Now we are ready to train. Let's set up some training parameters..."""


    training_params = {'n_epochs': 500,
                       'batch_size': 256,
                       'maxlen': 50,
                       'epochs_for_save': 1,
                       'verbose': 1,
                       'eval_on_sets': [], 
                       'n_parallel_loaders': 1,
                       'extra_callbacks': callbacks,
                       'reload_epoch': 0,
                       'epoch_offset': 0,
                       'n_gpus': 2,
                       'tensorboard': True,
                       'tensorboard_params': {'log_dir': 'tensorboard_logs', 'embeddings_freq': 1,
                                              'embeddings_metadata': None,
                                              'word_embedding_labels': ['source_text', 'target_text'],
                                              'label_word_embeddings_with_vocab': True,
                                              'embeddings_layer_names': ["source_word_embedding", "target_word_embedding"],
                                              'histogram_freq': 1, 'batch_size': 100, 'write_graph': True,
                                              'write_grads': True, 'write_images': True}}
                       
    nmt_model.trainNet(dataset, training_params)

Here is the full log of the error I got (notice that if I disable Tensorboard, everything trains perfectly):

Traceback (most recent call last):
  File "train_model.py", line 384, in <module>
    main()
  File "train_model.py", line 373, in main
    start_training(use_gpu)
  File "train_model.py", line 235, in start_training
    nmt_model.trainNet(dataset, training_params)
  File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras_wrapper/cnn_model.py", line 923, in trainNet
    self.__train(ds, params)
  File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras_wrapper/cnn_model.py", line 1040, in __train
    callback_tensorboard.set_model(self.model)
  File "/WAVE/users/unix/nvtran/keras/keras/callbacks/tensorboard_v1.py", line 199, in set_model
    layer.output)
  File "/WAVE/apps/eb/software/TensorFlow/1.14.0-fosscuda-2019a-Python-3.7.2/lib/python3.7/site-packages/tensorflow/python/summary/summary.py", line 179, in histogram
    tag=tag, values=values, name=scope)
  File "/WAVE/apps/eb/software/TensorFlow/1.14.0-fosscuda-2019a-Python-3.7.2/lib/python3.7/site-packages/tensorflow/python/ops/gen_logging_ops.py", line 329, in histogram_summary
    "HistogramSummary", tag=tag, values=values, name=name)
  File "/WAVE/apps/eb/software/TensorFlow/1.14.0-fosscuda-2019a-Python-3.7.2/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 626, in _apply_op_helper
    param_name=input_name)
  File "/WAVE/apps/eb/software/TensorFlow/1.14.0-fosscuda-2019a-Python-3.7.2/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 60, in _SatisfiesTypeConstraint
    ", ".join(dtypes.as_dtype(x).name for x in allowed_list)))
TypeError: Value passed to parameter 'values' has DataType bool not in list of allowed values: float32, float64, int32, uint8, int16, int8, int64, bfloat16, uint16, float16, uint32, uint64
lvapeab (Owner) commented Apr 13, 2020

This was due to an old Tensorboard callback. For the moment, it works if you set the following parameters:

'tensorboard_params': {
    'log_dir': params.get('LOG_DIR', 'tensorboard_logs'),
    'histogram_freq': params.get('HISTOGRAM_FREQ', 0),
    'batch_size': params.get('TENSORBOARD_BATCH_SIZE', params['BATCH_SIZE']),
    'write_graph': params.get('WRITE_GRAPH', True),
    'write_grads': params.get('WRITE_GRADS', False),
    'write_images': params.get('WRITE_IMAGES', False),
    'embeddings_freq': None,
    'embeddings_layer_names': None,
    'embeddings_metadata': None,
    'word_embeddings_labels': None,
    'update_freq': params.get('UPDATE_FREQ', 'epoch')}

You may also need to update the dependencies.
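
For reference, a minimal sketch of how those values would slot into the training_params from the snippet above (it reuses the training_params, nmt_model and dataset names from that snippet; the concrete values here are only illustrative defaults, adjust them to your setup):

# Sketch only: overrides the Tensorboard settings of the training_params defined above.
# Histogram and embedding summaries are disabled, following the suggested defaults,
# which sidesteps the HistogramSummary TypeError from the old Tensorboard callback.
training_params['tensorboard'] = True
training_params['tensorboard_params'] = {
    'log_dir': 'tensorboard_logs',
    'histogram_freq': 0,
    'batch_size': 256,
    'write_graph': True,
    'write_grads': False,
    'write_images': False,
    'embeddings_freq': None,
    'embeddings_layer_names': None,
    'embeddings_metadata': None,
    'word_embeddings_labels': None,
    'update_freq': 'epoch',
}

nmt_model.trainNet(dataset, training_params)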

NamTran838P (Author) commented:

It works now. Thanks a lot.
