Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

16x8 Quantization fails for RNN model - Max and min for dynamic tensors should be recorded during calibration #1090

Open
Black3rror opened this issue Aug 30, 2023 · 4 comments
Assignees
Labels
bug Something isn't working

Comments

@Black3rror
Copy link

Doing 16x8 quantization on RNN models fails.

Code to reproduce the issue
gist to reproduce the issue on Google Colab
Code:

import numpy as np
import tensorflow as tf
import tensorflow_model_optimization as tfmot

def create_model():
  """Build the small Embedding -> SimpleRNN -> Dense model used in this repro.

  The input shape is fully fixed via batch_input_shape=[1, 10].

  Returns:
    An uncompiled tf.keras Sequential model made of the three layers above.
  """
  # For the model to later get converted, batch_size and sequence_length should be fixed.
  # E.g., batch_input_shape=[None, 10] will throw an error.
  # This is just a limitation when using RNNs. E.g., for FC or CNN we can have batch_size=None
  embedding = tf.keras.layers.Embedding(
      input_dim=5,
      output_dim=16,
      batch_input_shape=[1, 10],
  )

  # return_sequences=True keeps one output per timestep; the layer is not stateful.
  recurrent = tf.keras.layers.SimpleRNN(
      units=8,
      return_sequences=True,
      stateful=False,
  )

  projection = tf.keras.layers.Dense(5)

  return tf.keras.models.Sequential([embedding, recurrent, projection])

# Build the model and print its layer summary.
model = create_model()
model.summary()

# Export to disk; the TFLite converter below loads the model back from this path.
model.save("/content/model/")

# Calibration data: 200 rows of 10 random integers drawn from [0, 5), cast to float32.
# The value range matches input_dim=5 of the Embedding layer and the row length
# matches the fixed sequence length of 10 in batch_input_shape.
representative_data = np.random.randint(0, 5, (200, 10)).astype(np.float32)

def representative_dataset():
  """Yield calibration samples for the TFLite converter, one at a time.

  Iterates over the rows of the module-level `representative_data` array and
  prepends a leading axis of size 1 to each row (batch_size = 1).

  Yields:
    A one-element list holding the batched sample, i.e. the value for the
    first (and only) input of the model.
  """
  for row in representative_data:
    batched = row[np.newaxis, ...]  # add the batch axis in front: batch_size = 1
    yield [batched]

# 16x8 quantization - Fail
# Load the SavedModel that was exported to /content/model/ above.
converter = tf.lite.TFLiteConverter.from_saved_model("/content/model/")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Request the experimental op set with int16 activations and int8 weights.
converter.target_spec.supported_ops = [tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]
# Commenting the following line will remove the error
converter.representative_dataset = representative_dataset
# With the representative dataset set, convert() raises:
#   RuntimeError: Max and min for dynamic tensors should be recorded during
#   calibration: Failed for tensor arg1
tflite_quant_model = converter.convert()

Error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
[<ipython-input-11-b8b7cfec032a>](https://localhost:8080/#) in <cell line: 7>()
      5 # Commenting the following line will remove the error
      6 converter.representative_dataset = representative_dataset
----> 7 tflite_quant_model = converter.convert()

10 frames
[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py](https://localhost:8080/#) in wrapper(self, *args, **kwargs)
    960   def wrapper(self, *args, **kwargs):
    961     # pylint: disable=protected-access
--> 962     return self._convert_and_export_metrics(convert_func, *args, **kwargs)
    963     # pylint: enable=protected-access
    964 

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py](https://localhost:8080/#) in _convert_and_export_metrics(self, convert_func, *args, **kwargs)
    938     self._save_conversion_params_metric()
    939     start_time = time.process_time()
--> 940     result = convert_func(self, *args, **kwargs)
    941     elapsed_time_ms = (time.process_time() - start_time) * 1000
    942     if result:

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py](https://localhost:8080/#) in convert(self)
   1245           graph_def)
   1246 
-> 1247     return self._convert_from_saved_model(graph_def)
   1248 
   1249 

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py](https://localhost:8080/#) in _convert_from_saved_model(self, graph_def)
   1129 
   1130     result = _convert_saved_model(**converter_kwargs)
-> 1131     return self._optimize_tflite_model(
   1132         result, quant_mode, quant_io=self.experimental_new_quantizer)
   1133 

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py](https://localhost:8080/#) in wrapper(*args, **kwargs)
    213       except Exception as error:
    214         report_error_message(str(error))
--> 215         raise error from None  # Re-throws the exception.
    216 
    217     return wrapper

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py](https://localhost:8080/#) in wrapper(*args, **kwargs)
    203     def wrapper(*args, **kwargs):
    204       try:
--> 205         return func(*args, **kwargs)
    206       except ConverterError as converter_error:
    207         if converter_error.errors:

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py](https://localhost:8080/#) in _optimize_tflite_model(self, model, quant_mode, quant_io)
    897         q_allow_float = quant_mode.is_allow_float()
    898         q_variable_quantization = quant_mode.enable_mlir_variable_quantization
--> 899         model = self._quantize(model, q_in_type, q_out_type, q_activations_type,
    900                                q_bias_type, q_allow_float,
    901                                q_variable_quantization)

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py](https://localhost:8080/#) in _quantize(self, result, input_type, output_type, activations_type, bias_type, allow_float, enable_variable_quantization)
    652           enable_variable_quantization=enable_variable_quantization)
    653     else:
--> 654       return calibrate_quantize.calibrate_and_quantize(
    655           self.representative_dataset.input_gen,
    656           input_type,

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py](https://localhost:8080/#) in wrapper(*args, **kwargs)
    213       except Exception as error:
    214         report_error_message(str(error))
--> 215         raise error from None  # Re-throws the exception.
    216 
    217     return wrapper

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py](https://localhost:8080/#) in wrapper(*args, **kwargs)
    203     def wrapper(*args, **kwargs):
    204       try:
--> 205         return func(*args, **kwargs)
    206       except ConverterError as converter_error:
    207         if converter_error.errors:

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/optimize/calibrator.py](https://localhost:8080/#) in calibrate_and_quantize(self, dataset_gen, input_type, output_type, allow_float, activations_type, bias_type, resize_input, disable_per_channel)
    174     """
    175     self._feed_tensors(dataset_gen, resize_input)
--> 176     return self._calibrator.QuantizeModel(
    177         np.dtype(input_type.as_numpy_dtype()).num,
    178         np.dtype(output_type.as_numpy_dtype()).num, allow_float,

RuntimeError: Max and min for dynamic tensors should be recorded during calibration: Failed for tensor arg1
Empty min/max for tensor arg1
@Black3rror Black3rror added the bug Something isn't working label Aug 30, 2023
@cdh4696
Copy link

cdh4696 commented Aug 31, 2023

@yyoon Could you please check? Thanks!

@yyoon
Copy link
Contributor

yyoon commented Sep 2, 2023

@tucan9389 could you take a look? Not sure if RNN is supported at all.

@Black3rror
Copy link
Author

Any update?

@HajarAva
Copy link

Hello, I'm facing the same problem. Is there any update on how to deal with this error?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

5 participants