Add support for HuggingFace's TensorFlow models #127

Merged 30 commits on Jan 3, 2023

Commits (30); the file changes below include all commits.
871161c
added support for HuggingFace's TensorFlow models
valeriosofi Dec 21, 2022
f6e498d
fix
valeriosofi Dec 21, 2022
1d21079
fix
valeriosofi Dec 21, 2022
3cc3bad
fix
valeriosofi Dec 21, 2022
01ade06
added notebook for HuggingFace's tensorflow bert model
valeriosofi Dec 21, 2022
faccd23
fix
valeriosofi Dec 21, 2022
495f2ad
fix
valeriosofi Dec 21, 2022
a2c6f64
change nebullvm name in logs
valeriosofi Dec 21, 2022
1232091
Add optimized model details + warning if static shape is used for HF …
valeriosofi Dec 22, 2022
53f7d78
fix tvm issue
valeriosofi Dec 22, 2022
6acb926
edit dockerfile and add image auto building
valeriosofi Dec 23, 2022
dfcdadd
add docker installation on azure pipeline
valeriosofi Dec 23, 2022
369af08
fix docker permission error
valeriosofi Dec 23, 2022
5e73e9e
fix docker permission error
valeriosofi Dec 23, 2022
ac97106
fix docker permission error
valeriosofi Dec 23, 2022
2c76a42
fix docker permission error
valeriosofi Dec 23, 2022
2450d71
fix bug
valeriosofi Dec 23, 2022
e76982a
fix tensorflow bug
valeriosofi Dec 23, 2022
82c0be0
fix tensorflow bug
valeriosofi Dec 23, 2022
bb73213
fix tensorflow
valeriosofi Dec 24, 2022
054909d
fix tensorflow
valeriosofi Dec 24, 2022
76e2f35
fix bug in neural compressor output shape
valeriosofi Dec 24, 2022
ce31a61
Merge branch 'main' into main
valeriosofi Jan 2, 2023
724c5fe
add support for openvino with python 3.10
valeriosofi Jan 2, 2023
f614e17
add build docker image to azure pipelines
valeriosofi Jan 3, 2023
022f22f
revert docker build from az pipelines and edit format of the optimiza…
valeriosofi Jan 3, 2023
158653f
Merge branch 'main' of https://github.com/valeriosofi/nebullvm
valeriosofi Jan 3, 2023
ba00516
revert docker build from az pipelines
valeriosofi Jan 3, 2023
d12400e
added tabulate to setup.py and general fixes
valeriosofi Jan 3, 2023
7cc555c
fixes
valeriosofi Jan 3, 2023
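Taken together, these commits let speedster's `optimize_model` accept HuggingFace TensorFlow models (e.g. `TFAlbertModel`) alongside the existing PyTorch path. A minimal sketch of the new path, assembled from the tests added in this PR (see `test_tensorflow_huggingface_ort_input_text_tf` below); treat it as an illustration of the tested API, not official documentation:

```python
# Sketch of the TensorFlow HuggingFace path added in this PR, assembled
# from the tests in test_huggingface.py below (not official docs).
from transformers import AlbertTokenizer, TFAlbertModel

from speedster import optimize_model

tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = TFAlbertModel.from_pretrained("albert-base-v1")  # TensorFlow weights

# Axis names mark which input/output dimensions may vary at inference time.
dynamic_info = {
    "inputs": [
        {0: "batch", 1: "num_tokens"},
        {0: "batch", 1: "num_tokens"},
        {0: "batch", 1: "num_tokens"},
    ],
    "outputs": [{0: "batch", 1: "num_tokens"}, {0: "batch"}],
}

optimized_model = optimize_model(
    model=model,
    input_data=["this is a test", "hi my name is Valerio"],
    optimization_time="constrained",
    tokenizer=tokenizer,
    tokenizer_args=dict(return_tensors="tf", padding="longest", truncation=True),
    dynamic_info=dynamic_info,
)

# The optimized model keeps the HuggingFace calling convention.
inputs = tokenizer(["does the optimized model still work?"], return_tensors="tf")
outputs = optimized_model(**inputs)
```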
3 changes: 3 additions & 0 deletions .gitignore
@@ -137,3 +137,6 @@ dmypy.json

# Folder where optimized models are stored
optimized_model

# Config file for tests coverage
.coveragerc
33 changes: 21 additions & 12 deletions Dockerfile
@@ -15,36 +15,45 @@ RUN apt-get install -y python3-opencv python3-pip && \
# Install other libraries
RUN apt-get install -y sudo wget

# Install pytorch
RUN pip3 install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu117
# Install dl frameworks
RUN pip3 install --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cu117
RUN pip3 install --no-cache-dir tensorflow
RUN pip3 install --no-cache-dir onnx
RUN pip3 install --no-cache-dir transformers

# Install nebullvm
ARG NEBULLVM_VERSION=latest
RUN if [ "$NEBULLVM_VERSION" = "latest" ] ; then \
# pip install nebullvm ; \
pip install git+https://github.com/nebuly-ai/nebullvm.git ; \
cd nebullvm ; \
pip install . ; \
cd apps/accelerate/speedster ; \
pip install . ; \
cd ../../../.. ; \
rm -rf nebullvm ; \
else \
pip install nebullvm==${NEBULLVM_VERSION} ; \
pip install --no-cache-dir nebullvm==${NEBULLVM_VERSION} ; \
fi

# Install required python modules
RUN pip install cmake
RUN pip install --no-cache-dir cmake

# Install default deep learning compilers
ARG COMPILER=all
ENV NO_COMPILER_INSTALLATION=1
RUN if [ "$COMPILER" = "all" ] ; then \
python3 -c "python -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers all" ; \
python3 -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers all ; \
elif [ "$COMPILER" = "tensorrt" ] ; then \
python3 -c "python -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers tensorrt" ; \
python3 -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers tensorrt ; \
elif [ "$COMPILER" = "openvino" ] ; then \
python3 -c "python -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers openvino" ; \
python3 -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers openvino ; \
elif [ "$COMPILER" = "onnxruntime" ] ; then \
python3 -c "python -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers onnxruntime" ; \
python3 -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers onnxruntime ; \
fi

# Install TVM
RUN if [ "$COMPILER" = "all" ] || [ "$COMPILER" = "tvm" ] ; then \
python3 -c "from nebullvm.installers.installers import install_tvm; install_tvm()" ; \
pip install --no-cache-dir https://github.com/tlc-pack/tlcpack/releases/download/v0.10.0/apache_tvm_cu116_cu116-0.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl ; \
pip install --no-cache-dir xgboost ; \
python3 -c "from tvm.runtime import Module" ; \
fi

ENV SIGOPT_PROJECT="tmp"
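Note the installer fix in the hunk above: the old form passed a shell-style command string (`python -m nebullvm.installers.auto_installer …`) to `python3 -c`, which is not valid Python source, so that step could never actually run the installer; the diff replaces it with a direct `python3 -m` invocation. For illustration, a hedged Python equivalent of that step, using only the module path and flags that appear verbatim in the Dockerfile above:

```python
# Sketch: the Dockerfile's auto-installer step driven from Python via subprocess.
# The module path and flags are taken verbatim from the diff above; this
# assumes nebullvm is already installed in the current environment.
import subprocess
import sys

subprocess.run(
    [
        sys.executable, "-m", "nebullvm.installers.auto_installer",
        "--frameworks", "torch", "onnx", "tensorflow", "huggingface",
        "--compilers", "all",
    ],
    check=True,  # fail loudly, as a failing RUN step in a Dockerfile would
)
```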
1 change: 1 addition & 0 deletions apps/accelerate/speedster/requirements.txt
@@ -1 +1,2 @@
nebullvm>=0.6.1
tabulate>=0.8.0
1 change: 1 addition & 0 deletions apps/accelerate/speedster/setup.py
@@ -4,6 +4,7 @@

REQUIREMENTS = [
"nebullvm>=0.6.1",
"tabulate>=0.8.0",
]

this_directory = Path(__file__).parent
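`tabulate` is added as a dependency in both files above; judging from the commit messages ("Add optimized model details", "edit format of the optimiza…"), it is presumably used to render the optimization summary as a text table. A minimal sketch of the tabulate API, with invented rows; the column names are illustrative, not speedster's actual output format:

```python
# Illustrative use of the tabulate dependency added above; the rows and
# headers are invented for this example, not speedster's real summary.
from tabulate import tabulate

rows = [
    ["original", "12.4 ms", "1.0x"],
    ["onnxruntime", "4.1 ms", "3.0x"],
]
print(tabulate(rows, headers=["backend", "latency", "speedup"], tablefmt="github"))
```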
269 changes: 263 additions & 6 deletions apps/accelerate/speedster/speedster/api/tests/test_huggingface.py
@@ -1,14 +1,15 @@
import torch
from transformers import AlbertModel, AlbertTokenizer

from nebullvm.config import COMPILER_LIST, COMPRESSOR_LIST
from nebullvm.operations.inference_learners.huggingface import (
HuggingFaceInferenceLearner,
)
from nebullvm.optional_modules.tensorflow import tensorflow as tf
from nebullvm.optional_modules.torch import torch
from transformers import AlbertModel, TFAlbertModel, AlbertTokenizer

from speedster import optimize_model


def test_huggingface_ort_input_text():
def test_torch_huggingface_ort_input_text():
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = AlbertModel.from_pretrained("albert-base-v1")

@@ -73,7 +74,7 @@ def test_huggingface_ort_input_text():
)


def test_huggingface_ort_input_tensors():
def test_torch_huggingface_ort_input_tensors():
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = AlbertModel.from_pretrained("albert-base-v1")

@@ -136,7 +137,7 @@ def test_huggingface_ort_input_tensors():
)


def test_huggingface_torchscript_input_tensors():
def test_torch_huggingface_torchscript_input_tensors():
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = AlbertModel.from_pretrained("albert-base-v1", torchscript=True)

@@ -177,3 +178,259 @@ def test_huggingface_torchscript_input_tensors():

assert torch.max(abs((res_original[0] - res_optimized[0]))) < 1e-2
assert torch.max(abs((res_original[1] - res_optimized[1]))) < 1e-2


def test_tensorflow_huggingface_ort_input_text_np():
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = TFAlbertModel.from_pretrained("albert-base-v1")

input_data = [
"this is a test",
"hi my name is Valerio",
"india is very far from italy",
]

dynamic_info = {
"inputs": [
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
],
"outputs": [{0: "batch", 1: "num_tokens"}, {0: "batch"}],
}

optimized_model = optimize_model(
model=model,
input_data=input_data,
optimization_time="constrained",
tokenizer=tokenizer,
ignore_compilers=[
compiler for compiler in COMPILER_LIST if compiler != "onnxruntime"
],
ignore_compressors=[compressor for compressor in COMPRESSOR_LIST],
tokenizer_args=dict(
add_special_tokens=True,
return_attention_mask=True,
return_tensors="np",
return_token_type_ids=None, # Sets to model default
padding="longest",
truncation=True,
),
dynamic_info=dynamic_info,
)

x = ["this is a test input to see if the optimized model works."]
inputs = tokenizer(x, return_tensors="np")
res_original = model(**inputs)
res_optimized = optimized_model(**inputs)

assert isinstance(optimized_model, HuggingFaceInferenceLearner)

assert (
tf.math.reduce_max(
abs(
(
res_original["last_hidden_state"]
- res_optimized["last_hidden_state"]
)
)
)
< 1e-2
)
assert (
tf.math.reduce_max(
abs(
(
res_original["pooler_output"]
- res_optimized["pooler_output"]
)
)
)
< 1e-2
)


def test_tensorflow_huggingface_ort_input_tensors_np():
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = TFAlbertModel.from_pretrained("albert-base-v1")

text = "hi my name is Valerio"
inputs = tokenizer(text, return_tensors="np")

dynamic_info = {
"inputs": [
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
],
"outputs": [{0: "batch", 1: "num_tokens"}, {0: "batch"}],
}

optimized_model = optimize_model(
model=model,
input_data=[inputs for _ in range(10)],
optimization_time="constrained",
ignore_compilers=[
compiler for compiler in COMPILER_LIST if compiler != "onnxruntime"
],
ignore_compressors=[compressor for compressor in COMPRESSOR_LIST],
dynamic_info=dynamic_info,
)

x = ["Test to see if it works with a different output"]
inputs = tokenizer(x, return_tensors="np")
res_original = model(**inputs)
res_optimized = optimized_model(**inputs)

assert isinstance(optimized_model, HuggingFaceInferenceLearner)

assert (
tf.math.reduce_max(
abs(
(
res_original["last_hidden_state"]
- res_optimized["last_hidden_state"]
)
)
)
< 1e-2
)
assert (
tf.math.reduce_max(
abs(
(
res_original["pooler_output"]
- res_optimized["pooler_output"]
)
)
)
< 1e-2
)


def test_tensorflow_huggingface_ort_input_text_tf():
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = TFAlbertModel.from_pretrained("albert-base-v1")

input_data = [
"this is a test",
"hi my name is Valerio",
"india is very far from italy",
]

dynamic_info = {
"inputs": [
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
],
"outputs": [{0: "batch", 1: "num_tokens"}, {0: "batch"}],
}

optimized_model = optimize_model(
model=model,
input_data=input_data,
optimization_time="constrained",
tokenizer=tokenizer,
ignore_compilers=[
compiler for compiler in COMPILER_LIST if compiler != "onnxruntime"
],
ignore_compressors=[compressor for compressor in COMPRESSOR_LIST],
tokenizer_args=dict(
add_special_tokens=True,
return_attention_mask=True,
return_tensors="tf",
return_token_type_ids=None, # Sets to model default
padding="longest",
truncation=True,
),
dynamic_info=dynamic_info,
)

x = ["this is a test input to see if the optimized model works."]
inputs = tokenizer(x, return_tensors="tf")
res_original = model(**inputs)
res_optimized = optimized_model(**inputs)

assert isinstance(optimized_model, HuggingFaceInferenceLearner)

assert (
tf.math.reduce_max(
abs(
(
res_original["last_hidden_state"]
- res_optimized["last_hidden_state"]
)
)
)
< 1e-2
)
assert (
tf.math.reduce_max(
abs(
(
res_original["pooler_output"]
- res_optimized["pooler_output"]
)
)
)
< 1e-2
)


def test_tensorflow_huggingface_ort_input_tensors_tf():
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = TFAlbertModel.from_pretrained("albert-base-v1")

text = "hi my name is Valerio"
inputs = tokenizer(text, return_tensors="tf")

dynamic_info = {
"inputs": [
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
],
"outputs": [{0: "batch", 1: "num_tokens"}, {0: "batch"}],
}

optimized_model = optimize_model(
model=model,
input_data=[inputs for _ in range(10)],
optimization_time="constrained",
ignore_compilers=[
compiler for compiler in COMPILER_LIST if compiler != "onnxruntime"
],
ignore_compressors=[compressor for compressor in COMPRESSOR_LIST],
dynamic_info=dynamic_info,
)

x = ["Test to see if it works with a different output"]
inputs = tokenizer(x, return_tensors="tf")
res_original = model(**inputs)
res_optimized = optimized_model(**inputs)

assert isinstance(optimized_model, HuggingFaceInferenceLearner)

assert (
tf.math.reduce_max(
abs(
(
res_original["last_hidden_state"]
- res_optimized["last_hidden_state"]
)
)
)
< 1e-2
)
assert (
tf.math.reduce_max(
abs(
(
res_original["pooler_output"]
- res_optimized["pooler_output"]
)
)
)
< 1e-2
)