Add support for HuggingFace's TensorFlow models #127

Merged 30 commits on Jan 3, 2023

Commits (30); the file changes below include all commits.
871161c
added support for HuggingFace's TensorFlow models
valeriosofi Dec 21, 2022
f6e498d
fix
valeriosofi Dec 21, 2022
1d21079
fix
valeriosofi Dec 21, 2022
3cc3bad
fix
valeriosofi Dec 21, 2022
01ade06
added notebook for HuggingFace's tensorflow bert model
valeriosofi Dec 21, 2022
faccd23
fix
valeriosofi Dec 21, 2022
495f2ad
fix
valeriosofi Dec 21, 2022
a2c6f64
change nebullvm name in logs
valeriosofi Dec 21, 2022
1232091
Add optimized model details + warning if static shape is used for HF …
valeriosofi Dec 22, 2022
53f7d78
fix tvm issue
valeriosofi Dec 22, 2022
6acb926
edit dockerfile and add image auto building
valeriosofi Dec 23, 2022
dfcdadd
add docker installation on azure pipeline
valeriosofi Dec 23, 2022
369af08
fix docker permission error
valeriosofi Dec 23, 2022
5e73e9e
fix docker permission error
valeriosofi Dec 23, 2022
ac97106
fix docker permission error
valeriosofi Dec 23, 2022
2c76a42
fix docker permission error
valeriosofi Dec 23, 2022
2450d71
fix bug
valeriosofi Dec 23, 2022
e76982a
fix tensorflow bug
valeriosofi Dec 23, 2022
82c0be0
fix tensorflow bug
valeriosofi Dec 23, 2022
bb73213
fix tensorflow
valeriosofi Dec 24, 2022
054909d
fix tensorflow
valeriosofi Dec 24, 2022
76e2f35
fix bug in neural compressor output shape
valeriosofi Dec 24, 2022
ce31a61
Merge branch 'main' into main
valeriosofi Jan 2, 2023
724c5fe
add support for openvino with python 3.10
valeriosofi Jan 2, 2023
f614e17
add build docker image to azure pipelines
valeriosofi Jan 3, 2023
022f22f
revert docker build from az pipelines and edit format of the optimiza…
valeriosofi Jan 3, 2023
158653f
Merge branch 'main' of https://github.com/valeriosofi/nebullvm
valeriosofi Jan 3, 2023
ba00516
revert docker build from az pipelines
valeriosofi Jan 3, 2023
d12400e
added tabulate to setup.py and general fixes
valeriosofi Jan 3, 2023
7cc555c
fixes
valeriosofi Jan 3, 2023
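Taken together, these commits let speedster's `optimize_model` accept HuggingFace TensorFlow models (e.g. `TFAlbertModel`) alongside the existing PyTorch path. A minimal sketch of the new path, assembled from the tests added in this PR (see `test_tensorflow_huggingface_ort_input_text_tf` below); treat it as an illustration of the tested API, not official documentation:

```python
# Sketch of the TensorFlow HuggingFace path added in this PR, assembled
# from the tests in test_huggingface.py below (not official docs).
from transformers import AlbertTokenizer, TFAlbertModel

from speedster import optimize_model

tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = TFAlbertModel.from_pretrained("albert-base-v1")  # TensorFlow weights

# Axis names mark which input/output dimensions may vary at inference time.
dynamic_info = {
    "inputs": [
        {0: "batch", 1: "num_tokens"},
        {0: "batch", 1: "num_tokens"},
        {0: "batch", 1: "num_tokens"},
    ],
    "outputs": [{0: "batch", 1: "num_tokens"}, {0: "batch"}],
}

optimized_model = optimize_model(
    model=model,
    input_data=["this is a test", "hi my name is Valerio"],
    optimization_time="constrained",
    tokenizer=tokenizer,
    tokenizer_args=dict(return_tensors="tf", padding="longest", truncation=True),
    dynamic_info=dynamic_info,
)

# The optimized model keeps the HuggingFace calling convention.
inputs = tokenizer(["does the optimized model still work?"], return_tensors="tf")
outputs = optimized_model(**inputs)
```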
3 changes: 3 additions & 0 deletions .gitignore
@@ -137,3 +137,6 @@ dmypy.json

# Folder where optimized models are stored
optimized_model

# Config file for tests coverage
.coveragerc
33 changes: 21 additions & 12 deletions Dockerfile
@@ -15,36 +15,45 @@ RUN apt-get install -y python3-opencv python3-pip && \
# Install other libraries
RUN apt-get install -y sudo wget

# Install pytorch
RUN pip3 install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu117
# Install dl frameworks
RUN pip3 install --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cu117
RUN pip3 install --no-cache-dir tensorflow
RUN pip3 install --no-cache-dir onnx
RUN pip3 install --no-cache-dir transformers

# Install nebullvm
ARG NEBULLVM_VERSION=latest
RUN if [ "$NEBULLVM_VERSION" = "latest" ] ; then \
# pip install nebullvm ; \
pip install git+https://github.com/nebuly-ai/nebullvm.git ; \
cd nebullvm ; \
pip install . ; \
cd apps/accelerate/speedster ; \
pip install . ; \
cd ../../../.. ; \
rm -rf nebullvm ; \
else \
pip install nebullvm==${NEBULLVM_VERSION} ; \
pip install --no-cache-dir nebullvm==${NEBULLVM_VERSION} ; \
fi

# Install required python modules
RUN pip install cmake
RUN pip install --no-cache-dir cmake

# Install default deep learning compilers
ARG COMPILER=all
ENV NO_COMPILER_INSTALLATION=1
RUN if [ "$COMPILER" = "all" ] ; then \
python3 -c "python -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers all" ; \
python3 -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers all ; \
elif [ "$COMPILER" = "tensorrt" ] ; then \
python3 -c "python -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers tensorrt" ; \
python3 -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers tensorrt ; \
elif [ "$COMPILER" = "openvino" ] ; then \
python3 -c "python -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers openvino" ; \
python3 -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers openvino ; \
elif [ "$COMPILER" = "onnxruntime" ] ; then \
python3 -c "python -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers onnxruntime" ; \
python3 -m nebullvm.installers.auto_installer --frameworks torch onnx tensorflow huggingface --compilers onnxruntime ; \
fi

# Install TVM
RUN if [ "$COMPILER" = "all" ] || [ "$COMPILER" = "tvm" ] ; then \
python3 -c "from nebullvm.installers.installers import install_tvm; install_tvm()" ; \
pip install --no-cache-dir https://github.com/tlc-pack/tlcpack/releases/download/v0.10.0/apache_tvm_cu116_cu116-0.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl ; \
pip install --no-cache-dir xgboost ; \
python3 -c "from tvm.runtime import Module" ; \
fi

ENV SIGOPT_PROJECT="tmp"
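Note the installer fix in the hunk above: the old form passed a shell-style command string (`python -m nebullvm.installers.auto_installer …`) to `python3 -c`, which is not valid Python source, so that step could never actually run the installer; the diff replaces it with a direct `python3 -m` invocation. For illustration, a hedged Python equivalent of that step, using only the module path and flags that appear verbatim in the Dockerfile above:

```python
# Sketch: the Dockerfile's auto-installer step driven from Python via subprocess.
# The module path and flags are taken verbatim from the diff above; this
# assumes nebullvm is already installed in the current environment.
import subprocess
import sys

subprocess.run(
    [
        sys.executable, "-m", "nebullvm.installers.auto_installer",
        "--frameworks", "torch", "onnx", "tensorflow", "huggingface",
        "--compilers", "all",
    ],
    check=True,  # fail loudly, as a failing RUN step in a Dockerfile would
)
```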
1 change: 1 addition & 0 deletions apps/accelerate/speedster/requirements.txt
@@ -1 +1,2 @@
nebullvm>=0.6.1
tabulate>=0.8.0
1 change: 1 addition & 0 deletions apps/accelerate/speedster/setup.py
@@ -4,6 +4,7 @@

REQUIREMENTS = [
"nebullvm>=0.6.1",
"tabulate>=0.8.0",
]

this_directory = Path(__file__).parent
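`tabulate` is added as a dependency in both files above; judging from the commit messages ("Add optimized model details", "edit format of the optimiza…"), it is presumably used to render the optimization summary as a text table. A minimal sketch of the tabulate API, with invented rows; the column names are illustrative, not speedster's actual output format:

```python
# Illustrative use of the tabulate dependency added above; the rows and
# headers are invented for this example, not speedster's real summary.
from tabulate import tabulate

rows = [
    ["original", "12.4 ms", "1.0x"],
    ["onnxruntime", "4.1 ms", "3.0x"],
]
print(tabulate(rows, headers=["backend", "latency", "speedup"], tablefmt="github"))
```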
269 changes: 263 additions & 6 deletions apps/accelerate/speedster/speedster/api/tests/test_huggingface.py
@@ -1,14 +1,15 @@
import torch
from transformers import AlbertModel, AlbertTokenizer

from nebullvm.config import COMPILER_LIST, COMPRESSOR_LIST
from nebullvm.operations.inference_learners.huggingface import (
HuggingFaceInferenceLearner,
)
from nebullvm.optional_modules.tensorflow import tensorflow as tf
from nebullvm.optional_modules.torch import torch
from transformers import AlbertModel, TFAlbertModel, AlbertTokenizer

from speedster import optimize_model


def test_huggingface_ort_input_text():
def test_torch_huggingface_ort_input_text():
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = AlbertModel.from_pretrained("albert-base-v1")

@@ -73,7 +74,7 @@ def test_huggingface_ort_input_text():
)


def test_huggingface_ort_input_tensors():
def test_torch_huggingface_ort_input_tensors():
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = AlbertModel.from_pretrained("albert-base-v1")

@@ -136,7 +137,7 @@ def test_huggingface_ort_input_tensors():
)


def test_huggingface_torchscript_input_tensors():
def test_torch_huggingface_torchscript_input_tensors():
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = AlbertModel.from_pretrained("albert-base-v1", torchscript=True)

@@ -177,3 +178,259 @@ def test_huggingface_torchscript_input_tensors():

assert torch.max(abs((res_original[0] - res_optimized[0]))) < 1e-2
assert torch.max(abs((res_original[1] - res_optimized[1]))) < 1e-2


def test_tensorflow_huggingface_ort_input_text_np():
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = TFAlbertModel.from_pretrained("albert-base-v1")

input_data = [
"this is a test",
"hi my name is Valerio",
"india is very far from italy",
]

dynamic_info = {
"inputs": [
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
],
"outputs": [{0: "batch", 1: "num_tokens"}, {0: "batch"}],
}

optimized_model = optimize_model(
model=model,
input_data=input_data,
optimization_time="constrained",
tokenizer=tokenizer,
ignore_compilers=[
compiler for compiler in COMPILER_LIST if compiler != "onnxruntime"
],
ignore_compressors=[compressor for compressor in COMPRESSOR_LIST],
tokenizer_args=dict(
add_special_tokens=True,
return_attention_mask=True,
return_tensors="np",
return_token_type_ids=None, # Sets to model default
padding="longest",
truncation=True,
),
dynamic_info=dynamic_info,
)

x = ["this is a test input to see if the optimized model works."]
inputs = tokenizer(x, return_tensors="np")
res_original = model(**inputs)
res_optimized = optimized_model(**inputs)

assert isinstance(optimized_model, HuggingFaceInferenceLearner)

assert (
tf.math.reduce_max(
abs(
(
res_original["last_hidden_state"]
- res_optimized["last_hidden_state"]
)
)
)
< 1e-2
)
assert (
tf.math.reduce_max(
abs(
(
res_original["pooler_output"]
- res_optimized["pooler_output"]
)
)
)
< 1e-2
)


def test_tensorflow_huggingface_ort_input_tensors_np():
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = TFAlbertModel.from_pretrained("albert-base-v1")

text = "hi my name is Valerio"
inputs = tokenizer(text, return_tensors="np")

dynamic_info = {
"inputs": [
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
],
"outputs": [{0: "batch", 1: "num_tokens"}, {0: "batch"}],
}

optimized_model = optimize_model(
model=model,
input_data=[inputs for _ in range(10)],
optimization_time="constrained",
ignore_compilers=[
compiler for compiler in COMPILER_LIST if compiler != "onnxruntime"
],
ignore_compressors=[compressor for compressor in COMPRESSOR_LIST],
dynamic_info=dynamic_info,
)

x = ["Test to see if it works with a different output"]
inputs = tokenizer(x, return_tensors="np")
res_original = model(**inputs)
res_optimized = optimized_model(**inputs)

assert isinstance(optimized_model, HuggingFaceInferenceLearner)

assert (
tf.math.reduce_max(
abs(
(
res_original["last_hidden_state"]
- res_optimized["last_hidden_state"]
)
)
)
< 1e-2
)
assert (
tf.math.reduce_max(
abs(
(
res_original["pooler_output"]
- res_optimized["pooler_output"]
)
)
)
< 1e-2
)


def test_tensorflow_huggingface_ort_input_text_tf():
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = TFAlbertModel.from_pretrained("albert-base-v1")

input_data = [
"this is a test",
"hi my name is Valerio",
"india is very far from italy",
]

dynamic_info = {
"inputs": [
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
],
"outputs": [{0: "batch", 1: "num_tokens"}, {0: "batch"}],
}

optimized_model = optimize_model(
model=model,
input_data=input_data,
optimization_time="constrained",
tokenizer=tokenizer,
ignore_compilers=[
compiler for compiler in COMPILER_LIST if compiler != "onnxruntime"
],
ignore_compressors=[compressor for compressor in COMPRESSOR_LIST],
tokenizer_args=dict(
add_special_tokens=True,
return_attention_mask=True,
return_tensors="tf",
return_token_type_ids=None, # Sets to model default
padding="longest",
truncation=True,
),
dynamic_info=dynamic_info,
)

x = ["this is a test input to see if the optimized model works."]
inputs = tokenizer(x, return_tensors="tf")
res_original = model(**inputs)
res_optimized = optimized_model(**inputs)

assert isinstance(optimized_model, HuggingFaceInferenceLearner)

assert (
tf.math.reduce_max(
abs(
(
res_original["last_hidden_state"]
- res_optimized["last_hidden_state"]
)
)
)
< 1e-2
)
assert (
tf.math.reduce_max(
abs(
(
res_original["pooler_output"]
- res_optimized["pooler_output"]
)
)
)
< 1e-2
)


def test_tensorflow_huggingface_ort_input_tensors_tf():
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
model = TFAlbertModel.from_pretrained("albert-base-v1")

text = "hi my name is Valerio"
inputs = tokenizer(text, return_tensors="tf")

dynamic_info = {
"inputs": [
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
{0: "batch", 1: "num_tokens"},
],
"outputs": [{0: "batch", 1: "num_tokens"}, {0: "batch"}],
}

optimized_model = optimize_model(
model=model,
input_data=[inputs for _ in range(10)],
optimization_time="constrained",
ignore_compilers=[
compiler for compiler in COMPILER_LIST if compiler != "onnxruntime"
],
ignore_compressors=[compressor for compressor in COMPRESSOR_LIST],
dynamic_info=dynamic_info,
)

x = ["Test to see if it works with a different output"]
inputs = tokenizer(x, return_tensors="tf")
res_original = model(**inputs)
res_optimized = optimized_model(**inputs)

assert isinstance(optimized_model, HuggingFaceInferenceLearner)

assert (
tf.math.reduce_max(
abs(
(
res_original["last_hidden_state"]
- res_optimized["last_hidden_state"]
)
)
)
< 1e-2
)
assert (
tf.math.reduce_max(
abs(
(
res_original["pooler_output"]
- res_optimized["pooler_output"]
)
)
)
< 1e-2
)