Merge pull request #167 from cccntu/faster_transformer-poc
Add Faster Transformer compiler for Bert
diegofiori committed Apr 18, 2023
2 parents 92fc131 + 6303568 commit 0130847
Showing 25 changed files with 4,299 additions and 49 deletions.
@@ -1,2 +1,2 @@
BASE_CHECKPOINT_DIR = "checkpoints"
BASE_CHECKPOINT_DATA_DIR = "games"
BASE_CHECKPOINT_DATA_DIR = "games"  # text unchanged; the diff only adds a trailing newline at EOF
@@ -5,7 +5,10 @@
import tqdm
from torch.utils.data import DataLoader

from open_alpha_tensor.config import BASE_CHECKPOINT_DATA_DIR, BASE_CHECKPOINT_DIR
from open_alpha_tensor.config import (
BASE_CHECKPOINT_DATA_DIR,
BASE_CHECKPOINT_DIR,
)
from open_alpha_tensor.core.actors.stage import actor_prediction
from open_alpha_tensor.core.data.basis_change import ChangeOfBasis
from open_alpha_tensor.core.data.dataset import TensorGameDataset
@@ -181,7 +184,9 @@ def __init__(
)
self.checkpoint_dir.mkdir(exist_ok=True, parents=True)
self.checkpoint_data_dir = (
checkpoint_data_dir if checkpoint_data_dir else Path(BASE_CHECKPOINT_DATA_DIR)
checkpoint_data_dir
if checkpoint_data_dir
else Path(BASE_CHECKPOINT_DATA_DIR)
)
self.checkpoint_data_dir.mkdir(exist_ok=True, parents=True)
self.change_of_basis = ChangeOfBasis(
@@ -4,7 +4,10 @@
import torch
from nebullvm.operations.base import Operation

from open_alpha_tensor.config import BASE_CHECKPOINT_DATA_DIR, BASE_CHECKPOINT_DIR
from open_alpha_tensor.config import (
BASE_CHECKPOINT_DATA_DIR,
BASE_CHECKPOINT_DIR,
)
from open_alpha_tensor.core.modules.alpha_tensor import AlphaTensorModel
from open_alpha_tensor.core.training import Trainer

4 changes: 3 additions & 1 deletion azure-pipelines.yml
@@ -36,7 +36,9 @@ steps:
- script: python -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu117
displayName: 'Install PyTorch'

- script: python -m nebullvm.installers.auto_installer --compilers all
- script: |
export PATH=$PATH:/home/AzDevOps/.local/bin
python -m nebullvm.installers.auto_installer --compilers all
displayName: 'Install deep learning compilers'
- script: python -m pip install -r "requirements-dev.txt"
4 changes: 3 additions & 1 deletion nebullvm/config.py
@@ -71,6 +71,7 @@
"openvino",
"bladedisc",
"intel_neural_compressor",
"faster_transformer",
]

COMPRESSOR_LIST = [
@@ -85,13 +86,14 @@
"intel_neural_compressor",
"tensor_rt",
"torch_tensor_rt",
"faster_transformer",
]

TENSORFLOW_MODULES = []
HUGGING_FACE_MODULES = []
DIFFUSERS_MODULES = []

LIBRARIES_GPU = ["tensor_rt", "torch_tensor_rt"]
LIBRARIES_GPU = ["tensor_rt", "torch_tensor_rt", "faster_transformer"]

MIN_NUMBER = 1e-4
DEFAULT_METRIC_DROP_THS = 1e-3
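For context, a sketch (not part of this diff) of how these lists are typically consumed: "faster_transformer" is GPU-only, so adding it to LIBRARIES_GPU lets CPU-only hosts skip it before installation. The helper below is hypothetical, and COMPILER_LIST is an assumed name for the compiler list above; gpu_is_available is the real helper imported elsewhere in this PR from nebullvm.tools.utils.

from nebullvm.config import COMPILER_LIST, LIBRARIES_GPU  # COMPILER_LIST name is assumed
from nebullvm.tools.utils import gpu_is_available


def selectable_compilers() -> list:
    # Hypothetical helper: drop GPU-only compilers ("tensor_rt",
    # "torch_tensor_rt", "faster_transformer") when no GPU is present.
    if gpu_is_available():
        return list(COMPILER_LIST)
    return [c for c in COMPILER_LIST if c not in LIBRARIES_GPU]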
37 changes: 37 additions & 0 deletions nebullvm/installers/install_fastertransformer.sh
@@ -0,0 +1,37 @@
#!/bin/bash

# TODO: check requirements
# https://github.com/NVIDIA/FasterTransformer/blob/main/docs/bert_guide.md
# Requirements:
# CMake >= 3.8 for TensorFlow, CMake >= 3.13 for PyTorch
# CUDA 11.0 or newer
# Python: only verified on Python 3
# TensorFlow: verified on 1.15; 1.13 and 1.14 should also work
# PyTorch: verified on 1.8.0; >= 1.5.0 should work


# Set non interactive mode for apt-get
export DEBIAN_FRONTEND=noninteractive

if [[ $OSTYPE == "darwin"* ]]
then
echo "MacOS is not supported for FasterTransformer"
exit 1
fi

if [ ! -d "FasterTransformer" ]
then
git clone --recursive https://github.com/NVIDIA/FasterTransformer FasterTransformer
fi

# TODO: check out the latest release

cd FasterTransformer &&
mkdir -p build &&
cd build &&
cmake -DSM=$COMPUTE_CAPABILITY -DCMAKE_BUILD_TYPE=Release -DBUILD_PYT=ON .. &&
make -j8 &&
touch ../../FasterTransformer_build_success # create a file to indicate that the build was successful

# TODO: enable multi gpu if possible
#-DBUILD_MULTI_GPU=OFF
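A minimal sketch of driving the script above from Python, mirroring what install_faster_transformer in nebullvm/installers/installers.py (below) does: derive the GPU's compute capability and pass it through the COMPUTE_CAPABILITY environment variable. The script path here is illustrative.

import os
import subprocess

import torch

# e.g. (8, 0) on an A100 -> COMPUTE_CAPABILITY="80", which the script
# forwards to CMake as -DSM=80.
major, minor = torch.cuda.get_device_capability()
env = {**os.environ, "COMPUTE_CAPABILITY": f"{major}{minor}"}
subprocess.run(
    ["bash", "install_fastertransformer.sh"],  # illustrative path
    env=env,
    check=True,
)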
70 changes: 58 additions & 12 deletions nebullvm/installers/installers.py
@@ -9,21 +9,17 @@
import cpuinfo
from loguru import logger

from nebullvm.config import (
LIBRARIES_GPU,
)
from nebullvm.config import LIBRARIES_GPU
from nebullvm.operations.optimizations.compilers.utils import (
deepsparse_is_available,
get_faster_transformer_repo_path,
intel_neural_compressor_is_available,
openvino_is_available,
tensorrt_is_available,
torch_tensorrt_is_available,
deepsparse_is_available,
intel_neural_compressor_is_available,
)
from nebullvm.optional_modules.torch import torch
from nebullvm.tools.utils import (
gpu_is_available,
check_module_version,
)
from nebullvm.tools.utils import check_module_version, gpu_is_available


def get_cpu_arch():
@@ -140,7 +136,14 @@ def install_torch_tensor_rt():
]
subprocess.run(cmd)
cuda_version = subprocess.check_output(["nvidia-smi"])
cuda_version = int(cuda_version.decode("utf-8").split("\n")[2].split("|")[-2].split(":")[-1].strip().split(".")[0])
cuda_version = int(
cuda_version.decode("utf-8")
.split("\n")[2]
.split("|")[-2]
.split(":")[-1]
.strip()
.split(".")[0]
)
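# NB: line index 2 of `nvidia-smi` output is the header row, e.g.
# "| NVIDIA-SMI 525.85.12  Driver Version: 525.85.12  CUDA Version: 12.0 |",
# so the chain above isolates the CUDA major version ("12").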
if cuda_version >= 12:
cmd = [
"pip3",
@@ -233,10 +236,10 @@ def install_openvino(with_optimization: bool = True):

try:
from openvino.runtime import ( # noqa F401
Core,
Model,
CompiledModel,
Core,
InferRequest,
Model,
)
except ImportError:
return False
@@ -341,6 +344,47 @@ def install_onnx_simplifier():
return True


def install_faster_transformer(
working_dir: str = None,
):
"""Helper function for installing FasterTransformer.
https://github.com/NVIDIA/FasterTransformer
This function has some prerequisites: a valid `git` installation
and a Linux distribution as the OS (the install script aborts on macOS).
Args:
working_dir (str, optional): The directory where the FasterTransformer
repo will be cloned and installed. Default: None
"""
if not gpu_is_available():
return False
path = Path(__file__).parent
# install faster transformer
try:
import torch

CP = compute_capability = torch.cuda.get_device_capability()
assert len(compute_capability) == 2
except (ImportError, AssertionError):
return False
installation_file = str(path / "install_fastertransformer.sh")
env_dict = {
"COMPUTE_CAPABILITY": f"{CP[0]}{CP[1]}",
**dict(os.environ.copy()),
}

result = subprocess.run(
["bash", installation_file],
cwd=get_faster_transformer_repo_path().parent,
env=env_dict,
)
# check result
if result.returncode != 0:
return False
return True


class BaseInstaller(ABC):
def __init__(self, module_list: List[str]):
self.modules = module_list
@@ -581,6 +625,7 @@ def install_framework():
"torch_tensor_rt": install_torch_tensor_rt,
"deepsparse": install_deepsparse,
"intel_neural_compressor": install_intel_neural_compressor,
#"faster_transformer": install_faster_transformer,
}

COMPILERS_AVAILABLE = {
@@ -589,4 +634,5 @@
"torch_tensor_rt": torch_tensorrt_is_available,
"deepsparse": deepsparse_is_available,
"intel_neural_compressor": intel_neural_compressor_is_available,
#"faster_transformer": faster_transformer_is_available,
}
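Both "faster_transformer" registry entries above are still commented out. A hypothetical shape for the missing faster_transformer_is_available check, inferred from the FasterTransformer_build_success marker that install_fastertransformer.sh touches on success (a sketch, not code from this PR):

from nebullvm.operations.optimizations.compilers.utils import (
    get_faster_transformer_repo_path,
)


def faster_transformer_is_available() -> bool:
    # The build script runs `touch ../../FasterTransformer_build_success`
    # from FasterTransformer/build, leaving the marker next to the clone.
    marker = (
        get_faster_transformer_repo_path().parent
        / "FasterTransformer_build_success"
    )
    return marker.exists()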
3 changes: 3 additions & 0 deletions nebullvm/installers/tests/test_install_frameworks.py
@@ -174,6 +174,7 @@ def test_install_huggingface_torch_compilers_all():

assert compiler_list == [
"deepsparse",
"faster_transformer",
"intel_neural_compressor",
"tensor_rt",
"torch_tensor_rt",
@@ -190,6 +191,7 @@ def test_install_torch_compilers_all():

assert compiler_list == [
"deepsparse",
"faster_transformer",
"intel_neural_compressor",
"tensor_rt",
"torch_tensor_rt",
@@ -228,6 +230,7 @@ def test_install_torch_onnx_compilers_all():

assert compiler_list == [
"deepsparse",
"faster_transformer",
"intel_neural_compressor",
"openvino",
"tensor_rt",
25 changes: 22 additions & 3 deletions nebullvm/operations/inference_learners/builders.py
@@ -1,10 +1,13 @@
from pathlib import Path
from typing import Union, Any
from typing import Any, Union

from nebullvm.operations.inference_learners.base import BuildInferenceLearner
from nebullvm.operations.inference_learners.deepsparse import (
PytorchDeepSparseInferenceLearner,
)
from nebullvm.operations.inference_learners.faster_transformer import (
FasterTransformerInferenceLearner,
)
from nebullvm.operations.inference_learners.neural_compressor import (
PytorchNeuralCompressorInferenceLearner,
)
@@ -16,16 +19,16 @@
PytorchBackendInferenceLearner,
)
from nebullvm.operations.inference_learners.tensor_rt import (
PytorchTensorRTInferenceLearner,
TENSOR_RT_INFERENCE_LEARNERS,
PytorchTensorRTInferenceLearner,
)
from nebullvm.operations.inference_learners.tensorflow import (
TensorflowBackendInferenceLearner,
TFLiteBackendInferenceLearner,
)
from nebullvm.operations.inference_learners.tvm import (
PytorchApacheTVMInferenceLearner,
APACHE_TVM_INFERENCE_LEARNERS,
PytorchApacheTVMInferenceLearner,
)
from nebullvm.optional_modules.tensor_rt import tensorrt as trt
from nebullvm.optional_modules.tensorflow import tensorflow as tf
@@ -290,3 +293,19 @@ def execute(
target=target_device,
device=self.device,
)


class FasterTransformerBuildInferenceLearner(BuildInferenceLearner):
def execute(
self,
model: ScriptModule,
model_params: ModelParams,
input_tfms: MultiStageTransformation,
**kwargs,
):
self.inference_learner = FasterTransformerInferenceLearner(
torch_model=model,
network_parameters=model_params,
input_tfms=input_tfms,
device=self.device,
)
8 changes: 8 additions & 0 deletions nebullvm/operations/inference_learners/faster_transformer.py
@@ -0,0 +1,8 @@
from nebullvm.operations.inference_learners.pytorch import (
PytorchBackendInferenceLearner,
)


class FasterTransformerInferenceLearner(PytorchBackendInferenceLearner):
MODEL_NAME = "faster_transformer_model_scripted.pt"
name = "FasterTransformer"
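Because the learner inherits from the PyTorch backend and names its artifact a scripted model, the optimized network should load as plain TorchScript. A minimal sketch under that assumption; the path and the BERT-style input shapes are hypothetical:

import torch

model = torch.jit.load("faster_transformer_model_scripted.pt")
model.eval()
# Hypothetical BERT-style inputs: batch of 1, sequence length 128,
# BERT-base vocabulary size.
input_ids = torch.randint(0, 30522, (1, 128))
attention_mask = torch.ones(1, 128, dtype=torch.long)
with torch.no_grad():
    output = model(input_ids, attention_mask)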