Merge pull request #167 from cccntu/faster_transformer-poc
Add Faster Transformer compiler for Bert
diegofiori committed Apr 18, 2023
2 parents 92fc131 + 6303568 commit 0130847
Showing 25 changed files with 4,299 additions and 49 deletions.
@@ -1,2 +1,2 @@
BASE_CHECKPOINT_DIR = "checkpoints"
BASE_CHECKPOINT_DATA_DIR = "games"
BASE_CHECKPOINT_DATA_DIR = "games"  # text unchanged; the diff only adds a trailing newline at EOF
@@ -5,7 +5,10 @@
import tqdm
from torch.utils.data import DataLoader

from open_alpha_tensor.config import BASE_CHECKPOINT_DATA_DIR, BASE_CHECKPOINT_DIR
from open_alpha_tensor.config import (
BASE_CHECKPOINT_DATA_DIR,
BASE_CHECKPOINT_DIR,
)
from open_alpha_tensor.core.actors.stage import actor_prediction
from open_alpha_tensor.core.data.basis_change import ChangeOfBasis
from open_alpha_tensor.core.data.dataset import TensorGameDataset
@@ -181,7 +184,9 @@ def __init__(
)
self.checkpoint_dir.mkdir(exist_ok=True, parents=True)
self.checkpoint_data_dir = (
checkpoint_data_dir if checkpoint_data_dir else Path(BASE_CHECKPOINT_DATA_DIR)
checkpoint_data_dir
if checkpoint_data_dir
else Path(BASE_CHECKPOINT_DATA_DIR)
)
self.checkpoint_data_dir.mkdir(exist_ok=True, parents=True)
self.change_of_basis = ChangeOfBasis(
@@ -4,7 +4,10 @@
import torch
from nebullvm.operations.base import Operation

from open_alpha_tensor.config import BASE_CHECKPOINT_DATA_DIR, BASE_CHECKPOINT_DIR
from open_alpha_tensor.config import (
BASE_CHECKPOINT_DATA_DIR,
BASE_CHECKPOINT_DIR,
)
from open_alpha_tensor.core.modules.alpha_tensor import AlphaTensorModel
from open_alpha_tensor.core.training import Trainer

4 changes: 3 additions & 1 deletion azure-pipelines.yml
@@ -36,7 +36,9 @@ steps:
- script: python -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu117
displayName: 'Install PyTorch'

- script: python -m nebullvm.installers.auto_installer --compilers all
- script: |
export PATH=$PATH:/home/AzDevOps/.local/bin
python -m nebullvm.installers.auto_installer --compilers all
displayName: 'Install deep learning compilers'
- script: python -m pip install -r "requirements-dev.txt"
4 changes: 3 additions & 1 deletion nebullvm/config.py
@@ -71,6 +71,7 @@
"openvino",
"bladedisc",
"intel_neural_compressor",
"faster_transformer",
]

COMPRESSOR_LIST = [
@@ -85,13 +86,14 @@
"intel_neural_compressor",
"tensor_rt",
"torch_tensor_rt",
"faster_transformer",
]

TENSORFLOW_MODULES = []
HUGGING_FACE_MODULES = []
DIFFUSERS_MODULES = []

LIBRARIES_GPU = ["tensor_rt", "torch_tensor_rt"]
LIBRARIES_GPU = ["tensor_rt", "torch_tensor_rt", "faster_transformer"]

MIN_NUMBER = 1e-4
DEFAULT_METRIC_DROP_THS = 1e-3
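For context, a sketch (not part of this diff) of how these lists are typically consumed: "faster_transformer" is GPU-only, so adding it to LIBRARIES_GPU lets CPU-only hosts skip it before installation. The helper below is hypothetical, and COMPILER_LIST is an assumed name for the compiler list above; gpu_is_available is the real helper imported elsewhere in this PR from nebullvm.tools.utils.

from nebullvm.config import COMPILER_LIST, LIBRARIES_GPU  # COMPILER_LIST name is assumed
from nebullvm.tools.utils import gpu_is_available


def selectable_compilers() -> list:
    # Hypothetical helper: drop GPU-only compilers ("tensor_rt",
    # "torch_tensor_rt", "faster_transformer") when no GPU is present.
    if gpu_is_available():
        return list(COMPILER_LIST)
    return [c for c in COMPILER_LIST if c not in LIBRARIES_GPU]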
37 changes: 37 additions & 0 deletions nebullvm/installers/install_fastertransformer.sh
@@ -0,0 +1,37 @@
#!/bin/bash

# TODO: check requirements
# https://github.com/NVIDIA/FasterTransformer/blob/main/docs/bert_guide.md
# Requirements:
# CMake >= 3.8 for TensorFlow, CMake >= 3.13 for PyTorch
# CUDA 11.0 or newer
# Python: only verified on Python 3
# TensorFlow: verified on 1.15; 1.13 and 1.14 should also work
# PyTorch: verified on 1.8.0; >= 1.5.0 should work


# Set non interactive mode for apt-get
export DEBIAN_FRONTEND=noninteractive

if [[ $OSTYPE == "darwin"* ]]
then
echo "MacOS is not supported for FasterTransformer"
exit 1
fi

if [ ! -d "FasterTransformer" ]
then
git clone --recursive https://github.com/NVIDIA/FasterTransformer FasterTransformer
fi

# TODO: check out the latest release

cd FasterTransformer &&
mkdir -p build &&
cd build &&
cmake -DSM=$COMPUTE_CAPABILITY -DCMAKE_BUILD_TYPE=Release -DBUILD_PYT=ON .. &&
make -j8 &&
touch ../../FasterTransformer_build_success # create a file to indicate that the build was successful

# TODO: enable multi gpu if possible
#-DBUILD_MULTI_GPU=OFF
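A minimal sketch of driving the script above from Python, mirroring what install_faster_transformer in nebullvm/installers/installers.py (below) does: derive the GPU's compute capability and pass it through the COMPUTE_CAPABILITY environment variable. The script path here is illustrative.

import os
import subprocess

import torch

# e.g. (8, 0) on an A100 -> COMPUTE_CAPABILITY="80", which the script
# forwards to CMake as -DSM=80.
major, minor = torch.cuda.get_device_capability()
env = {**os.environ, "COMPUTE_CAPABILITY": f"{major}{minor}"}
subprocess.run(
    ["bash", "install_fastertransformer.sh"],  # illustrative path
    env=env,
    check=True,
)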
70 changes: 58 additions & 12 deletions nebullvm/installers/installers.py
@@ -9,21 +9,17 @@
import cpuinfo
from loguru import logger

from nebullvm.config import (
LIBRARIES_GPU,
)
from nebullvm.config import LIBRARIES_GPU
from nebullvm.operations.optimizations.compilers.utils import (
deepsparse_is_available,
get_faster_transformer_repo_path,
intel_neural_compressor_is_available,
openvino_is_available,
tensorrt_is_available,
torch_tensorrt_is_available,
deepsparse_is_available,
intel_neural_compressor_is_available,
)
from nebullvm.optional_modules.torch import torch
from nebullvm.tools.utils import (
gpu_is_available,
check_module_version,
)
from nebullvm.tools.utils import check_module_version, gpu_is_available


def get_cpu_arch():
@@ -140,7 +136,14 @@ def install_torch_tensor_rt():
]
subprocess.run(cmd)
cuda_version = subprocess.check_output(["nvidia-smi"])
cuda_version = int(cuda_version.decode("utf-8").split("\n")[2].split("|")[-2].split(":")[-1].strip().split(".")[0])
cuda_version = int(
cuda_version.decode("utf-8")
.split("\n")[2]
.split("|")[-2]
.split(":")[-1]
.strip()
.split(".")[0]
)
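# NB: line index 2 of `nvidia-smi` output is the header row, e.g.
# "| NVIDIA-SMI 525.85.12  Driver Version: 525.85.12  CUDA Version: 12.0 |",
# so the chain above isolates the CUDA major version ("12").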
if cuda_version >= 12:
cmd = [
"pip3",
@@ -233,10 +236,10 @@ def install_openvino(with_optimization: bool = True):

try:
from openvino.runtime import ( # noqa F401
Core,
Model,
CompiledModel,
Core,
InferRequest,
Model,
)
except ImportError:
return False
@@ -341,6 +344,47 @@ def install_onnx_simplifier():
return True


def install_faster_transformer(
working_dir: str = None,
):
"""Helper function for installing FasterTransformer.
https://github.com/NVIDIA/FasterTransformer
This function has some prerequisites: a valid `git` installation
and a Linux distribution as the OS (the install script aborts on macOS).
Args:
working_dir (str, optional): The directory where the FasterTransformer
repo will be cloned and installed. Default: None
"""
if not gpu_is_available():
return False
path = Path(__file__).parent
# install faster transformer
try:
import torch

CP = compute_capability = torch.cuda.get_device_capability()
assert len(compute_capability) == 2
except (ImportError, AssertionError):
return False
installation_file = str(path / "install_fastertransformer.sh")
env_dict = {
"COMPUTE_CAPABILITY": f"{CP[0]}{CP[1]}",
**dict(os.environ.copy()),
}

result = subprocess.run(
["bash", installation_file],
cwd=get_faster_transformer_repo_path().parent,
env=env_dict,
)
# check result
if result.returncode != 0:
return False
return True


class BaseInstaller(ABC):
def __init__(self, module_list: List[str]):
self.modules = module_list
@@ -581,6 +625,7 @@ def install_framework():
"torch_tensor_rt": install_torch_tensor_rt,
"deepsparse": install_deepsparse,
"intel_neural_compressor": install_intel_neural_compressor,
#"faster_transformer": install_faster_transformer,
}

COMPILERS_AVAILABLE = {
@@ -589,4 +634,5 @@
"torch_tensor_rt": torch_tensorrt_is_available,
"deepsparse": deepsparse_is_available,
"intel_neural_compressor": intel_neural_compressor_is_available,
#"faster_transformer": faster_transformer_is_available,
}
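Both "faster_transformer" registry entries above are still commented out. A hypothetical shape for the missing faster_transformer_is_available check, inferred from the FasterTransformer_build_success marker that install_fastertransformer.sh touches on success (a sketch, not code from this PR):

from nebullvm.operations.optimizations.compilers.utils import (
    get_faster_transformer_repo_path,
)


def faster_transformer_is_available() -> bool:
    # The build script runs `touch ../../FasterTransformer_build_success`
    # from FasterTransformer/build, leaving the marker next to the clone.
    marker = (
        get_faster_transformer_repo_path().parent
        / "FasterTransformer_build_success"
    )
    return marker.exists()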
3 changes: 3 additions & 0 deletions nebullvm/installers/tests/test_install_frameworks.py
@@ -174,6 +174,7 @@ def test_install_huggingface_torch_compilers_all():

assert compiler_list == [
"deepsparse",
"faster_transformer",
"intel_neural_compressor",
"tensor_rt",
"torch_tensor_rt",
@@ -190,6 +191,7 @@ def test_install_torch_compilers_all():

assert compiler_list == [
"deepsparse",
"faster_transformer",
"intel_neural_compressor",
"tensor_rt",
"torch_tensor_rt",
@@ -228,6 +230,7 @@ def test_install_torch_onnx_compilers_all():

assert compiler_list == [
"deepsparse",
"faster_transformer",
"intel_neural_compressor",
"openvino",
"tensor_rt",
25 changes: 22 additions & 3 deletions nebullvm/operations/inference_learners/builders.py
@@ -1,10 +1,13 @@
from pathlib import Path
from typing import Union, Any
from typing import Any, Union

from nebullvm.operations.inference_learners.base import BuildInferenceLearner
from nebullvm.operations.inference_learners.deepsparse import (
PytorchDeepSparseInferenceLearner,
)
from nebullvm.operations.inference_learners.faster_transformer import (
FasterTransformerInferenceLearner,
)
from nebullvm.operations.inference_learners.neural_compressor import (
PytorchNeuralCompressorInferenceLearner,
)
@@ -16,16 +19,16 @@
PytorchBackendInferenceLearner,
)
from nebullvm.operations.inference_learners.tensor_rt import (
PytorchTensorRTInferenceLearner,
TENSOR_RT_INFERENCE_LEARNERS,
PytorchTensorRTInferenceLearner,
)
from nebullvm.operations.inference_learners.tensorflow import (
TensorflowBackendInferenceLearner,
TFLiteBackendInferenceLearner,
)
from nebullvm.operations.inference_learners.tvm import (
PytorchApacheTVMInferenceLearner,
APACHE_TVM_INFERENCE_LEARNERS,
PytorchApacheTVMInferenceLearner,
)
from nebullvm.optional_modules.tensor_rt import tensorrt as trt
from nebullvm.optional_modules.tensorflow import tensorflow as tf
@@ -290,3 +293,19 @@ def execute(
target=target_device,
device=self.device,
)


class FasterTransformerBuildInferenceLearner(BuildInferenceLearner):
def execute(
self,
model: ScriptModule,
model_params: ModelParams,
input_tfms: MultiStageTransformation,
**kwargs,
):
self.inference_learner = FasterTransformerInferenceLearner(
torch_model=model,
network_parameters=model_params,
input_tfms=input_tfms,
device=self.device,
)
8 changes: 8 additions & 0 deletions nebullvm/operations/inference_learners/faster_transformer.py
@@ -0,0 +1,8 @@
from nebullvm.operations.inference_learners.pytorch import (
PytorchBackendInferenceLearner,
)


class FasterTransformerInferenceLearner(PytorchBackendInferenceLearner):
MODEL_NAME = "faster_transformer_model_scripted.pt"
name = "FasterTransformer"
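Because the learner inherits from the PyTorch backend and names its artifact a scripted model, the optimized network should load as plain TorchScript. A minimal sketch under that assumption; the path and the BERT-style input shapes are hypothetical:

import torch

model = torch.jit.load("faster_transformer_model_scripted.pt")
model.eval()
# Hypothetical BERT-style inputs: batch of 1, sequence length 128,
# BERT-base vocabulary size.
input_ids = torch.randint(0, 30522, (1, 128))
attention_mask = torch.ones(1, 128, dtype=torch.long)
with torch.no_grad():
    output = model(input_ids, attention_mask)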