# Hardware used: AWS c6i.8xlarge (32 vCPUs, 64 GB memory), running in RHOAI (Red Hat OpenShift AI)

# NLP Text Classification

#### Inference pipeline

In [2]:
# Uncomment to install transformers into this kernel's environment if it is missing:
# %pip install transformers

In [1]:
from deepsparse import Pipeline

# SparseZoo stub of the pruned + quantized DistilBERT MNLI model used in this cell.
sparse_stub = "zoo:nlp/text_classification/distilbert-none/pytorch/huggingface/mnli/pruned80_quant-none-vnni"

# A single pair of sentences to classify against each other.
sentence_pair = [
    "Fun for adults and children.",
    "Fun for only children.",
]

# Build the text-classification pipeline for the stub above.
classification_pipeline = Pipeline.create(task="text-classification", model_path=sparse_stub)

# The pipeline receives a batch (outer list) that holds the one sentence pair.
inference = classification_pipeline([sentence_pair])
print(inference)

Downloading (…)ed/deployment.tar.gz:   0%|          | 0.00/26.6M [00:00<?, ?B/s]

DeepSparse, Copyright 2021-present / Neuralmagic, Inc. version: 1.6.1 COMMUNITY | (eff4f95d) (release) (optimized) (system=avx512_vnni, binary=avx512)


labels=['contradiction'] scores=[0.9983579516410828]


#### Inference pipeline with dense model

In [2]:
from deepsparse import Pipeline

# SparseZoo stub of the dense (base) DistilBERT MNLI model, used as the baseline.
dense_stub = "zoo:nlp/text_classification/distilbert-none/pytorch/huggingface/mnli/base-none"

# A single pair of sentences to classify against each other.
sentence_pair = [
    "Fun for adults and children.",
    "Fun for only children.",
]

# Build the text-classification pipeline for the dense stub.
classification_pipeline = Pipeline.create(task="text-classification", model_path=dense_stub)

# The pipeline receives a batch (outer list) that holds the one sentence pair.
inference = classification_pipeline([sentence_pair])
print(inference)

Downloading (…)se/deployment.tar.gz:   0%|          | 0.00/236M [00:00<?, ?B/s]

labels=['contradiction'] scores=[0.979464054107666]


#### Inference pipeline with local model

In [3]:
# TODO: pending - this needs the CLI-based transfer learning experiments to be done first

#### Inference pipeline with zero shot learning

In [4]:
from deepsparse import Pipeline

# SparseZoo stub of the pruned + quantized DistilBERT MNLI model used for zero-shot.
zero_shot_stub = "zoo:nlp/text_classification/distilbert-none/pytorch/huggingface/mnli/pruned80_quant-none-vnni"

# Pipeline options: the MNLI scheme plus the template each candidate label is inserted into.
zero_shot_options = {
    "model_scheme": "mnli",
    "model_config": {"hypothesis_template": "This text is related to {}"},
}

zero_shot_pipeline = Pipeline.create(
    task="zero_shot_text_classification",
    model_path=zero_shot_stub,
    **zero_shot_options,
)

# Text to classify and the candidate labels it is scored against.
question = 'Who are you voting for in 2020?'
candidate_labels = ['politics', 'public health', 'Europe']

inference = zero_shot_pipeline(sequences=question, labels=candidate_labels)
print(inference)

sequences='Who are you voting for in 2020?' labels=['politics', 'Europe', 'public health'] scores=[0.9345628619194031, 0.039115309715270996, 0.026321841403841972]


#### Benchmarking

In [5]:
!deepsparse.benchmark zoo:nlp/text_classification/distilbert-none/pytorch/huggingface/mnli/base-none ## Dense model

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


2024-01-24 00:12:20 deepsparse.benchmark.helpers INFO     Thread pinning to cores enabled
DeepSparse, Copyright 2021-present / Neuralmagic, Inc. version: 1.6.1 COMMUNITY | (eff4f95d) (release) (optimized) (system=avx512_vnni, binary=avx512)
2024-01-24 00:12:23 deepsparse.benchmark.benchmark_model INFO     deepsparse.engine.Engine:
	onnx_file_path: /opt/app-root/src/.cache/sparsezoo/neuralmagic/distilbert-mnli_wikipedia_bookcorpus-base/deployment/model.onnx
	batch_size: 1
	num_cores: 16
	num_streams: 1
	scheduler: Scheduler.default
	fraction_of_supported_ops: 0.982
	cpu_avx_type: avx512
	cpu_vnni: True
2024-01-24 00:12:23 deepsparse.utils.onnx INFO     Generating input 'input_ids', type = int64, shape = [1, 128]
2024-01-24 00:12:23 deepsparse.utils.onnx INFO     Generating input 'attention_mask', type = int64, shape = [1, 128]
2024-01-24 00:12:23 deepsparse.benchmark.benchmark_model INFO     Starting 'singlestream' performance measurements for 10 seconds
Original Model Path: zoo:nlp/tex

In [6]:
!deepsparse.benchmark zoo:nlp/text_classification/distilbert-none/pytorch/huggingface/mnli/pruned80_quant-none-vnni ## Sparse model

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


2024-01-24 00:12:44 deepsparse.benchmark.helpers INFO     Thread pinning to cores enabled
DeepSparse, Copyright 2021-present / Neuralmagic, Inc. version: 1.6.1 COMMUNITY | (eff4f95d) (release) (optimized) (system=avx512_vnni, binary=avx512)
2024-01-24 00:12:48 deepsparse.benchmark.benchmark_model INFO     deepsparse.engine.Engine:
	onnx_file_path: /opt/app-root/src/.cache/sparsezoo/neuralmagic/distilbert-mnli_wikipedia_bookcorpus-pruned80.4block_quantized/deployment/model.onnx
	batch_size: 1
	num_cores: 16
	num_streams: 1
	scheduler: Scheduler.default
	fraction_of_supported_ops: 0.9882
	cpu_avx_type: avx512
	cpu_vnni: True
2024-01-24 00:12:48 deepsparse.utils.onnx INFO     Generating input 'input_ids', type = int64, shape = [1, 128]
2024-01-24 00:12:48 deepsparse.utils.onnx INFO     Generating input 'attention_mask', type = int64, shape = [1, 128]
2024-01-24 00:12:48 deepsparse.benchmark.benchmark_model INFO     Starting 'singlestream' performance measurements for 10 seconds
Original M