diff --git a/examples/sparseserver-ui/README.md b/examples/sparseserver-ui/README.md new file mode 100644 index 0000000000..65a5bc659f --- /dev/null +++ b/examples/sparseserver-ui/README.md @@ -0,0 +1,88 @@ + +![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+) [**NEURAL MAGIC**](https://neuralmagic.com) ![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+) + + ███████╗██████╗ █████╗ ██████╗ ███████╗███████╗ ███████╗███████╗██████╗ ██╗ ██╗███████╗██████╗ ██╗ ██╗ ██╗ + ██╔════╝██╔══██╗██╔══██╗██╔══██╗██╔════╝██╔════╝ ██╔════╝██╔════╝██╔══██╗██║ ██║██╔════╝██╔══██╗ ██║ ██║ ██║ + ███████╗██████╔╝███████║██████╔╝███████╗█████╗ ███████╗█████╗ ██████╔╝██║ ██║█████╗ ██████╔╝ ██║ ██║ ██║ + ╚════██║██╔═══╝ ██╔══██║██╔══██╗╚════██║██╔══╝ ╚════██║██╔══╝ ██╔══██╗╚██╗ ██╔╝██╔══╝ ██╔══██╗ ██║ ██║ ██║ + ███████║██║ ██║ ██║██║ ██║███████║███████╗ ███████║███████╗██║ ██║ ╚████╔╝ ███████╗██║ ██║ ██╗ ╚██████╔ ██║ + ╚══════╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝ ╚══════╝╚══════╝╚═╝ ╚═╝ ╚═══╝ ╚══════╝╚═╝ ╚═╝ ╚═╝ ╚═════╝ ╚═╝ + + + *** A Streamlit 
app for deploying the DeepSparse Server *** +![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+)![#00F](https://via.placeholder.com/15/00F/000000?text=+) + + +##
`INTRO`
+ + + +
+SparseServer.UI serves a Streamlit app on top of the DeepSparse Server for comparing the latency of sparse transformer models. The purpose of this app is to let you get familiar with, and compare, the inference performance of transformers trained with various sparsification approaches. +
+ +
+ +[Getting Started with the DeepSparse Server](https://github.com/neuralmagic/deepsparse/tree/main/src/deepsparse/server) + +
+ +![alt text](./img/demo_screenshot.png) + +
+ +##
`INSTALLATION`
+ +```bash +git clone https://github.com/neuralmagic/deepsparse.git +cd deepsparse/examples/sparseserver-ui +pip install -r requirements.txt +``` +
+ +The `config.yaml` file in the `server` directory lists four BERT question-answering models to get the DeepSparse Server started. If you add models to the `config.yaml` file, make sure to also add a matching `MultiPipelineClient` object to the `variants` attribute in the `settings.py` module, as shown in the sketch below. + +Currently, the SparseZoo contains 20 BERT models, and the `big-config.yaml` file contains the full list in case you want to load them all 🤯. To load all 20 models at once, make sure you have at least 16GB of RAM available; otherwise, you will run into out-of-memory errors. In addition, uncomment the corresponding pipelines in the `settings.py` module. + +For more details on question answering models, please refer to our [updated list](https://sparsezoo.neuralmagic.com/?domain=nlp&sub_domain=question_answering&page=1).
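+For example, to serve one of the commented-out variants, copy its entry from `server/big-config.yaml` into `server/config.yaml` and mirror it in the `variants` dict in `client/settings.py`. A minimal sketch (the display name is arbitrary; the `model` string must match the `alias` in the YAML file): + +```python +# sketch: extending the `variants` dict in client/settings.py; +# `model` must match an `alias` served via server/config.yaml +from pipelineclient import MultiPipelineClient + +variants = { +    # ...existing entries... +    "6-Layer BERT, 98% of Base Accuracy": MultiPipelineClient( +        model="question_answering/6lagg98" +    ), +} +``` + +##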
`START SERVER`
+ +To download and initialize the four models in the `config.yaml` file, run: +```bash +deepsparse.server --config_file server/config.yaml +``` + +Once the downloads complete, the DeepSparse Server will be running on host `0.0.0.0` and port `5543`.
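+Before launching the client, you can sanity-check the server with a direct request. Each model is exposed under the route `/predict/<alias>`, where the alias values come from `server/config.yaml`; a minimal sketch: + +```python +# sketch: POST a question-answering request directly to the DeepSparse Server +import requests + +response = requests.post( +    "http://0.0.0.0:5543/predict/question_answering/base", +    json={ +        "question": "What is the DeepSparse Engine?", +        "context": "The DeepSparse Engine is a CPU runtime for sparse models.", +    }, +) +print(response.json())  # JSON with the predicted answer +``` + +##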
`START CLIENT`
+ +Open a new terminal (make sure your virtual environment is activated) and run the following command to start the Streamlit app: + +```bash +streamlit run client/app.py --browser.serverAddress="localhost" +``` + +This will start the Streamlit app on `localhost` and port `8501`. +Visit `http://localhost:8501` in your browser to view the demo. + +### Testing + +- 20 models should fit in 16GB of RAM on a c2-standard-4 VM instance on GCP +- Ubuntu 20.04.4 LTS +- Python 3.8.10 +
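+ +### Programmatic Access + +The Streamlit client wraps every request in the small `MultiPipelineClient` class from `client/pipelineclient.py`. As a sketch, you can reuse it from a plain Python session while the server is running (launch it from the `client` directory so the import resolves): + +```python +# sketch: call a served pipeline without the Streamlit front end +from pipelineclient import MultiPipelineClient + +client = MultiPipelineClient(model="question_answering/base") +result = client( +    question="What does the DeepSparse Engine take advantage of?", +    context="The DeepSparse Engine takes advantage of sparsity within neural networks.", +) +print(result["answer"])  # answer text extracted from the context +``` +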
diff --git a/examples/sparseserver-ui/client/app.py b/examples/sparseserver-ui/client/app.py new file mode 100644 index 0000000000..8e8a7b323d --- /dev/null +++ b/examples/sparseserver-ui/client/app.py @@ -0,0 +1,54 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from time import perf_counter + +import streamlit as st +from samples import sample +from settings import FeatureHandler as feat + + +# Titles +st.markdown(feat.title, unsafe_allow_html=True) +st.markdown(feat.subtitle, unsafe_allow_html=True) + +# Sidebar +st.sidebar.selectbox(feat.tasks_desc, feat.tasks) +model_choice = st.sidebar.radio(feat.variants_desc, feat.variants.keys()) +st.sidebar.markdown(feat.code_banner) +st.sidebar.code(body=feat.code_text, language=feat.language) +st.sidebar.markdown(feat.repo_test) + +# Footer +st.markdown(feat.footer, unsafe_allow_html=True) + +# Inference +model = feat.variants[model_choice] +selection = st.selectbox(feat.example_index_label, feat.example_index) +context = st.text_area( + label=feat.example_context_label, value=sample[selection]["context"], height=300 +) +question = st.text_area( + label=feat.example_question_label, value=sample[selection]["question"] +) +start = perf_counter() +answer = model(question=question, context=context) +end = perf_counter() +infer_time = end - start +infer_time = round(infer_time, 4) +st.markdown(feat.markdown_style, unsafe_allow_html=True) +st.markdown( + f'

<p class="big-font"> ANSWER: {answer["answer"]}</p>', unsafe_allow_html=True +) +st.markdown(f'<p class="small-font"> {infer_time} secs.</p>

', unsafe_allow_html=True) diff --git a/examples/sparseserver-ui/client/pipelineclient.py b/examples/sparseserver-ui/client/pipelineclient.py new file mode 100644 index 0000000000..dda88347ad --- /dev/null +++ b/examples/sparseserver-ui/client/pipelineclient.py @@ -0,0 +1,47 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from typing import List + +import numpy +import requests + + +class MultiPipelineClient: + """ + Client object for making requests to the example DeepSparse BERT inference server + + :param model: model alias of FastAPI route + :param address: IP address of the server, default is 0.0.0.0 + :param port: Port the server is hosted on, default is 5543 + """ + + def __init__(self, model: str, address: str = "0.0.0.0", port: str = "5543"): + + self.model = model + self._url = f"http://{address}:{port}/predict/{self.model}" + + def __call__(self, **kwargs) -> List[numpy.ndarray]: + + """ + :param kwargs: named inputs to the model server pipeline. e.g. for + question-answering - `question="...", context="..." + + :return: json outputs from running the model server pipeline with the given + input(s) + """ + + response = requests.post(self._url, json=kwargs) + return json.loads(response.content) diff --git a/examples/sparseserver-ui/client/samples.py b/examples/sparseserver-ui/client/samples.py new file mode 100644 index 0000000000..a3e09bed5d --- /dev/null +++ b/examples/sparseserver-ui/client/samples.py @@ -0,0 +1,55 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +sample = { + "example 1": { + "context": ( + "The DeepSparse Engine is a CPU runtime that delivers " + "GPU-class performance by taking advantage of sparsity within neural " + "networks to reduce compute required as well as accelerate memory bound " + "workloads. It is focused on model deployment and scaling machine " + "learning pipelines, fitting seamlessly into your existing deployments " + "as an inference backend. " + ), + "question": ( + "What does the DeepSparse Engine take advantage of within neural networks?" 
+ ), + }, + "example 2": { + "context": ( + "Concerns were raised over whether Levi's Stadium's field was of a high " + "enough quality to host a Super Bowl; during the inaugural season, the " + "field had to be re-sodded multiple times due to various issues, and " + "during a week 6 game earlier in the 2015 season, a portion of the turf " + "collapsed under Baltimore Ravens kicker Justin Tucker, causing him " + "to slip and miss a field goal. " + ), + "question": ("What collapsed on Justin Tucker?"), + }, + "example 3": { + "context": ( + "The league announced on October 16, 2012, that the two finalists were Sun " + "Life Stadium and Levi's Stadium. The South Florida/Miami area has " + "previously hosted the event 10 times (tied for most with New Orleans), " + "with the most recent one being Super Bowl XLIV in 2010. The San Francisco " + "Bay Area last hosted in 1985 (Super Bowl XIX), held at Stanford Stadium " + "in Stanford, California, won by the home team 49ers. The Miami bid " + "depended on whether the stadium underwent renovations. " + ), + "question": ( + "What was the most recent Super Bowl that took place at Sun " + "Life Stadium in Miami?" + ), + }, +} diff --git a/examples/sparseserver-ui/client/settings.py b/examples/sparseserver-ui/client/settings.py new file mode 100644 index 0000000000..3f5b03e33f --- /dev/null +++ b/examples/sparseserver-ui/client/settings.py @@ -0,0 +1,131 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pipelineclient import MultiPipelineClient + + +class FeatureHandler: + + """ + Class with front-end streamlit content features. 
+ """ + + tasks_desc = "Select task:" + tasks = [ + "Question Answering", + ] + + variants_desc = "Select your sparse model:" + variants = { + "12-Layer BERT Base, Not Sparsified 😢": MultiPipelineClient( + model="question_answering/base" + ), + "12-Layer BERT, Quantized, 99% of Base Accuracy": MultiPipelineClient( + model="question_answering/12l_pruned80_quant" + ), + "6-Layer BERT, Quantized, 96% of Base Accuracy": MultiPipelineClient( + model="question_answering/quant6lagg96" + ), + "3-Layer BERT, Quantized, 89% of Base Accuracy": MultiPipelineClient( + model="question_answering/quant3lagg89" + ), + # "12-Layer BERT, Quantized, 95% of Base Accuracy": MultiPipelineClient( + # model="question_answering/pruned_quant" + # ), + # "12-Layer BERT, Quantized, 99% of Base Accuracy": MultiPipelineClient( + # model="question_answering/quantmod" + # ), + # "12-Layer BERT, 98% of Base Accuracy ": MultiPipelineClient( + # model="question_answering/agg98" + # ), + # "12-Layer BERT, 94% of Base Accuracy ": MultiPipelineClient( + # model="question_answering/agg94" + # ), + # "12-Layer BERT, 100% of Base Accuracy": MultiPipelineClient( + # model="question_answering/conserv" + # ), + # "6-Layer BERT, Quantized, 91% of Base Accuracy": MultiPipelineClient( + # model="question_answering/quant6lagg91" + # ), + # "6-Layer BERT, 98% of Base Accuracy": MultiPipelineClient( + # model="question_answering/6lagg98" + # ), + # "6-Layer BERT, 97% of Base Accuracy": MultiPipelineClient( + # model="question_answering/6lagg97" + # ), + # "6-Layer BERT, 96% of Base Accuracy": MultiPipelineClient( + # model="question_answering/6lagg96" + # ), + # "6-Layer BERT, 94% of Base Accuracy": MultiPipelineClient( + # model="question_answering/6lagg94" + # ), + # "3-Layer BERT, Quantized, 84% of Base Accuracy": MultiPipelineClient( + # model="question_answering/quant3lagg84" + # ), + # "3-Layer BERT, 90% of Base Accuracy": MultiPipelineClient( + # model="question_answering/3lagg90" + # ), + # "3-Layer BERT, 89% of Base Accuracy": MultiPipelineClient( + # model="question_answering/3lagg89" + # ), + # "3-Layer BERT, 86% of Base Accuracy": MultiPipelineClient( + # model="question_answering/3lagg86" + # ), + # "3-Layer BERT, 83% of Base Accuracy": MultiPipelineClient( + # model="question_answering/3lagg83" + # ), + # "12-Layer BERT, 90% of Base Accuracy": MultiPipelineClient( + # model="question_answering/12layer_pruned90" + # ), + } + + title = "

<h1 style='text-align: center;'>✨ Neural Magic ✨</h1>" + subtitle = "<h3 style='text-align: center;'>DeepSparse Server</h3>" + + code_banner = "Get started with faster inference 👇" + code_text = "pip install deepsparse[server]" + language = "bash" + repo_test = ( + "For code: [DeepSparse repo](https://github.com/neuralmagic/deepsparse)." + ) + + example_context_label = "Enter Context" + example_question_label = "Enter Question" + example_index_label = "Choose an example" + example_index = ["example 1", "example 2", "example 3"] + # CSS classes used by client/app.py for the answer and latency text + markdown_style = """ + <style> + .big-font { font-size: 28px !important; } + .small-font { font-size: 16px !important; } + </style> + """ + # hides the default Streamlit footer + footer = """ + <style> + footer { visibility: hidden; } + </style> + """ diff --git a/examples/sparseserver-ui/img/demo_screenshot.png b/examples/sparseserver-ui/img/demo_screenshot.png new file mode 100644 index 0000000000..6ee72f6161 Binary files /dev/null and b/examples/sparseserver-ui/img/demo_screenshot.png differ diff --git a/examples/sparseserver-ui/requirements.txt b/examples/sparseserver-ui/requirements.txt new file mode 100644 index 0000000000..3b75e7e6a2 --- /dev/null +++ b/examples/sparseserver-ui/requirements.txt @@ -0,0 +1,2 @@ +deepsparse[server]>=0.11 +streamlit==1.8.1 \ No newline at end of file diff --git a/examples/sparseserver-ui/server/big-config.yaml b/examples/sparseserver-ui/server/big-config.yaml new file mode 100644 index 0000000000..e7179842cf --- /dev/null +++ b/examples/sparseserver-ui/server/big-config.yaml @@ -0,0 +1,95 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +models: + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_quant-aggressive_95 + batch_size: 1 + alias: question_answering/pruned_quant + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_quant-moderate + batch_size: 1 + alias: question_answering/quantmod + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned-aggressive_98 + batch_size: 1 + alias: question_answering/agg98 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned-aggressive_94 + batch_size: 1 + alias: question_answering/agg94 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned-conservative + batch_size: 1 + alias: question_answering/conserv + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_quant_6layers-aggressive_96 + batch_size: 1 + alias: question_answering/quant6lagg96 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_quant_6layers-aggressive_91 + batch_size: 1 + alias: question_answering/quant6lagg91 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_6layers-aggressive_98 + batch_size: 1 + alias: question_answering/6lagg98 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_6layers-aggressive_97 + batch_size: 1 + alias: question_answering/6lagg97 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_6layers-aggressive_96 + batch_size: 1 + alias: question_answering/6lagg96 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_6layers-aggressive_94 + batch_size: 1 + alias: question_answering/6lagg94 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_quant_3layers-aggressive_89 + batch_size: 1 + alias: question_answering/quant3lagg89 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_quant_3layers-aggressive_84 + batch_size: 1 + alias: question_answering/quant3lagg84 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_3layers-aggressive_90 + batch_size: 1 + alias: question_answering/3lagg90 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_3layers-aggressive_89 + batch_size: 1 + alias: question_answering/3lagg89 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_3layers-aggressive_86 + batch_size: 1 + alias: question_answering/3lagg86 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_3layers-aggressive_83 + batch_size: 1 + alias: question_answering/3lagg83 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/12layer_pruned80_quant-none-vnni + batch_size: 1 + alias: question_answering/12l_pruned80_quant + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/12layer_pruned90-none + batch_size: 1 + 
alias: question_answering/12layer_pruned90 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/base-none + batch_size: 1 + alias: question_answering/base diff --git a/examples/sparseserver-ui/server/config.yaml b/examples/sparseserver-ui/server/config.yaml new file mode 100644 index 0000000000..fe63b81d18 --- /dev/null +++ b/examples/sparseserver-ui/server/config.yaml @@ -0,0 +1,32 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +models: + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/12layer_pruned80_quant-none-vnni + batch_size: 1 + alias: question_answering/12l_pruned80_quant + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_quant_6layers-aggressive_96 + batch_size: 1 + alias: question_answering/quant6lagg96 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_quant_3layers-aggressive_89 + batch_size: 1 + alias: question_answering/quant3lagg89 + - task: question_answering + model_path: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/base-none + batch_size: 1 + alias: question_answering/base + \ No newline at end of file