diff --git a/examples/sparseserver-ui/README.md b/examples/sparseserver-ui/README.md new file mode 100644 index 0000000000..65a5bc659f --- /dev/null +++ b/examples/sparseserver-ui/README.md @@ -0,0 +1,88 @@ + + [**NEURAL MAGIC**](https://neuralmagic.com)  + + ███████╗██████╗ █████╗ ██████╗ ███████╗███████╗ ███████╗███████╗██████╗ ██╗ ██╗███████╗██████╗ ██╗ ██╗ ██╗ + ██╔════╝██╔══██╗██╔══██╗██╔══██╗██╔════╝██╔════╝ ██╔════╝██╔════╝██╔══██╗██║ ██║██╔════╝██╔══██╗ ██║ ██║ ██║ + ███████╗██████╔╝███████║██████╔╝███████╗█████╗ ███████╗█████╗ ██████╔╝██║ ██║█████╗ ██████╔╝ ██║ ██║ ██║ + ╚════██║██╔═══╝ ██╔══██║██╔══██╗╚════██║██╔══╝ ╚════██║██╔══╝ ██╔══██╗╚██╗ ██╔╝██╔══╝ ██╔══██╗ ██║ ██║ ██║ + ███████║██║ ██║ ██║██║ ██║███████║███████╗ ███████║███████╗██║ ██║ ╚████╔╝ ███████╗██║ ██║ ██╗ ╚██████╔ ██║ + ╚══════╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝ ╚══════╝╚══════╝╚═╝ ╚═╝ ╚═══╝ ╚══════╝╚═╝ ╚═╝ ╚═╝ ╚═════╝ ╚═╝ + + + *** A Streamlit app for deploying the DeepSparse Server *** + + + +##
ANSWER: {answer["answer"]}
', unsafe_allow_html=True +) +st.markdown(f'{infer_time} secs.
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
from typing import Any, Optional


class MultiPipelineClient:
    """
    Client object for making requests to the example DeepSparse BERT
    inference server.

    :param model: model alias of FastAPI route
    :param address: IP address of the server, default is 0.0.0.0
    :param port: Port the server is hosted on, default is 5543
    :param timeout: optional timeout in seconds passed to ``requests.post``;
        the default ``None`` preserves the original wait-forever behavior
    """

    def __init__(
        self,
        model: str,
        address: str = "0.0.0.0",
        port: str = "5543",
        timeout: Optional[float] = None,
    ):
        self.model = model
        self.timeout = timeout
        self._url = f"http://{address}:{port}/predict/{self.model}"

    def __call__(self, **kwargs) -> Any:
        """
        :param kwargs: named inputs to the model server pipeline, e.g. for
            question-answering: ``question="...", context="..."``

        :return: decoded JSON outputs from running the model server pipeline
            with the given input(s)
        """
        # Imported lazily so this module can be imported and inspected even
        # when the third-party `requests` package is not installed.
        import requests

        # NOTE(review): no status-code check here -- a non-2xx response will
        # surface as a JSON decode error; consider response.raise_for_status().
        response = requests.post(self._url, json=kwargs, timeout=self.timeout)
        return json.loads(response.content)


# client/samples.py -- question-answering sample inputs, keyed by display
# name, each with a `context` passage and a `question` about that passage.
sample = {
    "example 1": {
        "context": (
            "The DeepSparse Engine is a CPU runtime that delivers "
            "GPU-class performance by taking advantage of sparsity within neural "
            "networks to reduce compute required as well as accelerate memory bound "
            "workloads. It is focused on model deployment and scaling machine "
            "learning pipelines, fitting seamlessly into your existing deployments "
            "as an inference backend. "
        ),
        "question": (
            "What does the DeepSparse Engine take advantage of within neural networks?"
        ),
    },
    "example 2": {
        "context": (
            "Concerns were raised over whether Levi's Stadium's field was of a high "
            "enough quality to host a Super Bowl; during the inaugural season, the "
            "field had to be re-sodded multiple times due to various issues, and "
            "during a week 6 game earlier in the 2015 season, a portion of the turf "
            "collapsed under Baltimore Ravens kicker Justin Tucker, causing him "
            "to slip and miss a field goal. "
        ),
        "question": ("What collapsed on Justin Tucker?"),
    },
    "example 3": {
        "context": (
            "The league announced on October 16, 2012, that the two finalists were Sun "
            "Life Stadium and Levi's Stadium. The South Florida/Miami area has "
            "previously hosted the event 10 times (tied for most with New Orleans), "
            "with the most recent one being Super Bowl XLIV in 2010. The San Francisco "
            "Bay Area last hosted in 1985 (Super Bowl XIX), held at Stanford Stadium "
            "in Stanford, California, won by the home team 49ers. The Miami bid "
            "depended on whether the stadium underwent renovations. "
        ),
        "question": (
            "What was the most recent Super Bowl that took place at Sun "
            "Life Stadium in Miami?"
        ),
    },
}
+# See the License for the specific language governing permissions and +# limitations under the License. + +from pipelineclient import MultiPipelineClient + + +class FeatureHandler: + + """ + Class with front-end streamlit content features. + """ + + tasks_desc = "Select task:" + tasks = [ + "Question Answering", + ] + + variants_desc = "Select your sparse model:" + variants = { + "12-Layer BERT Base, Not Sparsified 😢": MultiPipelineClient( + model="question_answering/base" + ), + "12-Layer BERT, Quantized, 99% of Base Accuracy": MultiPipelineClient( + model="question_answering/12l_pruned80_quant" + ), + "6-Layer BERT, Quantized, 96% of Base Accuracy": MultiPipelineClient( + model="question_answering/quant6lagg96" + ), + "3-Layer BERT, Quantized, 89% of Base Accuracy": MultiPipelineClient( + model="question_answering/quant3lagg89" + ), + # "12-Layer BERT, Quantized, 95% of Base Accuracy": MultiPipelineClient( + # model="question_answering/pruned_quant" + # ), + # "12-Layer BERT, Quantized, 99% of Base Accuracy": MultiPipelineClient( + # model="question_answering/quantmod" + # ), + # "12-Layer BERT, 98% of Base Accuracy ": MultiPipelineClient( + # model="question_answering/agg98" + # ), + # "12-Layer BERT, 94% of Base Accuracy ": MultiPipelineClient( + # model="question_answering/agg94" + # ), + # "12-Layer BERT, 100% of Base Accuracy": MultiPipelineClient( + # model="question_answering/conserv" + # ), + # "6-Layer BERT, Quantized, 91% of Base Accuracy": MultiPipelineClient( + # model="question_answering/quant6lagg91" + # ), + # "6-Layer BERT, 98% of Base Accuracy": MultiPipelineClient( + # model="question_answering/6lagg98" + # ), + # "6-Layer BERT, 97% of Base Accuracy": MultiPipelineClient( + # model="question_answering/6lagg97" + # ), + # "6-Layer BERT, 96% of Base Accuracy": MultiPipelineClient( + # model="question_answering/6lagg96" + # ), + # "6-Layer BERT, 94% of Base Accuracy": MultiPipelineClient( + # model="question_answering/6lagg94" + # ), + # "3-Layer 
BERT, Quantized, 84% of Base Accuracy": MultiPipelineClient( + # model="question_answering/quant3lagg84" + # ), + # "3-Layer BERT, 90% of Base Accuracy": MultiPipelineClient( + # model="question_answering/3lagg90" + # ), + # "3-Layer BERT, 89% of Base Accuracy": MultiPipelineClient( + # model="question_answering/3lagg89" + # ), + # "3-Layer BERT, 86% of Base Accuracy": MultiPipelineClient( + # model="question_answering/3lagg86" + # ), + # "3-Layer BERT, 83% of Base Accuracy": MultiPipelineClient( + # model="question_answering/3lagg83" + # ), + # "12-Layer BERT, 90% of Base Accuracy": MultiPipelineClient( + # model="question_answering/12layer_pruned90" + # ), + } + + title = "