Python demos tuning #2354

Closed · wants to merge 3 commits
4 changes: 3 additions & 1 deletion demos/python_demos/llm_text_generation/README.md
@@ -27,10 +27,12 @@ Building the image with all required python dependencies is required. Follow the commands:
 ```bash
 git clone https://github.com/openvinotoolkit/model_server.git
 cd model_server
-make python_image
+make python_image OVMS_CPP_DOCKER_IMAGE=openvino/model_server OVMS_CPP_IMAGE_TAG=latest-gpu
 ```
 It will create an image called `openvino/model_server:py`

+> **NOTE** To use the model server compiled from the latest `main` branch, build the base image using the command `make release_image RUN_TESTS=0 GPU=1` and update the parameter `OVMS_CPP_DOCKER_IMAGE` to the generated image name.
+
 ## Download model

 Download the model using `download_model.py` script:
4 changes: 2 additions & 2 deletions demos/python_demos/requirements.txt
@@ -1,6 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
-git+https://github.com/dtrawins/optimum-intel.git@concurrency_support
-## used a fork to provide support for multi concurrency https://github.com/huggingface/optimum-intel/pull/519
+git+https://github.com/dtrawins/optimum-intel.git@concurrency_support_cloneall
+## used a fork to provide support for multi concurrency https://github.com/huggingface/optimum-intel/pull/564
 onnx
 pillow
 optimum[diffusers]
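The pinned fork exists so that a single loaded pipeline can serve concurrent requests by handing each request its own clone (the multi-concurrency support referenced in the PR link above). A minimal sketch of that clone-per-request pattern, using a hypothetical `DummyPipeline` as a stand-in rather than the real optimum-intel classes:

```python
from threading import Thread

# Illustrative sketch only: DummyPipeline mimics a pipeline whose clone()
# gives each request private mutable state; it is not the optimum-intel API.
class DummyPipeline:
    def __init__(self):
        self.last_prompt = None  # mutable per-request state

    def clone(self):
        # Each request works on its own copy, so nothing mutable is shared
        return DummyPipeline()

    def __call__(self, prompt):
        self.last_prompt = prompt
        return f"result for {prompt}"

shared = DummyPipeline()
results = {}

def handle_request(name, prompt):
    pipe = shared.clone()  # per-request clone: safe under concurrency
    results[name] = pipe(prompt)

threads = [Thread(target=handle_request, args=(i, f"prompt-{i}")) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(sorted(results.values()))
```

Because every request clones before calling, the shared instance is never mutated, which is the property the servables below rely on.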
8 changes: 6 additions & 2 deletions demos/python_demos/seq2seq_translation/README.md
@@ -2,13 +2,17 @@

 ## Build image

-From the root of the repository run:
+Building the image with all required python dependencies is required. Follow the commands:

 ```bash
 git clone https://github.com/openvinotoolkit/model_server.git
 cd model_server
-make python_image
+make python_image OVMS_CPP_DOCKER_IMAGE=openvino/model_server OVMS_CPP_IMAGE_TAG=latest-gpu
 ```
+It will create an image called `openvino/model_server:py`
+
+> **NOTE** To use the model server compiled from the latest `main` branch, build the base image using the command `make release_image RUN_TESTS=0 GPU=1` and update the parameter `OVMS_CPP_DOCKER_IMAGE` to the generated image name.
+

 ## Deploy OpenVINO Model Server with the Python calculator
 Prerequisites:
2 changes: 1 addition & 1 deletion demos/python_demos/seq2seq_translation/servable/model.py
@@ -17,7 +17,6 @@
 from pyovms import Tensor
 from optimum.intel import OVModelForSeq2SeqLM
 from transformers import AutoTokenizer, pipeline
-import time

 class OvmsPythonModel:

@@ -34,3 +33,4 @@ def execute(self, inputs: list):
         translation = results[0]["translation_text"]
         return [Tensor("translation", translation.encode())]

+
8 changes: 5 additions & 3 deletions demos/python_demos/stable_diffusion/README.md
@@ -10,14 +10,16 @@ Here we present two scenarios:

 ## Build image

-From the root of the repository run:
+Building the image with all required python dependencies is required. Follow the commands:

 ```bash
 git clone https://github.com/openvinotoolkit/model_server.git
 cd model_server
-make python_image
+make python_image OVMS_CPP_DOCKER_IMAGE=openvino/model_server OVMS_CPP_IMAGE_TAG=latest-gpu
 ```
-It will create an image called `openvino/model_server:py`.
+It will create an image called `openvino/model_server:py`

+> **NOTE** To use the model server compiled from the latest `main` branch, build the base image using the command `make release_image RUN_TESTS=0 GPU=1` and update the parameter `OVMS_CPP_DOCKER_IMAGE` to the generated image name.
+
 ## Download models

5 changes: 3 additions & 2 deletions demos/python_demos/stable_diffusion/servable_stream/model.py
@@ -35,6 +35,7 @@ def initialize(self, kwargs: dict):

     def execute(self, inputs: list):
         print("Running execute")
+        pipe_exec = self.pipe.clone()
         text = bytes(inputs[0]).decode()

         q = Queue()
@@ -46,15 +47,15 @@ def callback_on_step_end_impl(step, timestep,
             image = np.concatenate(
                 [self.pipe.vae_decoder(latent_sample=latents[i : i + 1])[0] for i in range(latents.shape[0])]
             )
-            pil_images = self.pipe.image_processor.postprocess(image, output_type='pil', do_denormalize=[True])
+            pil_images = pipe_exec.image_processor.postprocess(image, output_type='pil', do_denormalize=[True])
             pil_image = pil_images[0]
             output = io.BytesIO()
             pil_image.save(output, format='PNG')
             q.put((output.getvalue(),False))
             print('end callback')

         print('generating for prompt:', text)
-        image = self.pipe(
+        image = pipe_exec(
             text,
             num_inference_steps=50,
             callback=callback_on_step_end_impl,
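The streamed servable above combines two ideas: a per-request pipeline clone and a `Queue` that the per-step callback fills with intermediate images while a consumer drains it. The queue half of that design can be sketched in isolation; `fake_generate`, `on_step_end`, and the `(payload, done)` tuple convention are illustrative stand-ins, not the diffusers API:

```python
from queue import Queue
from threading import Thread

# Hedged sketch of the streaming pattern: a generation loop invokes a callback
# after every step, the callback pushes partial results into a Queue, and the
# caller drains the queue as items arrive, stopping at the final item.
def fake_generate(steps, callback):
    for step in range(steps):
        callback(step, payload=f"partial-{step}")
    return "final-image"

def run_streaming(steps=3):
    q = Queue()

    def on_step_end(step, payload):
        q.put((payload, False))  # False: generation not finished yet

    def producer():
        result = fake_generate(steps, on_step_end)
        q.put((result, True))    # True: final result, ends the stream

    Thread(target=producer).start()
    streamed = []
    while True:
        item, done = q.get()     # blocks until the producer emits something
        streamed.append(item)
        if done:
            break
    return streamed

print(run_streaming())
```

Running it yields the partial payloads in step order followed by the final result, mirroring how the servable streams intermediate PNGs before the finished image.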
3 changes: 2 additions & 1 deletion demos/python_demos/stable_diffusion/servable_unary/model.py
@@ -36,7 +36,8 @@ def initialize(self, kwargs: dict):
     def execute(self, inputs: list):
         print("Running execute")
         text = bytes(inputs[0]).decode()
-        image = self.pipe(text).images[0]
+        pipe_exec = self.pipe.clone()
+        image = pipe_exec(text).images[0]
         output = io.BytesIO()
         image.save(output, format='PNG')
         return [Tensor("image", output.getvalue())]
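The point of calling `clone()` at the top of each unary execute is that clones can share the heavyweight read-only parts (the model weights) while keeping mutable per-request state private. A toy sketch of that design, with hypothetical names (`Pipeline`, `scratch`) rather than the optimum-intel implementation:

```python
# Sketch of the clone-per-execute idea: clones share the (read-only) weights
# object by reference but get a fresh per-request buffer, so concurrent
# execute() calls cannot trample each other's intermediate results.
class Pipeline:
    def __init__(self, weights, _shared_from=None):
        self.weights = weights if _shared_from is None else _shared_from.weights
        self.scratch = []  # per-request mutable buffer

    def clone(self):
        return Pipeline(None, _shared_from=self)  # weights shared, scratch fresh

    def __call__(self, prompt):
        self.scratch.append(prompt)
        return f"image({prompt})"

base = Pipeline(weights=object())
a, b = base.clone(), base.clone()
a("cat")
b("dog")
print(a.weights is b.weights, a.scratch, b.scratch)
```

The clones report identical weights but disjoint scratch buffers, which is why cloning per request adds little memory cost compared to loading the pipeline twice.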