In [1]:
import sys,os,os.path
os.environ['http_proxy']="http://proxy-igk.intel.com:911"
os.environ['https_proxy']="http://proxy-igk.intel.com:911"

In [2]:
!pip install -q -r requirements.txt

In [3]:
import os

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate

# Document Splitter
from typing import List
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, MarkdownTextSplitter
from langchain_community.document_loaders import (
    CSVLoader,
    EverNoteLoader,
    PDFMinerLoader,
    TextLoader,
    UnstructuredEPubLoader,
    UnstructuredHTMLLoader,
    UnstructuredMarkdownLoader,
    UnstructuredODTLoader,
    UnstructuredPowerPointLoader,
    UnstructuredWordDocumentLoader, )

from langchain_community.vectorstores import Chroma
from langchain.docstore.document import Document

In [4]:
TARGET_FOLDER = "./ovms/html_files/"

TEXT_SPLITERS = {
    "Character": CharacterTextSplitter,
    "RecursiveCharacter": RecursiveCharacterTextSplitter,
    "Markdown": MarkdownTextSplitter,
}

LOADERS = {
    ".csv": (CSVLoader, {}),
    ".doc": (UnstructuredWordDocumentLoader, {}),
    ".docx": (UnstructuredWordDocumentLoader, {}),
    ".enex": (EverNoteLoader, {}),
    ".epub": (UnstructuredEPubLoader, {}),
    ".html": (UnstructuredHTMLLoader, {}),
    ".md": (UnstructuredMarkdownLoader, {}),
    ".odt": (UnstructuredODTLoader, {}),
    ".pdf": (PDFMinerLoader, {}),
    ".ppt": (UnstructuredPowerPointLoader, {}),
    ".pptx": (UnstructuredPowerPointLoader, {}),
    ".txt": (TextLoader, {"encoding": "utf8"}),
}

In [5]:
def load_single_document(file_path: str) -> List[Document]:
    """
    helper for loading a single document

    Params:
      file_path: document path
    Returns:
      documents loaded

    """
    ext = "." + file_path.rsplit(".", 1)[-1]
    if ext in LOADERS:
        loader_class, loader_args = LOADERS[ext]
        loader = loader_class(file_path, **loader_args)
        return loader.load()

    raise ValueError(f"File does not exist '{ext}'")

In [6]:
embeddings=HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-mpnet-base-v2",
            model_kwargs={"device":"cpu"},
            show_progress=True
            )


  from .autonotebook import tqdm as notebook_tqdm


In [7]:
documents = []
for file_path in os.listdir(TARGET_FOLDER):
    if not file_path.endswith('.html'):
        continue
    abs_path = os.path.join(TARGET_FOLDER, file_path)
    print(f"Reading document {abs_path}...", flush=True)
    documents.extend(load_single_document(abs_path))

Reading document ./ovms/html_files/ovms_docs_quick_start_guide.html...
Reading document ./ovms/html_files/ovms_docs_parameters.html...
Reading document ./ovms/html_files/ovms_docs_demo_mediapipe_holistic.html...
Reading document ./ovms/html_files/ovms_docs_serving_model.html...
Reading document ./ovms/html_files/ovms_docs_mediapipe_conversion.html...
Reading document ./ovms/html_files/ovms_docs_troubleshooting.html...
Reading document ./ovms/html_files/ovms_docs_dynamic_shape_custom_node.html...
Reading document ./ovms/html_files/ovms_docs_demos.html...
Reading document ./ovms/html_files/ovms_docs_dynamic_shape_auto_reload.html...
Reading document ./ovms/html_files/ovms_docs_dynamic_shape_binary_inputs.html...
Reading document ./ovms/html_files/ovms_docs_clients_kfs.html...
Reading document ./ovms/html_files/ovms_docs_c_api.html...
Reading document ./ovms/html_files/ovms_docs_shape_batch_layout.html...
Reading document ./ovms/html_files/ovms_docs_demo_mediapipe_object_detection.html...

In [8]:
spliter_name = "RecursiveCharacter"  # PARAM
chunk_size=1000  # PARAM
chunk_overlap=200  # PARAM
text_splitter = TEXT_SPLITERS[spliter_name](chunk_size=chunk_size, chunk_overlap=chunk_overlap)

texts = text_splitter.split_documents(documents)



In [9]:
try:
    db.delete_collection()
except:
    pass
db = Chroma.from_documents(texts, embeddings)

Batches: 100%|██████████| 15/15 [00:45<00:00,  3.01s/it]


In [10]:
vector_search_top_k = 4
retriever = db.as_retriever(search_kwargs={"k": vector_search_top_k})

retrieved_docs = retriever.invoke("How to start model server container?")
print(retrieved_docs[0])
print(retrieved_docs[1])
print(retrieved_docs[2])
print(retrieved_docs[3])

Batches: 100%|██████████| 1/1 [00:00<00:00, 51.66it/s]

page_content='sudo\n\ndnf\n\ninstall\n\ny\n\npkg-config\n\n&&\n\nsudo\n\nrpm\n\nivh\n\nhttps://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm\n\nStart the server:\n\nwget\n\nhttps://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/resnet50-binary-0001/FP32-INT1/resnet50-binary-0001.\n\n{xml,bin\n\nP\n\nmodels/resnet50/1\n\n./ovms/bin/ovms\n\n--model_name\n\nresnet\n\n--model_path\n\nmodels/resnet50\n\nor start as a background process or a daemon initiated by systemctl/initd depending on the Linux distribution and specific hosting requirements.\n\nMost of the Model Server documentation demonstrate containers usage, but the same can be achieved with just the binary package.\n\nLearn more about model server\n\nstarting parameters.\n\nNOTE:\nWhen serving models on AI accelerators, some additional steps may be required to install device drivers and dependencies.\nLearn more in the Additional Configurations for Hardware documenta




In [11]:
llm = ChatOpenAI(
    openai_api_key="EMPTY",
    openai_api_base="http://ov-spr-19.sclab.intel.com:8002/v1",
    model_name="mistralai/Mistral-7B-Instruct-v0.1",
    temperature=0.1,
    seed=5,
    verbose=True
)


                seed was transferred to model_kwargs.
                Please confirm that seed is what you intended.


In [12]:

prompt=PromptTemplate(input_variables=['context', 'question'], 
                      template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.\nQuestion: {question} \nContext: {context} \nAnswer:")

print("prompt", prompt)
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

prompt input_variables=['context', 'question'] template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.\nQuestion: {question} \nContext: {context} \nAnswer:"


In [13]:
for chunk in rag_chain.stream("How to start model server container?"):
    print(chunk, end="", flush=True)

Batches: 100%|██████████| 1/1 [00:00<00:00, 44.81it/s]


 To start the model server container, you can use the following command:
```
docker run --rm -v $(pwd)/models:/models:ro -p 9100:9100 -p 8100:8100 openvino/model_server:latest --config_path /models/config.json --port 9100 --rest_port 8100 --log_level DEBUG
```
This command assumes that you have a configuration file named `config.json` in the `models` directory. You can modify the parameters as needed for your specific use case.

If you don't have a configuration file, you can still start the model server by specifying the model path and name as command-line arguments. For example:
```
docker run --rm -v $(pwd)/models:/models:ro -p 9100:9100 -p 8100:8100 openvino/model_server:latest --model_path /models/resnet50 --model_name resnet --port 9100 --rest_port 8100 --log_level DEBUG
```
This command assumes that you have a model named `resnet50` in the `models` directory. You can replace `resnet50` with the name of your own model.

I hope this helps! Let me know if you have any other questio

In [14]:
for chunk in rag_chain.stream("Which metrics are supported in the model server? Give examples."):
    print(chunk, end="", flush=True)

Batches: 100%|██████████| 1/1 [00:00<00:00, 43.69it/s]


 The OpenVINO Model Server supports various metrics for monitoring and benchmarking purposes, auto scaling of model server instances, and tracking performance without any extra logic on the client side or using network traffic monitoring tools. Some examples of metrics include inference execution queue statistics, model runtime parameters, and usage based on model version, API type, or requested endpoint methods. These metrics are exposed on the /metrics endpoint and are compatible with the Prometheus standard. You can enable additional metrics by listing them in the metric_list flag or json configuration.