From 73412b64c86a987ddd38b8e08ada4ac99e6624da Mon Sep 17 00:00:00 2001 From: Ziqun Ye Date: Tue, 5 Dec 2023 10:19:00 -0800 Subject: [PATCH 1/3] adding documentation --- ads/llm/serializers/retrieval_qa.py | 2 +- .../user_guide/large_language_model/index.rst | 37 ++++ .../large_language_model/retrieval.rst | 194 ++++++++++++++++++ 3 files changed, 232 insertions(+), 1 deletion(-) create mode 100644 docs/source/user_guide/large_language_model/retrieval.rst diff --git a/ads/llm/serializers/retrieval_qa.py b/ads/llm/serializers/retrieval_qa.py index adfbf2164..ce6cabb70 100644 --- a/ads/llm/serializers/retrieval_qa.py +++ b/ads/llm/serializers/retrieval_qa.py @@ -30,7 +30,7 @@ def load(config: dict, **kwargs): os.environ.get("OCI_OPENSEARCH_PASSWORD", None), ), verify_certs=True - if os.environ.get("OCI_OPENSEARCH_VERIFY_CERTS", None).lower() == "true" + if os.environ.get("OCI_OPENSEARCH_VERIFY_CERTS", None) == "True" else False, ca_certs=os.environ.get("OCI_OPENSEARCH_CA_CERTS", None), ) diff --git a/docs/source/user_guide/large_language_model/index.rst b/docs/source/user_guide/large_language_model/index.rst index a16be7e50..3d0686b11 100644 --- a/docs/source/user_guide/large_language_model/index.rst +++ b/docs/source/user_guide/large_language_model/index.rst @@ -4,9 +4,46 @@ Large Language Model #################### +Oracle Cloud Infrastructure (OCI) provides fully managed Infrastructure to work with Large Language Model (LLM). You can train LLM at scale with `Data Science Jobs (Jobs) `_, and deploy it with `Data Science Model Deployment (Model Deployments) `_. On top of that, you can build and test your LLM applications with LangChain, then deploy it as your own API using the model deployment. + + +Compatibility with LangChain +**************************** +ADS is designed to be compatible with LangChain, enabling developers to incorporate various LangChain components seamlessly into their langchain applications. 
+ +Deployment Requirements +----------------------- +For successful deployment of LangChain components within ADS, it is crucial to ensure that each component used in the chain is serializable. This is because ADS requires all components to be serializable in order to deploy them as a single unit. + +ADS-Supported Components +------------------------ +ADS natively supports serialization of all its components. This ensures that any component developed or integrated within ADS adheres to the serialization standards. + +Additional LangChain Component Support +-------------------------------------- +ADS extends its serialization support to two specific components from the LangChain vector store. These components are: + +- ``OpenSearchVectorSearch``: You can connect to the OCI OpenSearch cluster to perform semantic search along with your embedding model. + +- ``FAISS`` (Facebook AI Similarity Search): If you dont have an OCI OpenSearch cluster, you can use FAISS which is a in-memory vector store to perform semantic search along with your embedding model. + + + +.. admonition:: Installation + :class: note + + Install ADS and other dependencies for LLM integrations. + + .. code-block:: bash + + $ python3 -m pip install "oracle-ads[llm]" + + .. toctree:: :hidden: :maxdepth: 2 + training_llm deploy_langchain_application + retrieval \ No newline at end of file diff --git a/docs/source/user_guide/large_language_model/retrieval.rst b/docs/source/user_guide/large_language_model/retrieval.rst new file mode 100644 index 000000000..1d00f5a15 --- /dev/null +++ b/docs/source/user_guide/large_language_model/retrieval.rst @@ -0,0 +1,194 @@ +.. _vector_store: + +########################################################## +Extensive Support of Langchain Vector Stores serialization +########################################################## + +.. versionadded:: 2.9.1 + +Current version of Langchain does not support serialization of any vector stores. 
This will be a problem when you want to deploy a langchain application with the vector store being one of the components using data science model deployment service. To solve this problem, we extended our support of vector stores serialization: + +- ``OpenSearchVectorSearch`` +- ``FAISS`` + +OpenSearchVectorSearch Serialization +------------------------------------ + +langchain does not automatically support serialization of ``OpenSearchVectorSearch``. However, ADS provides a way to serialize ``OpenSearchVectorSearch``. To serialize ``OpenSearchVectorSearch``, you need to use environment variables to pass in the credentials. The following variables can be passed in through the corresponding environment variables: + +- http_auth: (``OCI_OPENSEARCH_USERNAME``, ``OCI_OPENSEARCH_PASSWORD``) +- verify_certs: ``OCI_OPENSEARCH_VERIFY_CERTS`` +- ca_certs: ``OCI_OPENSEARCH_CA_CERTS`` + +The following code snippet shows how to use ``OpenSearchVectorSearch`` with environment variables: + +.. code-block:: python3 + + os.environ['OCI_OPENSEARCH_USERNAME'] = "username" + os.environ['OCI_OPENSEARCH_PASSWORD'] = "password" + os.environ['OCI_OPENSEARCH_VERIFY_CERTS'] = "False" + + INDEX_NAME = "your_index_name" + opensearch_vector_search = OpenSearchVectorSearch( + "https://localhost:9200", + embedding_function=oci_embedings, + index_name=INDEX_NAME, + engine="lucene", + http_auth=(os.environ["OCI_OPENSEARCH_USERNAME"], os.environ["OCI_OPENSEARCH_PASSWORD"]), + verify_certs=os.environ["OCI_OPENSEARCH_VERIFY_CERTS"], + ) + +.. admonition:: Deployment + :class: note + +During deployment, it is very important that you remember to pass in those environment variables as well: + +.. 
code-block:: python3 + + .deploy(deployment_log_group_id="ocid1.loggroup.####", + deployment_access_log_id="ocid1.log.####", + deployment_predict_log_id="ocid1.log.####", + environment_variables={"OCI_OPENSEARCH_USERNAME":"", + "OCI_OPENSEARCH_PASSWORD": "", + "OCI_OPENSEARCH_VERIFY_CERTS": ""},) + +OpenSearchVectorSearch Deployment +--------------------------------- + +Here is an example code snippet for OpenSearchVectorSearch deployment: + +.. code-block:: python3 + + from langchain.vectorstores import OpenSearchVectorSearch + from ads.llm import GenerativeAIEmbeddings, GenerativeAI + import ads + + ads.set_auth("resource_principal") + + oci_embedings = GenerativeAIEmbeddings( + compartment_id="ocid1.compartment.oc1..aaaaaaaapvb3hearqum6wjvlcpzm5ptfxqa7xfftpth4h72xx46ygavkqteq", + client_kwargs=dict(service_endpoint="https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com") # this can be omitted after Generative AI service is GA. + ) + + oci_llm = GenerativeAI( + compartment_id="ocid1.compartment.oc1..aaaaaaaapvb3hearqum6wjvlcpzm5ptfxqa7xfftpth4h72xx46ygavkqteq", + client_kwargs=dict(service_endpoint="https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com") # this can be omitted after Generative AI service is GA. + ) + + import os + os.environ['OCI_OPENSEARCH_USERNAME'] = "username" + os.environ['OCI_OPENSEARCH_PASSWORD'] = "password" + os.environ['OCI_OPENSEARCH_VERIFY_CERTS'] = "True" # make sure this is capitalized.
+ os.environ['OCI_OPENSEARCH_CA_CERTS'] = "path/to/oci_opensearch_ca.pem" + + INDEX_NAME = "your_index_name" + opensearch_vector_search = OpenSearchVectorSearch( + "https://localhost:9200", # your endpoint + embedding_function=oci_embedings, + index_name=INDEX_NAME, + engine="lucene", + http_auth=(os.environ["OCI_OPENSEARCH_USERNAME"], os.environ["OCI_OPENSEARCH_PASSWORD"]), + verify_certs=os.environ["OCI_OPENSEARCH_VERIFY_CERTS"], + ca_certs=os.environ["OCI_OPENSEARCH_CA_CERTS"], + ) + from langchain.chains import RetrievalQA + retriever = opensearch_vector_search.as_retriever(search_kwargs={"vector_field": "embeds", + "text_field": "text", + "k": 3, + "size": 3}, + max_tokens_limit=1000) + qa = RetrievalQA.from_chain_type( + llm=oci_llm, + chain_type="stuff", + retriever=retriever, + chain_type_kwargs={ + "verbose": True + } + ) + from ads.llm.deploy import ChainDeployment + model = ChainDeployment(qa) + model.prepare(force_overwrite=True, + inference_conda_env="your_conda_pack", + ) + + model.save() + res = model.verify("your prompt") + model.deploy(deployment_log_group_id="ocid1.loggroup.####", + deployment_access_log_id="ocid1.log.####", + deployment_predict_log_id="ocid1.log.####", + environment_variables={"OCI_OPENSEARCH_USERNAME":"", + "OCI_OPENSEARCH_PASSWORD": "", + "OCI_OPENSEARCH_VERIFY_CERTS": "", + "OCI_OPENSEARCH_CA_CERTS": ""},) + + model.predict("your prompt") + + +FAISS Serialization +------------------- + +If your documents are not too large and you don't have an OCI OpenSearch cluster, you can use ``FAISS`` as your in-memory vector store, which can also do similarity search very efficiently. For ``FAISS``, you can just use it and deploy it as it is. + + +FAISS Deployment +---------------- + +Here is an example code snippet for FAISS deployment: + + ..
code-block:: python3 + + import ads + from ads.llm import GenerativeAIEmbeddings, GenerativeAI + from langchain.document_loaders import TextLoader + from langchain.text_splitter import CharacterTextSplitter + from langchain.vectorstores import FAISS + from langchain.chains import RetrievalQA + + ads.set_auth("resource_principal") + oci_embedings = GenerativeAIEmbeddings( + compartment_id="ocid1.compartment.####", + client_kwargs=dict(service_endpoint="https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com") # this can be omitted after Generative AI service is GA. + ) + + oci_llm = GenerativeAI( + compartment_id="ocid1.compartment.####", + client_kwargs=dict(service_endpoint="https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com") # this can be omitted after Generative AI service is GA. + ) + + loader = TextLoader("your.txt") + documents = loader.load() + text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=50) + docs = text_splitter.split_documents(documents) + + l = len(docs) + embeddings = [] + for i in range(l // 16 + 1): + subdocs = [item.page_content for item in docs[i * 16: (i + 1) * 16]] + embeddings.extend(oci_embedings.embed_documents(subdocs)) + + texts = [item.page_content for item in docs] + text_embedding_pairs = [(text, embed) for text, embed in zip(texts, embeddings)] + db = FAISS.from_embeddings(text_embedding_pairs, oci_embedings) + + retriever = db.as_retriever() + qa = RetrievalQA.from_chain_type( + llm=oci_llm, + chain_type="stuff", + retriever=retriever, + chain_type_kwargs={ + "verbose": True + } + ) + + from ads.llm.deploy import ChainDeployment + model.prepare(force_overwrite=True, + inference_conda_env="your_conda_pack", + ) + + model.save() + res = model.verify("your prompt") + model.deploy(deployment_log_group_id="ocid1.loggroup.####", + deployment_access_log_id="ocid1.log.####", + deployment_predict_log_id="ocid1.log.####") + + model.predict("your prompt") From 3266ac08941651014082ee59117c70b48c73b465 
Mon Sep 17 00:00:00 2001 From: Ziqun Ye Date: Tue, 5 Dec 2023 10:32:17 -0800 Subject: [PATCH 2/3] update the index file --- .../user_guide/large_language_model/index.rst | 29 +++++++------------ .../large_language_model/retrieval.rst | 2 +- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/docs/source/user_guide/large_language_model/index.rst b/docs/source/user_guide/large_language_model/index.rst index 3d0686b11..7e2d2a4be 100644 --- a/docs/source/user_guide/large_language_model/index.rst +++ b/docs/source/user_guide/large_language_model/index.rst @@ -4,29 +4,20 @@ Large Language Model #################### -Oracle Cloud Infrastructure (OCI) provides fully managed Infrastructure to work with Large Language Model (LLM). You can train LLM at scale with `Data Science Jobs (Jobs) `_, and deploy it with `Data Science Model Deployment (Model Deployments) `_. On top of that, you can build and test your LLM applications with LangChain, then deploy it as your own API using the model deployment. +Oracle Cloud Infrastructure (OCI) provides fully managed infrastructure to work with Large Language Model (LLM). +Train and Deploy LLM +******************** +You can train LLM at scale with multi-node and multi-GPU using `Data Science Jobs (Jobs) `_, and deploy it with `Data Science Model Deployment (Model Deployments) `_. The following blog posts show examples training and deploying Llama2 models: -Compatibility with LangChain -**************************** -ADS is designed to be compatible with LangChain, enabling developers to incorporate various LangChain components seamlessly into their langchain applications. 
+* `Multi-GPU multinode fine-tuning Llama2 on OCI Data Science `_ +* `Deploy Llama 2 in OCI Data Science `_ +* `Quantize and deploy Llama 2 70B on cost-effective NVIDIA A10 Tensor Core GPUs in OCI Data Science `_ -Deployment Requirements ------------------------ -For successful deployment of LangChain components within ADS, it is crucial to ensure that each component used in the chain is serializable. This is because ADS requires all components to be serializable in order to deploy them as a single unit. - -ADS-Supported Components ------------------------- -ADS natively supports serialization of all its components. This ensures that any component developed or integrated within ADS adheres to the serialization standards. - -Additional LangChain Component Support --------------------------------------- -ADS extends its serialization support to two specific components from the LangChain vector store. These components are: - -- ``OpenSearchVectorSearch``: You can connect to the OCI OpenSearch cluster to perform semantic search along with your embedding model. - -- ``FAISS`` (Facebook AI Similarity Search): If you dont have an OCI OpenSearch cluster, you can use FAISS which is a in-memory vector store to perform semantic search along with your embedding model. +Integration with LangChain +************************** +ADS is designed to work with LangChain, enabling developers to incorporate various LangChain components and models deployed on OCI seamlessly into their applications. Additionally, ADS can package LangChain applications and deploy it as a REST API endpoint using OCI Data Science Model Deployment. .. 
admonition:: Installation diff --git a/docs/source/user_guide/large_language_model/retrieval.rst b/docs/source/user_guide/large_language_model/retrieval.rst index 1d00f5a15..c8acb1641 100644 --- a/docs/source/user_guide/large_language_model/retrieval.rst +++ b/docs/source/user_guide/large_language_model/retrieval.rst @@ -40,7 +40,7 @@ The following code snippet shows how to use ``OpenSearchVectorSearch`` with envi .. admonition:: Deployment :class: note - + During deployment, it is very important that you remember to pass in those environment variables as well: .. code-block:: python3 From 83c036fc1b57708bf1f964384f508230c09c29b7 Mon Sep 17 00:00:00 2001 From: Ziqun Ye Date: Tue, 5 Dec 2023 13:43:45 -0800 Subject: [PATCH 3/3] change the name --- docs/source/user_guide/large_language_model/retrieval.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/user_guide/large_language_model/retrieval.rst b/docs/source/user_guide/large_language_model/retrieval.rst index c8acb1641..0aa8f7040 100644 --- a/docs/source/user_guide/large_language_model/retrieval.rst +++ b/docs/source/user_guide/large_language_model/retrieval.rst @@ -1,8 +1,8 @@ .. _vector_store: -########################################################## -Extensive Support of Langchain Vector Stores serialization -########################################################## +######################## +Vector Store integration +######################## .. versionadded:: 2.9.1 @@ -40,7 +40,7 @@ The following code snippet shows how to use ``OpenSearchVectorSearch`` with envi .. admonition:: Deployment :class: note - + During deployment, it is very important that you remember to pass in those environment variables as well: .. code-block:: python3