In [1]:
import os
from typing import Type, List
from pydantic import BaseModel, Field
from dotenv import load_dotenv
from crewai import Agent, Task, Crew, Process
from crewai.tools import BaseTool
from ibm_watsonx_ai.foundation_models import ModelInference
from crewai import Agent, Task, Crew, Process
from crewai.tools import BaseTool
from crewai import LLM
from elasticsearch import Elasticsearch
from ibm_watsonx_ai.foundation_models.schema import TextChatParameters
from ibm_watsonx_ai import Credentials
from crewai.memory import LongTermMemory
from crewai.memory.storage.ltm_sqlite_storage import LTMSQLiteStorage
from langchain_ibm import WatsonxEmbeddings
from chromadb.utils.embedding_functions import create_langchain_embedding
from langchain_chroma import Chroma


In [2]:
# For IBM WatsonX API:
api_key = os.getenv("WATSONX_API_KEY")
api_url = os.getenv("WATSONX_URL")
project_id = os.getenv("WATSONX_PROJECT_ID")
index_name = os.getenv("INDEX_NAME")

In [4]:
credentials = Credentials(
    url="https://us-south.ml.cloud.ibm.com",
    api_key=api_key
)

params = TextChatParameters(
    temperature=0.7,
    max_tokens=2000
)

model = ModelInference(
    model_id="mistralai/mistral-large",
    credentials=credentials,
    project_id=project_id,
    params=params
)


embeddings = WatsonxEmbeddings(model_id='ibm/slate-125m-english-rtrvr',
                               apikey=credentials.get('apikey'),
                               url=credentials.get('url'),
                               project_id=project_id)

#### Custom DB tools Chroma
vector_store_chroma = Chroma(
    collection_name=index_name,
    embedding_function=embeddings,
    persist_directory="./data",  # Where to save data locally, remove if not necessary
)

def _run(query: str, top_n: int=10) -> List[str]:
    """Retrieves course materials filtered by course name."""
    results = vector_store_chroma.similarity_search(query, k=top_n)
    return "\n\n".join([doc.page_content for doc in results])

In [9]:
index_name

'rag-chroma-watsonx'

In [10]:
vector_store_chroma = Chroma(
    collection_name=index_name,
    embedding_function=embeddings,
    persist_directory="./data",  # Where to save data locally, remove if not necessary
)
vector_store_chroma.similarity_search_with_score("What are the use cases for IBM watsonx Orchestrate? ")

[]

In [14]:
print("Number of documents in ChromaDB:", vector_store_chroma._collection.count())


Number of documents in ChromaDB: 0


In [18]:
vector_store_chroma = Chroma(
    collection_name=index_name,
    embedding_function=embeddings,
    persist_directory="./data"  # Reload stored data
)

all_docs = vector_store_chroma._collection.get(include=["documents"])
print("All stored documents:", all_docs["documents"])


All stored documents: []


In [33]:
vector_store_chroma = Chroma(
    collection_name=index_name,
    embedding_function=embeddings,
    persist_directory="./data",
)

retrieved_docs = vector_store_chroma.similarity_search("What are the use")
print(f"Retrieved {len(retrieved_docs)} documents")
for doc in retrieved_docs:
    print(doc.page_content[:300])  # Print first 300 characters of each document


Retrieved 0 documents


In [34]:
from langchain_chroma import Chroma

vector_store_chroma = Chroma(
    collection_name=index_name,
    embedding_function=embeddings,
    persist_directory="./data",
)

docs = vector_store_chroma.similarity_search("IBM watsonx Orchestrate")
print(f"Retrieved {len(docs)} documents")

for doc in docs:
    print(doc.page_content[:300])  # Print first 300 characters


Retrieved 0 documents


In [35]:
len(docs)

0

In [5]:
import os
from typing import Type, List
from pydantic import BaseModel, Field
from dotenv import load_dotenv
from crewai import Agent, Task, Crew, Process
from crewai.tools import BaseTool
from ibm_watsonx_ai.foundation_models import ModelInference
from crewai import Agent, Task, Crew, Process
from crewai.tools import BaseTool
from crewai import LLM
from elasticsearch import Elasticsearch
from ibm_watsonx_ai.foundation_models.schema import TextChatParameters
from ibm_watsonx_ai import Credentials
from crewai.memory import LongTermMemory
from crewai.memory.storage.ltm_sqlite_storage import LTMSQLiteStorage
from langchain_ibm import WatsonxEmbeddings
from chromadb.utils.embedding_functions import create_langchain_embedding
from langchain_chroma import Chroma

api_key = os.getenv("WATSONX_API_KEY")
api_url = os.getenv("WATSONX_URL")
project_id = os.getenv("WATSONX_PROJECT_ID")
index_name = os.getenv("INDEX_NAME")

credentials = Credentials(
    url="https://us-south.ml.cloud.ibm.com",
    api_key=api_key
)

embeddings = WatsonxEmbeddings(model_id='ibm/slate-125m-english-rtrvr',
                               apikey=credentials.get('apikey'),
                               url=credentials.get('url'),
                               project_id=project_id)

In [6]:
from langchain_chroma import Chroma
from dotenv import load_dotenv
import os
load_dotenv(override=True)

index_name=os.getenv('INDEX_NAME')
vector_store_chroma = Chroma(
    collection_name=index_name,
    embedding_function=embeddings,
    persist_directory="./data",
)

# Get document count
doc_count = vector_store_chroma._collection.count()
print(f"Documents in Chroma: {doc_count}")

# Retrieve sample documents
docs = vector_store_chroma.similarity_search("IBM watsonx Orchestrate")
print(f"Retrieved {len(docs)} documents")

# Print sample content
for i, doc in enumerate(docs[:3]):  # Show first 3 docs
    print(f"Doc {i+1} Content:\n{doc.page_content[:300]}\n")


Documents in Chroma: 54
Retrieved 4 documents
Doc 1 Content:
IBM watsonx Orchestrate













































































Home




AI and ML




watsonx




watsonx Orchestrate





                



  
    watsonx Orchestrate: AI for business productivity






    


            

                    


  
  
      

Doc 2 Content:
IBM saw 94% of its company-wide HR requests handled using watsonx Orchestrate.



See for yourself
            
        









            



  
    AI that makes your work matter more






    


        




Easily build and deploy AI agents and assistants powered by your data with watsonx Or

Doc 3 Content:
watsonx Orchestrate is an enterprise-ready solution that helps create, deploy, and manage AI assistants and agents to automate processes and workflows, from HR and Procurement to Sales and Customer Experience


Learn more









            
            

     
    Use cases 




    


        





In [43]:
from langchain_community.document_loaders import UnstructuredURLLoader

urls = [
    "https://www.ibm.com/products/watsonx-ai?lnk=flatitem",
    "https://www.ibm.com/products/watsonx-orchestrate?lnk=flatitem",
    "https://www.ibm.com/products/watsonx-assistant?lnk=flatitem"
]

loader = UnstructuredURLLoader(urls=urls)
docs = loader.load()

# Print content from the first document
if docs:
    print(f"Doc 1 Content: {docs[0].page_content[:500]}")


Doc 1 Content: 


In [48]:
elasticsearch_url = os.getenv("hostname_url", "") + ":" + os.getenv("port", "")
username = os.getenv("username", None)
password = os.getenv("password", None)


es = Elasticsearch(
    elasticsearch_url,
    basic_auth=(username, password),
    max_retries=10,
    retry_on_timeout=True,
    verify_certs=False,  # Disable for production
    request_timeout=300
)

  _transport = transport_class(


In [49]:
# Build the query body
body = {
    "sort": [
        {"_score": "desc"}
    ],
    "query": {
        "bool": {
            "should": [
                {
                    "multi_match": {
                        "query": "what is watsonx orchestrate",
                        "fields": [
                            "text_field"
                        ],
                        "boost": 1.0
                    }
                },
                {
                    "text_expansion": {
                        "ml.tokens": {
                            "model_id": ".elser_model_2_linux-x86_64",
                            "model_text": "what is watsonx orchestrate"
                        }
                    }
                }
            ]
        }
    },
    "min_score": 15,
    "_source": ["text_field", "vector_query_field"],
    "size": 5
}

        

# Send the request with a 60-second timeout
response = es.search(
    index=index_name,
    body=body,
    request_timeout=60
)

  response = es.search(


In [51]:
index_name

'rag-chroma-watsonx'