In [4]:
%pip install -U langchain langchain-community pydantic faiss-cpu

134.73s - pydevd: Sending message related to process being replaced timed-out after 5 seconds



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [5]:
import json
from langchain_community.document_loaders import JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings # Or any other embedding model you prefer
from langchain_community.vectorstores import FAISS
from langchain.prompts import ChatPromptTemplate
from langchain_community.llms import Ollama # The LLM import
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser



#Load Json
with open('products1.json', 'r',encoding="utf-8") as file:
    data = json.load(file)

# loader = JSONLoader(file_path='./products_data.json', jq_schema='.', text_content=False)
# data = loader.load()

print(f"Loaded {len(data)} documents.")


Loaded 2 documents.


In [6]:
# 2. Document Preprocessing and Chunking

# The JSONLoader might load the entire JSON as a single document or multiple.
# For better retrieval, we want to break down the information into smaller, meaningful chunks.
# For this specific JSON structure, it's better to iterate and create documents for each product.

processed_documents = []
for taxonomy_product_entry in data:
    taxonomy_product_name = taxonomy_product_entry.get("taxonomyProductName", "N/A")
    taxonomy_product_id = taxonomy_product_entry.get("taxonomyProductId", "N/A")

    for product in taxonomy_product_entry.get("products", []):
        product_name = product.get("productName", "N/A")
        product_id = product.get("productId", "N/A")
        product_sme = product.get("productSmeName", "N/A")

        # Create a detailed string for each product
        content = (
            f"Taxonomy Product ID: {taxonomy_product_id}\n"
            f"Taxonomy Product Name: {taxonomy_product_name}\n"
            f"Product ID: {product_id}\n"
            f"Product Name: {product_name}\n"
            f"Product SME: {product_sme}\n"
        )
        
        # Add components if they exist
        components = product.get("components", [])
        if components:
            content += "Components:\n"
            for comp in components:
                content += f"  - ID: {comp.get('componentID', 'N/A')}, Name: {comp.get('componentName', 'N/A')}\n"

        # Add market segments if they exist
        market_segments = product.get("marketSegments", [])
        if market_segments:
            content += "Market Segments:\n"
            for segment in market_segments:
                content += (
                    f"  - Segment Name: {segment.get('marketSegmentName', 'N/A')}\n"
                    f"    Availability: {segment.get('productAvailability', 'N/A')}\n"
                    f"    Status: {segment.get('status', 'N/A')}\n"
                    f"    Membership Types: {', '.join(segment.get('membershipTypes', []))}\n"
                )
        
        # Create a LangChain Document object for each product
        from langchain.docstore.document import Document
        processed_documents.append(Document(page_content=content, metadata={"product_id": product_id, "product_name": product_name}))

print(f"Created {len(processed_documents)} processed documents from product data.")
# Example of a processed document
# print("\nExample Processed Document:")
# print(processed_documents[0].page_content)

# Using a Text Splitter (even though our docs are already per-product, it's good practice)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False,
)
chunks = text_splitter.split_documents(processed_documents)
print(f"Number of chunks after splitting: {len(chunks)}")



Created 4 processed documents from product data.
Number of chunks after splitting: 4


In [8]:
# 3. Create Embeddings and Vector Store

# Initialize your embedding model (e.g., Ollama, OpenAIEmbeddings, HuggingFaceEmbeddings)
# For Ollama, ensure you have Ollama running and a model pulled (e.g., 'llama2')
# Example: ollama run llama2
embeddings = OllamaEmbeddings(model="nomic-embed-text") # You can change to other models like "nomic-embed-text" or "mxbai-embed-large" for better performance

# Create a FAISS vector store from the document chunks
print("Creating FAISS vector store...")
vectorstore = FAISS.from_documents(chunks, embeddings)
print("FAISS vector store created.")

# Create a retriever
retriever = vectorstore.as_retriever()

Creating FAISS vector store...
FAISS vector store created.


In [14]:
# 4. Set up the RAG Chain

# Initialize your Language Model (LLM)
# For Ollama, ensure it's running
llm = Ollama(model="llama2") # Or "mistral", "gemma", etc.

# Define the prompt template
template = """
You are a helpful assistant for product information. Use the following context to answer the user's question.
If you don't know the answer, just say that you don't have enough information to answer.

Context:
{context}

Question: {question}

Answer:
"""
prompt = ChatPromptTemplate.from_template(template)

# Construct the RAG chain
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)


In [None]:
# 5. Ask Questions!

print("\n--- Chatbot Ready ---")
print("Type 'exit' to quit.")

while True:
    user_query = input("You: ")
    if user_query.lower() == 'exit':
        print("Goodbye!")
        break
    
    response = rag_chain.invoke(user_query)
    print(f"Bot: {response}\n")


# Example Queries you can try:
# - What is the Diabetes Management Program?
# - Tell me about the LiverManagementProgram.
# - What are the components of NewManagementProgram?
# - Which programs are available for the 'Employer' market segment with 'Medicaid' membership type?
# - Who is the SME for HeartManagementProgram?
# - Do you have information about a product with ID 42424?
# - What are the membership types for the DiabetesManagementProgram?
# - What is the status of the LiverManagementProgram?
    


--- Chatbot Ready ---
Type 'exit' to quit.
Bot: Hello! I'm happy to help you with any questions you may have about the products listed in the documents you provided. Could you please specify which product you are interested in, or provide more context about what you are looking for?

Bot:  Thank you for providing the context! Based on the documents provided, I can see that there are three different products mentioned: DiabetesManagementProgram, LiverManagementProgram, and HeartManagementProgram.

Unfortunately, I don't have enough information to answer your question about the Diabetes Management Program as the document you provided does not mention this specific product. The document only mentions two products: LiverManagementProgram and HeartManagementProgram.

Could you please provide more context or clarify which product you are referring to? I'll do my best to help you with the information available.

Bot:  Thank you for providing me with the context. Based on the documents provid

KeyboardInterrupt: Interrupted by user