In [1]:
import json
from langchain.docstore.document import Document
import glob

In [2]:
import os

# LangSmith tracing settings and API credentials for the rest of the notebook.
# The two '#key here' values are placeholders: replace them before running
# any cell that talks to LangSmith or OpenAI.
os.environ.update({
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
    'LANGCHAIN_API_KEY': '#key here',
    'OPENAI_API_KEY': '#key here',
    'LANGCHAIN_PROJECT': 'RAG-for-Search',
})

from langchain_openai import ChatOpenAI
from openai import OpenAI

In [11]:
def load_okw_json(path):
    """Load every OKW JSON file directly inside ``path`` as a ``Document``.

    Args:
        path: Directory containing the JSON files (str or path-like).

    Returns:
        list: One ``Document`` per file, in sorted file-path order. The page
        content is built by ``create_document_content`` and the metadata is
        ``{'title': <title or ''>}``.

    Raises:
        json.JSONDecodeError: If a file in the directory is not valid JSON.
    """
    documents = []
    # sorted(): glob order is filesystem-dependent; sorting keeps runs reproducible.
    for file_path in sorted(glob.glob(os.path.join(path, '*'))):
        # '*' also matches sub-directories, which open() cannot read.
        if not os.path.isfile(file_path):
            continue
        # Explicit UTF-8 so parsing does not depend on the platform default encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        content = create_document_content(data)
        doc = Document(page_content=content, metadata={'title': data.get('title', '')})
        documents.append(doc)
    return documents
        

In [4]:
def create_document_content(data):
    """Flatten one OKW record into newline-separated text for embedding.

    The output always starts with ``Title``, ``Description`` and ``Keywords``
    lines. A line for inventory atoms, product atoms, tool-list atoms and
    processes follows only when the corresponding list is non-empty.

    Args:
        data (dict): Parsed OKW JSON object. Missing keys and explicit
            ``null`` list values are treated as empty.

    Returns:
        str: The assembled text, one section per line.
    """
    # .get(key, []) would still return None for an explicit JSON null,
    # so fall back with `or` instead.
    keywords = data.get('keywords') or []
    if isinstance(keywords, str):
        # A bare string would otherwise be joined character by character.
        keywords = [keywords]

    content_parts = [
        f"Title: {data.get('title', '')}",
        f"Description: {data.get('description', '')}",
        f"Keywords: {', '.join(str(keyword) for keyword in keywords)}",
    ]

    # (JSON key, label) pairs for the sections rendered as "identifier: description".
    atom_sections = (
        ('inventory-atoms', 'Inventory Atoms'),
        ('product-atoms', 'Product Atoms'),
        ('tool-list-atoms', 'Tool List Atoms'),
    )
    for key, label in atom_sections:
        atoms = data.get(key) or []
        if not atoms:
            continue
        descriptions = [
            f"{atom.get('identifier', '')}: {atom.get('description', '')}"
            for atom in atoms
        ]
        content_parts.append(f"{label}: {', '.join(descriptions)}")

    # Processes contribute their identifiers only.
    processes = data.get('processes') or []
    if processes:
        identifiers = []
        for process in processes:
            identifier = process.get('identifier', '')
            # str.join rejects None and other non-string values.
            identifiers.append('' if identifier is None else str(identifier))
        content_parts.append(f"Processes: {', '.join(identifiers)}")

    return '\n'.join(content_parts)

In [5]:
# Directory holding the OKW JSON files, relative to the current working directory.
directory_path = './OKWs/'
# Build the list of Documents from the files in that directory.
documents = load_okw_json(directory_path)


In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded documents into small overlapping chunks before embedding.
# NOTE(review): chunk_size/chunk_overlap are presumably measured in characters
# (the splitter's default length function) — confirm. 100 is small enough that
# one OKW record will be spread over many chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 100,
    chunk_overlap = 20
    )

# `splits` is what gets embedded and indexed downstream.
splits = text_splitter.split_documents(documents)

In [7]:
# Use the langchain_openai package already imported at the top of the notebook,
# rather than the legacy `langchain.embeddings` path for OpenAIEmbeddings.
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Embedding model; reads OPENAI_API_KEY from the environment, so a valid key
# must be configured before this cell runs.
embeddings = OpenAIEmbeddings()

# Embed every chunk and index it in a Chroma vector store.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
)

# Retriever interface over the store, used by the retrieval chain below.
retriever = vectorstore.as_retriever()



  embeddings = OpenAIEmbeddings()


AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-coMOR**********************************YN2c. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

In [None]:
from langchain.prompts import ChatPromptTemplate

# Multi-query prompt: asks the model to rewrite one user query into five
# alternatives, returned one per line.
# The template's only input variable is `query`, so whatever invokes this
# prompt must supply a mapping with a "query" key.
template = """You are an AI assistant helping to find entities based on user requirements. 
Generate five different version of the given user query to retrieve entities that match the requested inventory items, products, tools, or processes. 
Consider different combinations and synonyms of the keywords. 
Provide these alternative queries separated by newlines. 

Original query: {query}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

In [None]:
# NOTE(review): confirm that `langchain.output_parsers` exports StrOutputParser
# in the installed LangChain version; this cell has not been executed.
from langchain.output_parsers import StrOutputParser
# Same ChatOpenAI as the import at the top of the notebook; importing it from
# the legacy `langchain.chat_models` path would shadow that binding.
from langchain_openai import ChatOpenAI


def _split_queries(text):
    """Split the model's reply into one query per non-empty line.

    Blank and whitespace-only lines are dropped so that empty strings are
    never sent to the retriever as queries.
    """
    return [line.strip() for line in text.splitlines() if line.strip()]


# prompt -> chat model -> plain string -> list of alternative queries
generate_queries = (
    prompt_perspectives
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
    | _split_queries
)



In [None]:
def get_unique_union(documents_list):
    """Merge several retrieval result lists into one de-duplicated list.

    Two documents count as duplicates only when both their ``page_content``
    and their ``metadata`` match. Chunks with identical text that come from
    different source entities (different metadata) are therefore all kept.

    Args:
        documents_list: Iterable of lists of document-like objects exposing
            ``page_content`` and, optionally, ``metadata``.

    Returns:
        list: Unique documents in order of first appearance.
    """
    unique_docs = {}
    for docs in documents_list:
        for doc in docs:
            metadata = getattr(doc, 'metadata', None) or {}
            # Serialise metadata so unhashable values (lists, dicts) can be
            # part of the key; sort_keys makes key order irrelevant.
            key = (doc.page_content, json.dumps(metadata, sort_keys=True, default=str))
            unique_docs[key] = doc
    return list(unique_docs.values())

In [None]:
from langchain.chains import SequentialChain

# Example user request that is sent through the pipeline.
question = "I need entities that can produce solar panels and have welding capabilities."

# Retrieval pipeline: generate_queries produces a list of alternative queries,
# retriever.map() is applied to that list (presumably one retrieval per query —
# confirm), and get_unique_union merges the per-query results into one list.
retrieval_chain = generate_queries | retriever.map() | get_unique_union


In [None]:
from operator import itemgetter
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate

# Answer prompt: the retrieved context plus the user's question.
template = """Based on the following context, list the entities that best match the user's requirements.

Context:
{context}

Question: {question}

Provide a list of matching entities with their titles and brief descriptions.
"""

prompt = ChatPromptTemplate.from_template(template)
llm = ChatOpenAI(temperature=0)

# Combine the context and question.
# The chain is invoked with {"question": ...}, but the multi-query prompt at
# the head of retrieval_chain declares a `query` variable. Remap the key
# before the retrieval branch so the prompt receives the variable it expects.
final_chain = (
    {
        "context": {"query": itemgetter("question")} | retrieval_chain,
        "question": itemgetter("question"),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# Get the final answer
final_answer = final_chain.invoke({"question": question})
print(final_answer)
