In [1]:
from langchain_pinecone import PineconeVectorStore
from langchain_community.document_loaders.readthedocs import ReadTheDocsLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
# Load environment configuration before the lookups below
# (presumably from a local .env file via python-dotenv -- TODO confirm).
load_dotenv()

import os
# Copy the personal key into OPENAI_API_KEY (presumably the variable the
# OpenAI clients used later in this notebook read -- TODO confirm).
# Raises KeyError if OPENAI_API_KEY_PERSONAL is not set in the environment.
os.environ["OPENAI_API_KEY"] = os.environ["OPENAI_API_KEY_PERSONAL"]

## Ingestion Section

In [7]:
def ingest_docs(filePath, index_name, embedding_model, batch_size=256):
    """Load, chunk, embed and upload documents to a Pinecone index.

    The documents are read with ``ReadTheDocsLoader(filePath)``, split with a
    ``RecursiveCharacterTextSplitter`` (chunk_size=600, chunk_overlap=50) and
    sent to ``PineconeVectorStore.add_documents`` in batches. One "." is
    printed per uploaded batch as a progress indicator.

    Args:
        filePath: Location handed to ``ReadTheDocsLoader``.
        index_name: Name of the Pinecone index passed to ``PineconeVectorStore``.
        embedding_model: Model name passed to ``OpenAIEmbeddings``.
        batch_size: Maximum number of chunks per ``add_documents`` call.
            Defaults to 256, the value that was previously hard-coded.

    Returns:
        list: Every id returned by ``add_documents``, concatenated in upload
        order. Empty when the splitter produced no chunks.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A non-positive step would either raise inside range() or silently
        # skip every document; fail loudly instead.
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    loader = ReadTheDocsLoader(filePath)
    raw_docs = loader.load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
    documents = splitter.split_documents(raw_docs)
    embeddings = OpenAIEmbeddings(model=embedding_model)
    vector_store = PineconeVectorStore(index_name=index_name, embedding=embeddings)

    ingestion_id_all = []
    for start in range(0, len(documents), batch_size):
        # Slicing clamps an out-of-range upper bound, so the final (short)
        # batch needs no special handling.
        batch = documents[start:start + batch_size]
        ingestion_id_all.extend(vector_store.add_documents(batch))
        print(".", end="")

    return ingestion_id_all

file_path = "../data/langchain-docs/api.python.langchain.com/en/latest/"
# Capture the returned ids instead of leaving the call as the cell's last
# expression, which would echo the whole id list as cell output.
# NOTE(review): re-running this cell presumably uploads the chunks again under
# new ids -- confirm before re-executing against a populated index.
ingested_ids = ingest_docs(filePath = file_path, index_name = "langchain-chabot", embedding_model = "text-embedding-3-small")

In [43]:
from langchain.chains.combine_documents.stuff import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain import hub
from langchain_openai import ChatOpenAI

In [58]:
def get_rag_agent(model, index_name, embeddings):
    """Assemble a retrieval chain over a Pinecone index.

    Args:
        model: Model name passed to ``ChatOpenAI``.
        index_name: Name of the Pinecone index passed to ``PineconeVectorStore``.
        embeddings: Embedding *model name* passed to ``OpenAIEmbeddings``
            (a string identifier, not an embeddings object).

    Returns:
        The object produced by ``create_retrieval_chain``, wired with the
        vector store's retriever and a stuff-documents chain that uses the
        "langchain-ai/retrieval-qa-chat" prompt pulled from the hub.
    """
    qa_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
    chat_model = ChatOpenAI(model=model)
    # Keep the parameter (a model name) distinct from the client built from it.
    embedder = OpenAIEmbeddings(model=embeddings)
    store = PineconeVectorStore(index_name=index_name, embedding=embedder)
    stuff_chain = create_stuff_documents_chain(llm=chat_model, prompt=qa_prompt)
    return create_retrieval_chain(
        retriever=store.as_retriever(),
        combine_docs_chain=stuff_chain,
    )

# Build the chain against the index name and embedding model that the
# ingestion call in this notebook uses.
retrieval_qa_agent = get_rag_agent(
    model="gpt-4o-mini",
    index_name="langchain-chabot",
    embeddings="text-embedding-3-small",
)

In [61]:
# Run one sample question through the chain; the 'answer' entry of the
# result is printed in the following cell.
sample_result = retrieval_qa_agent.invoke(
    {"input": "How to use OllamaEmbeddings in Langchain"}
)

In [62]:
# print() rather than a bare expression so the text is shown as-is instead of as a repr.
print(sample_result["answer"])

To use `OllamaEmbeddings` in Langchain, you can follow these general steps:

1. **Import the necessary module**: Ensure you have imported the `OllamaEmbeddings` from the embeddings package.

   ```python
   from embeddings.ollama import OllamaEmbeddings
   ```

2. **Instantiate the `OllamaEmbeddings` model**: Create an instance of the `OllamaEmbeddings` class. You'll need to configure any required parameters if necessary.

   ```python
   ollama_embeddings = OllamaEmbeddings()
   ```

3. **Prepare your data**: You need to have strings or `_Embed` objects that you want to embed.

4. **Embed your data**: Use the instance you created to embed your strings or objects.

   ```python
   embeddings = ollama_embeddings.embed(["Your text string here"])
   ```

5. **Utilize the embeddings**: Once you have your embeddings, you can use them in your Langchain pipeline or for any further processing or analysis.

This is a high-level overview, and the exact implementation might vary based on your spe

In [34]:
# NOTE(review): `ingestion_ids` is not assigned at top level in any visible
# cell, so this line raises NameError on Restart & Run All. It looks like a
# leftover scratch experiment (appends the placeholder "A") -- consider deleting.
ingestion_ids.extend(["A"])

In [35]:
# NOTE(review): depends on a kernel-only `ingestion_ids` (not assigned at top
# level in any visible cell); fails on a fresh kernel. Scratch inspection cell.
ingestion_ids

['cb53a5ab-d051-49e9-95e9-6f73bfe6eaaf',
 '3f618d82-f889-4b01-acd3-e667a8254089',
 '5e5bfca4-8984-4a2f-b833-4f205f355c44',
 'e0ed979f-7c81-4f2c-85ef-34f884f03627',
 '27398b8c-2f18-4e21-b2fa-5e07f6096418',
 '7ff53580-72cf-493d-90c5-6000b2557be4',
 '4636c174-e7b0-442c-a0f2-3daf2195d45c',
 'ca05f607-d66a-499e-8c84-4815bf544467',
 'fcfe550a-cef2-4320-b4ab-602de68d994e',
 'c53df134-8023-412b-b60c-bc126866f0ea',
 '918689be-2620-4ab5-90bf-cbf9028a88f8',
 '07e9806f-e8fc-4f1c-a596-a8dc980e8493',
 'c0c4e569-9441-400a-875f-98c2a4461634',
 '0eb61616-3be0-41e2-b53a-604f205f172f',
 '76de2a4b-cb1c-464e-9130-100ea69369b0',
 'c970793a-c081-4b65-9ce5-3a767c8205d8',
 'bd2b8fcc-03d6-4b8b-97bf-0e45d94127f9',
 'b4ed5ca2-b7b1-4ebf-a02c-12057549c5aa',
 '4a3a09de-9d05-4f33-9e4a-b3f455b7174d',
 '25249280-6e31-47d1-a91b-4a7108b4e943',
 'e54ebfb0-4462-4fc0-a071-4082ba964d72',
 '695f28ed-2bed-4a26-9fb7-93e03aee5a50',
 '57f87d5b-b556-4e33-8dc5-5e1bf5a7406f',
 'A']

In [15]:
# NOTE(review): `documents` is not assigned at top level in any visible cell,
# so this inspection only works with leftover kernel state (execution count 15
# appears after 62). Fails on Restart & Run All.
documents[2]

Document(metadata={'source': '../data/langchain-docs/api.python.langchain.com/en/latest/experimental_api_reference.html'}, page_content='Load the ResponseGenerator.\nautonomous_agents.hugginggpt.task_planner.load_chat_planner(llm)\nLoad the chat planner.\nlangchain_experimental.chat_models¶\nChat Models are a variation on language models.\nWhile Chat Models use language models under the hood, the interface they expose\nis a bit different. Rather than expose a “text in, text out” API, they expose\nan interface where “chat messages” are the inputs and outputs.\nClass hierarchy:\nBaseLanguageModel --> BaseChatModel --> <name>  # Examples: ChatOpenAI, ChatGooglePalm\nMain helpers:\nAIMessage, BaseMessage, HumanMessage\nClasses¶\nchat_models.llm_wrapper.ChatWrapper\nWrapper for chat LLMs.\nchat_models.llm_wrapper.Llama2Chat\nWrapper for Llama-2-chat model.\nchat_models.llm_wrapper.Mixtral\nSee https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1#instruction-format\nchat_models.llm_wr

In [9]:
# NOTE(review): `text` is not assigned at top level in any visible cell, so
# this inspection only works with leftover kernel state (execution count 9
# appears after 62). Fails on Restart & Run All.
text[1]

Document(metadata={'source': '../data/langchain-docs/api.python.langchain.com/en/latest/experimental_api_reference.html'}, page_content='langchain_experimental 0.0.62¶\nlangchain_experimental.agents¶\nAgent is a class that uses an LLM to choose\na sequence of actions to take.\nIn Chains, a sequence of actions is hardcoded. In Agents,\na language model is used as a reasoning engine to determine which actions\nto take and in which order.\nAgents select and use Tools and Toolkits for actions.\nFunctions¶\nagents.agent_toolkits.csv.base.create_csv_agent(...)\nCreate pandas dataframe agent by loading csv to a dataframe.\nagents.agent_toolkits.pandas.base.create_pandas_dataframe_agent(llm,\xa0df)\nConstruct a Pandas agent from an LLM and dataframe(s).\nagents.agent_toolkits.python.base.create_python_agent(...)\nConstruct a python agent from an LLM and tool.\nagents.agent_toolkits.spark.base.create_spark_dataframe_agent(llm,\xa0df)\nConstruct a Spark agent from an LLM and dataframe.\nagents.a