In [25]:
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.tools import tool
from langchain_nomic.embeddings import NomicEmbeddings
from langchain_ollama.llms import OllamaLLM
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

load_dotenv()

model = ChatOpenAI(model="gpt-4o-mini")

In [3]:
model.invoke("What is your name?")

AIMessage(content='I’m called ChatGPT. How can I assist you today?', response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 12, 'total_tokens': 25}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_0f03d4f0ee', 'finish_reason': 'stop', 'logprobs': None}, id='run-615823cb-71bf-4a42-a408-111af66c089f-0', usage_metadata={'input_tokens': 12, 'output_tokens': 13, 'total_tokens': 25})

In [4]:
from langchain_core.documents import Document

documents = [
    Document(
        page_content="Dogs are great companions, known for their loyalty and friendliness.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Cats are independent pets that often enjoy their own space.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata={"source": "fish-pets-doc"},
    ),
    Document(
        page_content="Parrots are intelligent birds capable of mimicking human speech.",
        metadata={"source": "bird-pets-doc"},
    ),
    Document(
        page_content="Rabbits are social animals that need plenty of space to hop around.",
        metadata={"source": "mammal-pets-doc"},
    ),
]

In [5]:
documents

[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.'),
 Document(metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]

In [7]:
vectorstore = Chroma.from_documents(
    documents,
    embedding=OpenAIEmbeddings(),
)

In [10]:
doc_1 = Document(page_content="My computer is an ASUS.", metadata={"source": "memory"})

In [11]:
vectorstore.from_documents([doc_1], embedding=OpenAIEmbeddings())

<langchain_chroma.vectorstores.Chroma at 0x1761de0f490>

In [13]:
vectorstore.similarity_search("Asus")

[Document(metadata={'source': 'memory'}, page_content='My computer is an ASUS.'),
 Document(metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.')]

In [15]:
vectorstore.similarity_search_with_score("ASUS")



[(Document(metadata={'source': 'memory'}, page_content='My computer is an ASUS.'),
  0.17501795291900635),
 (Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
  0.5251173377037048),
 (Document(metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.'),
  0.5368958115577698),
 (Document(metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
  0.5589426755905151)]

### Retrievers

In [16]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1)
retriever.batch(["cat", "shark"])

[[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')],
 [Document(metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.')]]

In [19]:
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)
retriever.batch(["Asus"])

[[Document(metadata={'source': 'memory'}, page_content='My computer is an ASUS.')]]

In [22]:
message = """
Answer this question using the provided context only

{question}

Context:
{context}
"""
prompt = ChatPromptTemplate.from_messages([("human", message)])

raq_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | model | StrOutputParser()

In [23]:
raq_chain.invoke("Tell me about cats!")

'Cats are independent pets that often enjoy their own space.'