**Make sure you load the API keys for cloud providers!**

You can set the API keys as environment variables yourself or load them with a script. Because the keys are private, they are not included in the repository.

In [1]:
# Load the API keys into environment variables.
import os
import sys

# Put the parent of the current working directory at the front of the module
# search path so that the `config` import below can be resolved from there.
sys.path.insert(0, os.path.abspath(os.pardir))

# set_environment() handles the keys - as explained early in chapter 2.
from config import set_environment

set_environment()

# Basic RAG Implementation

In [4]:
# For query transformation
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# For basic RAG implementation
from langchain_community.document_loaders import JSONLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# 1. Load documents
# The jq expression extracts the `content` field from each item of the JSON array.
loader = JSONLoader(
    file_path="knowledge_base.json",
    jq_schema=".[].content",
    text_content=True
)
documents = loader.load()

# 2. Convert to vectors
embedder = OpenAIEmbeddings()
texts = [doc.page_content for doc in documents]
embeddings = embedder.embed_documents(texts)

# 3. Store in vector database
# Build the index from the vectors computed in step 2. Calling
# FAISS.from_documents(documents, embedder) here would send every text to the
# embedding API a second time and leave `embeddings` unused.
vector_db = FAISS.from_embeddings(
    text_embeddings=list(zip(texts, embeddings)),
    embedding=embedder,  # still needed so the store can embed incoming queries
    metadatas=[doc.metadata for doc in documents],
)

# 4. Retrieve similar docs
query = "What are the effects of climate change?"
results = vector_db.similarity_search(query)

In [5]:
# Show the documents returned by the similarity search in the previous cell.
# NOTE(review): the recorded output lists passages about transformers and RAG
# rather than climate change - presumably the knowledge base has no climate
# content, so these are just the closest available matches; confirm against
# knowledge_base.json.
print(results)

[Document(id='6153c01c-db7c-40a3-9eaa-b49b270e8a20', metadata={'source': '/home/ben/generative_ai_with_langchain/chapter4/knowledge_base.json', 'seq_num': 1}, page_content="Transformer models were introduced in the paper 'Attention Is All You Need' by Vaswani et al. in 2017. The architecture relies on self-attention mechanisms rather than recurrent or convolutional neural networks. This design allows for more parallelization during training and better handling of long-range dependencies in text."), Document(id='131c5049-2858-4562-b331-c3492f92ed15', metadata={'source': '/home/ben/generative_ai_with_langchain/chapter4/knowledge_base.json', 'seq_num': 4}, page_content='Retrieval-Augmented Generation (RAG) combines a retrieval system with a text generator. The retriever fetches relevant documents from a knowledge base, and these documents are then provided as context to the generator. RAG models can be fine-tuned end-to-end and leverage large pre-trained models like BART or T5 for generat

# KNN Retriever

In [6]:
from langchain_community.retrievers import KNNRetriever
from langchain_openai import OpenAIEmbeddings

# Build a KNN retriever over the documents loaded in the basic RAG cell,
# embedding them with OpenAI embeddings.
retriever = KNNRetriever.from_documents(documents, OpenAIEmbeddings())
# Search with the question defined in the basic RAG cell. The earlier literal
# "query" looked up the word "query" itself rather than a real question.
results = retriever.invoke(query)

# External Search API Retriever

In [8]:
from langchain_community.retrievers.pubmed import PubMedRetriever

# Create a PubMed retriever with its default settings.
# Note: this rebinds `retriever`, replacing the KNN retriever from the previous cell.
retriever = PubMedRetriever()
# Run a free-text search; `results` is likewise overwritten with the returned value.
# NOTE(review): presumably this calls the PubMed web service and needs network access - confirm.
results = retriever.invoke("COVID research")