In [2]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = os.getenv("LANGCHAIN_TRACING_V2")
os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")
os.environ['USER_AGENT'] = 'myagent'

In [4]:
## Data Ingestion
DATA_URL = "https://www.investopedia.com/updates/top-developing-countries/"

from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader(DATA_URL)
docs = loader.load()

In [5]:
## Data Splitting

from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(docs)

In [6]:
## Vector Embedding

from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x00000200855A4C20>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x00000200855A5550>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [7]:
## Save embedding to a Vector DB

from langchain_community.vectorstores import FAISS
vector_store_db = FAISS.from_documents(documents, embeddings)
vector_store_db

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
## Query From a vector db
query="The country's average life expectancy at birth is 82.3 years"
result=vector_store_db.similarity_search(query)
result[0].page_content

In [9]:
## Use LLM
from langchain_openai import ChatOpenAI
llm=ChatOpenAI(model="gpt-4o")

In [10]:
## Retrieval Chain, Document chain
## Just querying is not enough, we need results based on the context, so we go for Retrievel Chain
## Context + Input = Prompt and chain it to LLM

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt=ChatPromptTemplate.from_template(
    """
Answer the following question based only on the provided context:
<context>
{context}
</context>


"""
)

document_chain=create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000002009D97E900>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000002009D97ECF0>, root_client=<openai.OpenAI object at 0x00000200A0075090>, root_async_client=<openai.AsyncOpenAI object at 0x00000200A00756D0>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, confi

In [11]:
# Retrieval Chain
retriever=vector_store_db.as_retriever()
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain)

NameError: name 'vector_store_db' is not defined

In [None]:
## Get the response form the LLM
response=retrieval_chain.invoke({"input":"Belgium is a developed country."})
response['answer']

response['context']