## Simple Gen AI APP Using Langchain

In [9]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [10]:
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
## Langsmith Tracking
os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_PROJECT'] = os.getenv('LANGCHAIN_PROJECT')

#### 1 - Load Data - Neste caso um página web

In [11]:
## Data Ingestion -- From the website we need to scrape the data
from langchain_community.document_loaders import WebBaseLoader


In [12]:
loader = WebBaseLoader('https://docs.smith.langchain.com/prompt_engineering/concepts')
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x73fde2b95e70>

In [13]:
docs = loader.load()
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/prompt_engineering/concepts', 'title': 'Concepts | 🦜️🛠️ LangSmith', 'description': 'Prompt engineering is one the core pillars of LangSmith.', 'language': 'en'}, page_content='\n\n\n\n\nConcepts | 🦜️🛠️ LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentWe are growing and hiring for multiple roles for LangChain, LangGraph and LangSmith. Join our team!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringQuickstartsTutorialsOptimize a classifierHow-to GuidesCreate a promptRun the playground against a custom LangServe model serverRun the playground against an OpenAI-compliant model provider/proxyUpdate a promptManage prompts programmaticallyManaging Prompt SettingsPrompt TagsOpen a prompt from a traceLangChain HubPrompt CanvasInclude multimodal content in a promptHow to use multiple messages in the playgroundConceptual GuideDeployment (LangGraph Platform)AdministrationSelf-hostin

#### Split - (Transforma os dados em chunks)

In [14]:
## Primeiro fizemos o carregamento dos dados (neste caso uma página web)
## Agora faremos aqui a divisão do texto em chunks
## Depois faremos o Embedding, que transforma os textos divididos em vetores (Vector Embedding)
## A sequência é: Load => Split (text into chunks) => Embed => Store (Vector Store DB)

from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
documents = text_splitter.split_documents(docs)
documents


[Document(metadata={'source': 'https://docs.smith.langchain.com/prompt_engineering/concepts', 'title': 'Concepts | 🦜️🛠️ LangSmith', 'description': 'Prompt engineering is one the core pillars of LangSmith.', 'language': 'en'}, page_content='Concepts | 🦜️🛠️ LangSmith'),
 Document(metadata={'source': 'https://docs.smith.langchain.com/prompt_engineering/concepts', 'title': 'Concepts | 🦜️🛠️ LangSmith', 'description': 'Prompt engineering is one the core pillars of LangSmith.', 'language': 'en'}, page_content='Skip to main contentWe are growing and hiring for multiple roles for LangChain, LangGraph and LangSmith. Join our team!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringQuickstartsTutorialsOptimize a classifierHow-to GuidesCreate a promptRun the playground against a custom LangServe model serverRun the playground against an OpenAI-compliant model provider/proxyUpdate a promptManage prompts programmaticallyManaging Prompt SettingsPro

#### Embed - Usamos a técnica de Embeddings (OpenAIEmbeddings)

In [15]:
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()


#### Store - Vector Data Base (Vector DB)

In [16]:
from langchain_community.vectorstores import FAISS
vectorstoredb = FAISS.from_documents(documents, embeddings)

In [17]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x73fd9fe24ca0>

### Query From a vector DB

In [21]:
query = "A prompt sets the stage for the model, like an audience member at an improv show directing the actor's next performance"
result = vectorstoredb.similarity_search(query)
result[0].page_content

'Prompt engineering is one the core pillars of LangSmith.\nWhile traditional software application are built by writing code, AI applications often involve a good amount of writing prompts.\nWe aim to make this as easy possible by providing a set of tools designed to enable and facilitate prompt engineering.\nWhy prompt engineering?\u200b\nA prompt sets the stage for the model, like an audience member at an improv show directing the actor\'s next performance - it guides the model\'s\nbehavior without changing its underlying capabilities. Just as telling an actor to "be a pirate" determines how they act,\na prompt provides instructions, examples, and context that shape how the model responds.\nPrompt engineering is important because it allows you to change the way the model behaves.\nWhile there are other ways to change the model\'s behavior (like fine-tuning), prompt engineering is usually the simplest to get started with\nand often provides the highest ROI.'

### Retrieval Chain, Document Chain

In [None]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model = 'gpt-4o')

In [23]:
## Create a Chain for passing a list of Documents to a model
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """
    Answer the following question based only on the provided context:
    <context>
    {context}
    </context>
    """
)

document_chain = create_stuff_documents_chain(llm, prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based only on the provided context:\n    <context>\n    {context}\n    </context>\n    '), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x73fd9ad8e0e0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x73fd9ad8d3f0>, root_client=<openai.OpenAI object at 0x73fda02b4400>, root_async_client=<openai.AsyncOpenAI object at 0x73fd9ad8d420>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, confi

In [24]:
from langchain_core.documents import Document

document_chain.invoke({
    "input": "A prompt sets the stage for the model, like an audience member at an improv show directing the actor's next performance",
    "context": [Document(page_content="A prompt sets the stage for the model, like an audience member at an improv show directing the actor's next performance - it guides the model's behavior without changing its underlying capabilities")]
})

"A prompt influences the model's behavior by guiding its responses, similar to how an audience member at an improv show suggests the direction for the actor's next performance, but it does not alter the model's fundamental capabilities."

However, we want the documents to first come from the retriever we just set up. That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given question.

In [25]:
## Return VectorStoreRetriever initialized from this VectorStore
retriever = vectorstoredb.as_retriever()

In [26]:
from langchain.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [27]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x73fd9fe24ca0>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based only on the provided context:\n    <context>\n    {context}\n    </context>\n    '), additional_kwargs={})])
  

In [28]:
## Get the response from the LLM
response = retrieval_chain.invoke({"input": "A prompt sets the stage for the model, like an audience member at an improv show directing the actor's next performance"})
response['answer']

"What is the importance of prompt engineering in AI applications according to the provided context?\n\nPrompt engineering is crucial because it allows for altering the behavior of the model without changing its underlying capabilities. It's compared to giving directions to an actor, where the prompt sets the stage for the model and guides its responses through instructions, examples, and context. While there are other methods like fine-tuning that can change a model's behavior, prompt engineering is often the simplest to start with and can provide the highest return on investment (ROI)."

In [29]:
response

{'input': "A prompt sets the stage for the model, like an audience member at an improv show directing the actor's next performance",
 'context': [Document(id='b80d37eb-837d-4034-8add-4242bbf6b7bf', metadata={'source': 'https://docs.smith.langchain.com/prompt_engineering/concepts', 'title': 'Concepts | 🦜️🛠️ LangSmith', 'description': 'Prompt engineering is one the core pillars of LangSmith.', 'language': 'en'}, page_content='Prompt engineering is one the core pillars of LangSmith.\nWhile traditional software application are built by writing code, AI applications often involve a good amount of writing prompts.\nWe aim to make this as easy possible by providing a set of tools designed to enable and facilitate prompt engineering.\nWhy prompt engineering?\u200b\nA prompt sets the stage for the model, like an audience member at an improv show directing the actor\'s next performance - it guides the model\'s\nbehavior without changing its underlying capabilities. Just as telling an actor to "b

In [30]:
response['context']

[Document(id='b80d37eb-837d-4034-8add-4242bbf6b7bf', metadata={'source': 'https://docs.smith.langchain.com/prompt_engineering/concepts', 'title': 'Concepts | 🦜️🛠️ LangSmith', 'description': 'Prompt engineering is one the core pillars of LangSmith.', 'language': 'en'}, page_content='Prompt engineering is one the core pillars of LangSmith.\nWhile traditional software application are built by writing code, AI applications often involve a good amount of writing prompts.\nWe aim to make this as easy possible by providing a set of tools designed to enable and facilitate prompt engineering.\nWhy prompt engineering?\u200b\nA prompt sets the stage for the model, like an audience member at an improv show directing the actor\'s next performance - it guides the model\'s\nbehavior without changing its underlying capabilities. Just as telling an actor to "be a pirate" determines how they act,\na prompt provides instructions, examples, and context that shape how the model responds.\nPrompt engineerin