Simple Gen AI application using Langchain

In [7]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "first-gen-ai-app"



Data Ingestion 

In [8]:
## Data Ingestion -- From the website we need to scrape the data
from langchain_community.document_loaders import WebBaseLoader

loader=WebBaseLoader("https://docs.smith.langchain.com/prompt_engineering/concepts")
docs=loader.load()


1. Load Data
2. Split the data to Chunks
3. Convert these chunks to Vectors using Embedding techniques
4. Vector storage to vector DBs (like FAISS, Chroma, Astra DB)

In [9]:
## Data Splitting -- Split the data into smaller chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
documents=text_splitter.split_documents(docs)
documents

[Document(metadata={'source': 'https://docs.smith.langchain.com/prompt_engineering/concepts', 'title': 'Concepts | ğŸ¦œï¸�ğŸ›\xa0ï¸� LangSmith', 'description': 'Prompt engineering is one the core pillars of LangSmith.', 'language': 'en'}, page_content='Concepts | ğŸ¦œï¸�ğŸ›\xa0ï¸� LangSmith'),
 Document(metadata={'source': 'https://docs.smith.langchain.com/prompt_engineering/concepts', 'title': 'Concepts | ğŸ¦œï¸�ğŸ›\xa0ï¸� LangSmith', 'description': 'Prompt engineering is one the core pillars of LangSmith.', 'language': 'en'}, page_content='Skip to main contentOur Building Ambient Agents with LangGraph course is now available on LangChain Academy!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringQuickstartsTutorialsOptimize a classifierSync Prompts with GitHubHow-to GuidesCreate a promptRun the playground against a custom LangServe model serverRun the playground against an OpenAI-compliant model provider/proxyUpdate a promptManage

Convert these Chunks to Vectors using Embedding techniques

In [10]:
from langchain.embeddings import OpenAIEmbeddings
embeddings=OpenAIEmbeddings()

In [11]:
# Using Embeddings to convert the documents to vectors and
#  store them in a vector database

from langchain_community.vectorstores import FAISS
vectorstoredb=FAISS.from_documents(documents, embeddings)

In [12]:
## Now you can use the `vectorstoredb` to perform similarity searches 
# or other operations as needed.
query = "what is is a key part of iterating and collaborating on your different prompts?"
result=vectorstoredb.similarity_search(query)
print(result)

[Document(id='440f7cdb-924e-48b2-b830-e8cebda07be0', metadata={'source': 'https://docs.smith.langchain.com/prompt_engineering/concepts', 'title': 'Concepts | ğŸ¦œï¸�ğŸ›\xa0ï¸� LangSmith', 'description': 'Prompt engineering is one the core pillars of LangSmith.', 'language': 'en'}, page_content='Prompt Versioningâ€‹\nVerisioning is a key part of iterating and collaborating on your different prompts.\nCommitsâ€‹\nEvery saved update to a prompt creates a new commit. You can view previous commits, making it easy to review earlier prompt versions or revert to a previous state if needed. In the SDK, you can access a specific commit of a prompt by specifying the commit hash along with the prompt name (e.g. prompt_name:commit_hash).\nIn the UI, you can compare a commit with its previous version by toggling the "diff" button in the top-right corner of the Commits tab.'), Document(id='7bb6a1fe-7d4c-48b5-8a45-ac80c3ff0b8a', metadata={'source': 'https://docs.smith.langchain.com/prompt_engineering/