In [None]:
from dotenv import load_dotenv

# Read key=value pairs from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

In [10]:
from langchain import hub
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI

In [11]:
def format_docs(docs):
    """Merge the text of the given documents into a single string.

    The ``page_content`` of each item is taken in iteration order and the
    pieces are separated by a blank line (two newline characters).

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The concatenated text; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

Creating loader and loading documents from directory

In [12]:
# Load every *.txt file under the docs folder (recursively) as plain text.
# silent_errors=True asks the loader not to raise on individual files that fail
# to load, so the result may be shorter than the file count — or empty.
loader = DirectoryLoader("./lang_chain_docs_2024", glob="**/*.txt", show_progress=True, recursive=True,
                         silent_errors=True, loader_cls=TextLoader)
documents = loader.load()

# Fail here with a clear message instead of an opaque IndexError on documents[0].
if not documents:
    raise RuntimeError("No documents were loaded from ./lang_chain_docs_2024; "
                       "check the path and the '**/*.txt' glob.")

# Show only the beginning of the first document; printing it whole floods the output.
first_document_text = str(documents[0])
print("First document:", first_document_text[:500] + ("..." if len(first_document_text) > 500 else ""))

100%|██████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 122.15it/s]

First document: page_content='{\n "cells": [\n  {\n   "cell_type": "raw",\n   "id": "366a0e68-fd67-4fe5-a292-5c33733339ea",\n   "metadata": {},\n   "source": [\n    "---\\n",\n    "sidebar_position: 0\\n",\n    "title: Get started\\n",\n    "keywords: [chain.invoke]\\n",\n    "---"\n   ]\n  },\n  {\n   "cell_type": "markdown",\n   "id": "befa7fd1",\n   "metadata": {},\n   "source": [\n    "LCEL makes it easy to build complex chains from basic components, and supports out of the box functionality such as streaming, parallelism, and logging."\n   ]\n  },\n  {\n   "cell_type": "markdown",\n   "id": "9a9acd2e",\n   "metadata": {},\n   "source": [\n    "## Basic example: prompt + model + output parser\\n",\n    "\\n",\n    "The most basic and common use case is chaining a prompt template and a model together. To see how this works, let\'s create a chain that takes a topic and generates a joke:"\n   ]\n  },\n  {\n   "cell_type": "raw",\n   "id": "278b0027",\n   "metadata": {},\n   "source": 




Splitting documents into chunks

In [13]:
# Cut the loaded documents into overlapping chunks: each chunk is capped at
# chunk_size and shares chunk_overlap with its neighbour.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
splits = text_splitter.split_documents(documents)

Creating and indexing into a vectorstore (ChromaDB)

In [14]:
# Embed every chunk with the OpenAI embedding model and index the result in Chroma.
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)

Creating retriever from vectorstore

In [15]:
# Wrap the vector store in a retriever (default settings) so it can be used as a chain step.
retriever = vectorstore.as_retriever()

Setting up a prompt template

In [16]:
# Pull the "rlm/rag-prompt" template from the LangChain Hub (needs network access).
# The printed repr shows which input variables the template expects.
prompt = hub.pull("rlm/rag-prompt")
print(prompt)

input_variables=['context', 'question'] messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))]


Configuring the OpenAI chat model (LLM) client...

In [17]:
# Chat model that writes the final answer. temperature=0 requests the least random
# sampling — NOTE(review): answers may still differ slightly between runs.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

Creating RAG chain...

In [18]:
# The incoming question feeds both prompt variables:
#   "context"  -> chunks fetched by the retriever, joined into one string by format_docs
#   "question" -> the input itself, passed through unchanged
prompt_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Fill the prompt, call the chat model, and reduce the model message to a plain string.
rag_chain = prompt_inputs | prompt | llm | StrOutputParser()

Invoking RAG...

In [19]:
# Ask a single question through the whole pipeline and show the model's answer.
question = (
    "In LangChain, how can I get the graph"
    " of a chain and print it as ascii?"
)
result = rag_chain.invoke(question)
print(result)

To get the graph of a chain in LangChain, you can use the command "chain.get_graph()". To print the graph as ASCII, you can use the command "chain.get_graph().print_ascii()".


In [20]:
# Render the structure of the chain as an ASCII diagram.
chain_graph = rag_chain.get_graph()
chain_graph.print_ascii()

             +---------------------------------+           
             | Parallel<context,question>Input |           
             +---------------------------------+           
                    ****               ****                
                 ***                       ***             
               **                             ***          
+----------------------+                         **        
| VectorStoreRetriever |                          *        
+----------------------+                          *        
            *                                     *        
            *                                     *        
            *                                     *        
+---------------------+                   +-------------+  
| Lambda(format_docs) |                   | Passthrough |  
+---------------------+                   +-------------+  
                    ****               ***                 
                        ***         *** 

In [21]:
# Remove the Chroma collection behind `vectorstore`.
# NOTE(review): presumably cleanup so a later run starts from an empty index; objects built
# on this vector store should be re-created before being used again — confirm.
vectorstore.delete_collection()