In [None]:
%pip install langchain langchain_openai beautifulsoup4 langchain-community --upgrade

In [10]:
# Get the openai secret key:
import getpass

secret_key = getpass.getpass('Please enter your openai key:')

In [14]:
from bs4 import BeautifulSoup
from langchain_community.document_loaders import TextLoader
import requests
import os

# Get this file and save it locally:
url = "https://python.langchain.com/docs/tutorials/"

# Save it locally:
request = requests.get(url)

# Extract the text from the HTML:
soup = BeautifulSoup(request.text, 'html.parser')
text = soup.get_text()

resources_dir = "resources"
os.makedirs(resources_dir, exist_ok=True)

file_path = os.path.join(resources_dir, "Tutorials.md")
with open(file_path, "w", encoding="utf-8") as f:
    f.write(text)

loader = TextLoader(file_path)
docs = loader.load()
print(docs)

[Document(metadata={'source': 'resources\\Tutorials.md'}, page_content='\n\n\n\n\nTutorials | đź¦śď¸Źđź”— LangChain\n\n\n\n\n\n\nSkip to main contentJoin us at  Interrupt: The Agent AI Conference by LangChain on May 13 & 14 in San Francisco!IntegrationsAPI ReferenceMoreContributingPeopleError referenceLangSmithLangGraphLangChain HubLangChain JS/TSv0.3v0.3v0.2v0.1đź’¬SearchIntroductionTutorialsBuild a Question Answering application over a Graph DatabaseTutorialsBuild a simple LLM application with chat models and prompt templatesBuild a ChatbotBuild a Retrieval Augmented Generation (RAG) App: Part 2Build an Extraction ChainBuild an AgentTaggingBuild a Retrieval Augmented Generation (RAG) App: Part 1Build a semantic search engineBuild a Question/Answering system over SQL dataSummarize TextHow-to guidesHow-to guidesHow to use tools in a chainHow to use a vectorstore as a retrieverHow to add memory to chatbotsHow to use example selectorsHow to add a semantic layer over graph databaseHow to 

In [15]:
from langchain_core.documents import Document
[ Document(page_content='test', metadata={'test': 'test'}) ] 

[Document(metadata={'test': 'test'}, page_content='test')]

In [17]:
# Split the text into sentences:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size = 300,
    chunk_overlap  = 50,
    length_function = len,
    is_separator_regex = False,
)

final_docs = text_splitter.split_documents(loader.load())

In [18]:
len(final_docs)

42

In [19]:
from langchain_openai.chat_models import ChatOpenAI
from langchain.chains.summarize import load_summarize_chain

chat = ChatOpenAI(api_key=secret_key, model="gpt-4o-mini")

In [20]:
chain = load_summarize_chain(llm=chat, chain_type="map_reduce")
result = chain.invoke({
    "input_documents": final_docs,
})

print(result)

{'input_documents': [Document(metadata={'source': 'resources\\Tutorials.md'}, page_content='Tutorials | đź¦śď¸Źđź”— LangChain'), Document(metadata={'source': 'resources\\Tutorials.md'}, page_content='Skip to main contentJoin us at  Interrupt: The Agent AI Conference by LangChain on May 13 & 14 in San Francisco!IntegrationsAPI ReferenceMoreContributingPeopleError referenceLangSmithLangGraphLangChain HubLangChain JS/TSv0.3v0.3v0.2v0.1đź’¬SearchIntroductionTutorialsBuild a Question Answering'), Document(metadata={'source': 'resources\\Tutorials.md'}, page_content='a Question Answering application over a Graph DatabaseTutorialsBuild a simple LLM application with chat models and prompt templatesBuild a ChatbotBuild a Retrieval Augmented Generation (RAG) App: Part 2Build an Extraction ChainBuild an AgentTaggingBuild a Retrieval Augmented Generation (RAG) App:'), Document(metadata={'source': 'resources\\Tutorials.md'}, page_content='a Retrieval Augmented Generation (RAG) App: Part 1Build a se

In [27]:
display(result)

{'input_documents': [Document(metadata={'source': 'resources\\Tutorials.md'}, page_content='Tutorials | đź¦śď¸Źđź”— LangChain'),
  Document(metadata={'source': 'resources\\Tutorials.md'}, page_content='Skip to main contentJoin us at  Interrupt: The Agent AI Conference by LangChain on May 13 & 14 in San Francisco!IntegrationsAPI ReferenceMoreContributingPeopleError referenceLangSmithLangGraphLangChain HubLangChain JS/TSv0.3v0.3v0.2v0.1đź’¬SearchIntroductionTutorialsBuild a Question Answering'),
  Document(metadata={'source': 'resources\\Tutorials.md'}, page_content='a Question Answering application over a Graph DatabaseTutorialsBuild a simple LLM application with chat models and prompt templatesBuild a ChatbotBuild a Retrieval Augmented Generation (RAG) App: Part 2Build an Extraction ChainBuild an AgentTaggingBuild a Retrieval Augmented Generation (RAG) App:'),
  Document(metadata={'source': 'resources\\Tutorials.md'}, page_content='a Retrieval Augmented Generation (RAG) App: Part 1Buil

In [28]:
display(result["output_text"])

'The "Tutorials | LangChain" section offers a comprehensive collection of instructional resources aimed at helping users understand and implement LangChain, a framework for building language model-powered applications. The tutorials encompass setup, use cases, advanced features, and specific applications like Question Answering systems using Graph Databases, Retrieval Augmented Generation (RAG), and chatbot enhancements. Additional topics include managing LLM responses, data extraction techniques, and integrating with various tools and APIs. Users can also find guidelines for developing custom applications, migrating between versions, and utilizing tools like LangSmith for performance monitoring. Furthermore, resources are available for beginners, covering essential components and integrations to facilitate quick application development.'