In [1]:
import os
from dotenv import load_dotenv, find_dotenv
# Load variables from a .env file, if one is found, into the process environment.
# The result is deliberately not bound to `_`: this call is not the cell's last
# expression, so nothing is echoed anyway, and rebinding `_` would stop IPython
# from keeping its last-output shortcut up to date.
load_dotenv(find_dotenv())

# Fail fast: indexing os.environ raises KeyError if OPENAI_API_KEY is not set.
openai_api_key = os.environ["OPENAI_API_KEY"]

## Basic App: Question Answering from a Document

In [2]:
# from langchain_openai import OpenAI
from langchain_openai import ChatOpenAI

In [3]:
# Chat model identifier, defined once so the cell that builds the LLM client can refer to it by name.
MODEL_GPT = "gpt-4o-mini"

In [4]:
# Chat model client shared by the chains built later in the notebook;
# the model name is taken from MODEL_GPT.
llm = ChatOpenAI(
    model=MODEL_GPT,
)

### Load the text file

In [5]:
from langchain.document_loaders import TextLoader

In [6]:
# The path is relative to the directory the notebook kernel runs in.
# The encoding is stated explicitly so the file is decoded the same way on every
# platform instead of falling back to the locale default, which is not UTF-8
# everywhere.  NOTE(review): assumes the file is UTF-8 — confirm.
loader = TextLoader(
    "../../data/be-good-and-how-not-to-die.txt",
    encoding="utf-8",
)

In [7]:
# Read the file through the loader; the result is a list (inspected in the next cells).
document = loader.load()

### The document is loaded as a Python list, and each item carries metadata

In [8]:
# Show the container type returned by loader.load().
print(type(document))

<class 'list'>


In [9]:
# Number of items in the loaded list.
print(len(document))

1


In [10]:
# Metadata attached to the first loaded item.
print(document[0].metadata)

{'source': '../../data/be-good-and-how-not-to-die.txt'}


In [11]:
# Report the number of loaded items as a sentence.
print(f"You have {len(document)} document.")

You have 1 document.


In [12]:
# Length of the first item's text, in characters.
print(f"Your document has {len(document[0].page_content)} characters")

Your document has 27423 characters


### Split the document into small chunks

In [13]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [14]:
# Splitter settings, named so they can be tuned in one place
# (same UPPER_SNAKE_CASE convention as MODEL_GPT).
# NOTE(review): sizes are presumably counted in characters (the splitter's
# default length measure) — confirm against the library documentation.
CHUNK_SIZE = 3000     # upper bound on the size of each chunk
CHUNK_OVERLAP = 400   # amount of text shared between neighbouring chunks

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

In [15]:
# Split the loaded document into chunks with the configured splitter.
document_chunks = text_splitter.split_documents(document)

In [16]:
# Report how many chunks the split produced.
print(f"Now you have {len(document_chunks)} chunks.")

Now you have 12 chunks.


### Convert text chunks into numeric vectors (called "embeddings")

In [17]:
# from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_openai import OpenAIEmbeddings

In [18]:
# Embedding client; no arguments are passed, so the library's default settings apply.
embeddings = OpenAIEmbeddings()

### Load the embeddings into a vector database

In [19]:
from langchain.vectorstores import FAISS

In [20]:
# Build a FAISS vector store from the chunks, using `embeddings` to vectorize them.
stored_embeddings = FAISS.from_documents(document_chunks, embeddings)

### Create a Retrieval Question Answering chain

In [21]:
from langchain.chains import RetrievalQA

In [22]:
# Retrieval QA chain: chunks are fetched from the vector store through its
# retriever and handed to the LLM using the "stuff" chain type.
QA_chain = RetrievalQA.from_chain_type(
    retriever=stored_embeddings.as_retriever(),
    chain_type="stuff",
    llm=llm,
)

### Now we have a Question Answering app

In [23]:
# Question sent to the QA chain. Written as explicit pieces so the leading and
# trailing newlines and the space after "about?" are visible in the source.
question = (
    "\n"
    "What is this article about? \n"
    "Describe it in less than 100 words.\n"
)

In [24]:
# invoke() is used here rather than run().
# The returned dict has a 'query' key (the question) and a 'result' key (the answer).
QA_chain.invoke(question)

{'query': '\nWhat is this article about? \nDescribe it in less than 100 words.\n',
 'result': "The article discusses key insights from Y Combinator's experiences with startups, emphasizing the importance of creating something people want and not overly worrying about the business model initially. It highlights that many successful startups resemble charities in their early stages, focusing on user needs. The author also stresses that a significant factor in startup success is persistence, advising founders to avoid giving up, as many challenges and setbacks are inevitable. Ultimately, the piece serves as a motivational guide for entrepreneurs navigating the startup landscape."}

## New way: with LCEL (LangChain Expression Language)

In [25]:
from langchain.prompts import PromptTemplate

# Prompt text with two placeholders: {context} and {question}.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
# Wrap the template string in a prompt object so it can be used as a step in the chain.
prompt = PromptTemplate.from_template(template)

In [26]:
# Retriever over the vector store; no arguments are passed, so default settings apply.
retriever = stored_embeddings.as_retriever()

In [27]:
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

In [28]:
def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of retrieved documents; each must expose ``page_content``.

    Returns:
        The documents' ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The retriever returns document objects, not text. Without the formatting step
# the {context} slot would be filled with the string form of that list
# (object reprs and metadata included) rather than just the chunk text.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [29]:
# Ask the LCEL chain; StrOutputParser is its last step, so the result is a plain string.
chain.invoke("What is this article about? Describe it in less than 100 words.")

'The article discusses key insights and lessons from Y Combinator regarding startup success and failure. It emphasizes the importance of creating something people want while not overly focusing on immediate profits, suggesting that successful startups often operate with a charitable mindset. It also highlights the significance of commitment and public accountability in motivating founders to persevere through challenges. Moreover, it outlines strategies for avoiding failure, such as maintaining communication and adaptability, while acknowledging that encountering difficulties is a common aspect of the startup journey.'