In [1]:
import os
from dotenv import load_dotenv, find_dotenv
# Load variables from a discovered .env file (if any) into the process
# environment; the boolean result is discarded.
_ = load_dotenv(find_dotenv())

# Fail fast with an actionable message when the key is absent or blank,
# instead of a bare KeyError here or an authentication error much later.
# KeyError is kept so the failure type matches a plain os.environ[...] lookup.
openai_api_key = os.environ.get("OPENAI_API_KEY", "")
if not openai_api_key.strip():
    raise KeyError(
        "OPENAI_API_KEY is not set or is empty; add it to a .env file "
        "or export it before running this notebook."
    )

In [1]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader

In [2]:
# Read the channel-description text file through LangChain's TextLoader.
loader = TextLoader("./Data/dswithbappy_data.txt")
# load() yields a list of Document objects (see the next cell's output).
loaded_document = loader.load()

In [3]:
# Inspect what was loaded: each Document carries source metadata and page_content.
loaded_document

[Document(metadata={'source': './Data/dswithbappy_data.txt'}, page_content='dswithbappy focuses on providing content on Data Science, Artificial Intelligence, Machine Learning, \nDeep Learning, Computer Vision, Natural language processing, Python programming, etc. in English. \n"Real creativity won\'t make things more complex. Instead, I will simplify them." \nI have been working in the Data Science field, spearheading in machine learning, \ndeep learning, and computer vision. Typically this is my YouTube channel where I clarify \ndifferent themes on machine learning, deep learning, and AI with numerous real-world issue scenarios. \nMy primary point is to create everybody commonplace of ML and AI. Please subscribe and support the channel. \nAs I come up with new innovations. These recordings are free and I guarantee to create more curiously substance as we go ahead.\n😀Please donate if you want to support the channel through Buy me a coffee: https://www.buymeacoffee.com/dswithbappy\n\nP

In [4]:
# Split the loaded documents into chunks before embedding them.
# chunk_size=1000 with no overlap between consecutive chunks
# (units are presumably characters — confirm against the splitter docs).
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
chunks_of_text = text_splitter.split_documents(loaded_document)

In [5]:
# How many chunks the splitter produced.
len(chunks_of_text)

1

In [6]:
# Embedding client created with library defaults.
# NOTE(review): presumably reads OPENAI_API_KEY from the environment — confirm.
embeddings = OpenAIEmbeddings()

# Build a FAISS vector store from the chunks, embedding them with `embeddings`.
vector_db = FAISS.from_documents(chunks_of_text, embeddings)

In [7]:
# Wrap the vector store as a retriever, using its default settings.
retriever = vector_db.as_retriever()

In [8]:
# Query the retriever on its own; the result (displayed in the next cell)
# is a list of Document objects, not a generated answer.
response = retriever.invoke("what is dswithbappy?")

In [9]:
# Show the documents returned by the retriever.
response

[Document(id='1ad2b2ad-3ab7-4aa4-8a04-9c775ce5e51c', metadata={'source': './Data/dswithbappy_data.txt'}, page_content='dswithbappy focuses on providing content on Data Science, Artificial Intelligence, Machine Learning, \nDeep Learning, Computer Vision, Natural language processing, Python programming, etc. in English. \n"Real creativity won\'t make things more complex. Instead, I will simplify them." \nI have been working in the Data Science field, spearheading in machine learning, \ndeep learning, and computer vision. Typically this is my YouTube channel where I clarify \ndifferent themes on machine learning, deep learning, and AI with numerous real-world issue scenarios. \nMy primary point is to create everybody commonplace of ML and AI. Please subscribe and support the channel. \nAs I come up with new innovations. These recordings are free and I guarantee to create more curiously substance as we go ahead.\n😀Please donate if you want to support the channel through Buy me a coffee: ht

## Simple usage with LCEL (LangChain Expression Language)

In [10]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [11]:
# Prompt text with two placeholders, {context} and {question}; it instructs
# the model to answer only from the supplied context.
template = """Answer the question based only on the following context:

{context}

Question: {question}
"""

# Turn the template string into a chat prompt.
prompt = ChatPromptTemplate.from_template(template)

# Chat model created with library defaults (no model name or temperature given).
model = ChatOpenAI()

In [12]:
def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with the contents in order, "\\n\\n" between entries.
    """
    contents = (doc.page_content for doc in docs)
    return "\n\n".join(contents)


# Compose the retrieval-augmented chain with the `|` operator:
#   1. the dict supplies the prompt's two variables — "context" comes from the
#      retriever piped into format_docs, "question" from RunnablePassthrough()
#      (presumably the raw input passed through unchanged — confirm);
#   2. the filled prompt is sent to the chat model;
#   3. StrOutputParser() reduces the model result to a plain str (the final
#      cell's type(response) output shows `str`).
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)


In [13]:
# Run the full chain on the same question.
# Note: this rebinds `response`, which earlier held the retriever's list of
# Document objects; from here on it is the generated answer string.
response = chain.invoke("what is dswithbappy?")

In [14]:
# Show the generated answer.
response

'dswithbappy is a YouTube channel focused on providing content on Data Science, Artificial Intelligence, Machine Learning, Deep Learning, Computer Vision, Natural language processing, Python programming, etc. The creator of the channel works in the Data Science field and aims to simplify complex topics in ML and AI for a wider audience. They also accept donations through Buy me a coffee and are open to collaborations and item unboxing.'

In [15]:
# Check the type of the chain's final output.
type(response)

str