# Install necessary packages

In [None]:
! pip install pypdf
! pip install sentence-transformers==2.2.2

In [None]:
! pip install langchain-openai
! pip install tiktoken
! pip install faiss-cpu

In [None]:
! pip install -U langchain-community

# Load the Data

In [4]:
# Import the PDF loader from langchain_community
from langchain_community.document_loaders import PyPDFDirectoryLoader

In [5]:
# Load PDF documents from a specified directory
# ("llm" is a relative path, so it is resolved against the kernel's working directory)
loader = PyPDFDirectoryLoader("llm")

data = loader.load() # Load all PDF documents into a list of Document objects

In [6]:
# Preview only the first few loaded documents; echoing the whole list
# floods the cell output and bloats the saved notebook.
data[:5]

[Document(page_content='Generative AI ', metadata={'source': 'llm/Generative AI.pdf', 'page': 0}),
 Document(page_content='What you will learn? \n●Generative AI? \n●Large Language Models (LLMs) \n●OpenAI \n●Langchain \n●Vector Database \n●Llama Index \n●Open Source LLM model \n●End to End Project ', metadata={'source': 'llm/Generative AI.pdf', 'page': 1}),
 Document(page_content='Generative AI \n●ChatGPT \n●Google Bard \n●Meta Llama 2 ', metadata={'source': 'llm/Generative AI.pdf', 'page': 2}),
 Document(page_content='What is Generative AI? \nGenerative AI generate new data based on training sample.Generative model \ncan generate Image,Text, Audio, Videos etc. data as output. \nSo generative AI is a very huge topics, \n-Generative Image model \n-Generative Language model ', metadata={'source': 'llm/Generative AI.pdf', 'page': 3}),
 Document(page_content='Generative Model: \nQuestions R esponses ', metadata={'source': 'llm/Generative AI.pdf', 'page': 4}),
 Document(page_content='Where G

In [7]:
# Import the text splitter
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [8]:
# Break the loaded documents into chunks (size 500, overlap 20) ready for embedding
text_split = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=20,
)
chunks = text_split.split_documents(data)

In [9]:
# Display the number of chunks produced by the splitter
len(chunks)

15

In [10]:
# Inspect the first chunk (content and metadata) as a sanity check on the split
chunks[0]

Document(page_content='Generative AI', metadata={'source': 'llm/Generative AI.pdf', 'page': 0})

# Generate Embeddings

In [11]:
# Import the HuggingFace embedding wrapper from langchain_community (installed above);
# the `langchain.embeddings` path is a deprecated alias for this location.
from langchain_community.embeddings import HuggingFaceEmbeddings

In [None]:
# Instantiate the embedder backed by the all-MiniLM-L6-v2 sentence-transformers model
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Setting up the Vector Database

In [13]:
# Import the FAISS vector store from langchain_community, where the class is defined;
# the `langchain.vectorstores` path is a deprecated re-export of it.
from langchain_community.vectorstores import FAISS

In [14]:
# Embed every chunk with `embedding` and index the results in a FAISS store
vectorstore = FAISS.from_documents(
    chunks,
    embedding,
)

In [15]:
# Echo the vector store object to confirm it was created (only its repr is shown)
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x7b7bc5102e00>

In [16]:
# Look up the 3 chunks most similar to the query and print them
query = "What is LLM?"
docs = vectorstore.similarity_search(
    query,
    k=3,
)
print(docs)

[Document(page_content='What is LLMs? \nLarge Language Models (LLMs) are foundational machine learning models that use deep learning  \nalgorithms to process and understand natural language. These models are trained on massive amounts  \nof text data to learn patterns and entity relationships in the language.  \nIt is a language  model which is responsible for performing task such as text to text generation  , text to  \nimage generation  and image to text generations .', metadata={'source': 'llm/Generative AI.pdf', 'page': 10}), Document(page_content='Why LLM so Powerful? \n●Train the model for a speciﬁc task', metadata={'source': 'llm/Generative AI.pdf', 'page': 14}), Document(page_content='What makes LLM so Powerful? \n●In case of LLM, one model can be used for a whole variety of tasks like:- \nText generation, Chatbot, summarizer, translation, code generation \n& so on … \nSo, LLM is subset of Deep Learning & it has some properties merge with \nGenerative AI', metadata={'source': '

In [17]:
# Count the retrieved documents (the search above requested k=3)
len(docs)

3

# Setting Up the Environment

In [18]:
# Import userdata from google.colab to retrieve OpenAI API key
# NOTE(review): google.colab is presumably only available inside a Colab runtime — confirm before running elsewhere
from google.colab import userdata

# Retrieve the value stored under the name 'OPEN_AI_KEY' so the key is never hardcoded in the notebook
OPEN_AI_KEY = userdata.get('OPEN_AI_KEY')

In [19]:
import os


# Set the OpenAI API key as an environment variable
# (presumably this is where the OpenAI client created later looks for it — confirm)
os.environ['OPENAI_API_KEY'] = OPEN_AI_KEY

# Setting Up the LLM

In [20]:
# Import OpenAI from langchain_openai
from langchain_openai import OpenAI

In [21]:
# Initialize the OpenAI language model with library defaults
# (no model name or sampling arguments are passed here)
llm = OpenAI()

# Creating a Chain

In [22]:
# Import RetrievalQA chain from langchain
from langchain.chains import RetrievalQA

In [23]:
# Build a RetrievalQA chain of type "stuff" that answers with `llm`
# using documents fetched through the vector store's retriever
chain_llm = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

# Getting Response

In [24]:
# Perform a QA query using the chain; the returned dict holds the
# question under 'query' and the generated answer under 'result'
chain_llm.invoke("What is llm")

{'query': 'What is llm',
 'result': ' LLM stands for Large Language Models and it is a powerful machine learning model that is trained on massive amounts of text data to understand natural language and perform various tasks such as text to text generation, chatbot, summarization, translation, and code generation. LLM is a subset of deep learning and is a type of generative AI that does not require labeled data during training. '}