In [16]:
from langchain_community.document_loaders import PyPDFLoader #To load the pdf document
from langchain_text_splitters import RecursiveCharacterTextSplitter #Perform chunking
from langchain_huggingface import HuggingFaceEndpoint ,ChatHuggingFace ,HuggingFaceEmbeddings#To initilazie the model
from dotenv import load_dotenv #Load environmental variables
load_dotenv()
import os
from langchain_community.vectorstores import FAISS #Faiss vectorstore
from langchain_core.prompts import PromptTemplate




## Step-1: Load the PDF Document

In [2]:
file_path = "Intro_about_AI.pdf" #Path to pdf file
loader = PyPDFLoader(file_path) #Load the file
docs= []
for doc in loader.lazy_load():
    docs.append(doc)

In [3]:
len(docs) #Page wise document

13

## Step-2: Perform Chunking using RecursiveCharacterTextSplitter

In [4]:
text_splitter = RecursiveCharacterTextSplitter(
    separators = ["\n\n","\n","."], #pripority based seperation 
    chunk_size = 1000,  
    chunk_overlap = 120,
    length_function = len
)

#Apply splitting/chunking
chunks = text_splitter.split_documents(docs)


In [None]:
len(chunks) #Total number of chunks formed

30

## Step-3: Load the LLM and Embedding Model

In [6]:
#Loading the embedding model from huggingface
embedding_model = HuggingFaceEmbeddings(model_name ="sentence-transformers/all-mpnet-base-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
#loading the llm model from hugging face
llm = HuggingFaceEndpoint(
    repo_id = "mistralai/Mistral-7B-Instruct-v0.3", #Mistral model
    temperature= 0.4,
    max_new_tokens= 200,  #Maximun number of tokens to be generated in output
)
model = ChatHuggingFace(llm=llm)

### Step-4: Creating a VectorStore

In [11]:
VECTOR_STORE_PATH = "faiss_index"  # Directory to save the vector store

# Check if vector store exists
if os.path.exists(VECTOR_STORE_PATH):
    print("Loading existing vector store...")
    vector_store = FAISS.load_local(VECTOR_STORE_PATH, embedding_model, allow_dangerous_deserialization=True)
else:
    print("Creating new vector store...")
    # Assuming 'chunks' contains your documents
    vector_store = FAISS.from_documents(
        documents=chunks,
        embedding=embedding_model
    )
    # Save the vector store locally
    vector_store.save_local(VECTOR_STORE_PATH)
    print(f"Vector store saved to {VECTOR_STORE_PATH}")

Creating new vector store...
Vector store saved to faiss_index


## Step-5: Creating a prompt template

In [18]:
prompt_template = PromptTemplate(
    template = """You are a Smart Chat Assistant who answers the user question , based on the context provided. Only answer based on the provided context .
    If you dont know the answer simply respond with I do not have answer to your question.
    'Context':
    {context}
    'Question':
    {input}""",
    input_variables=['context','input']
)

## Step-6: Creating a retriever component 

In [None]:
retriever = vector_store.as_retriever(
    search_type="mmr",  #Maximum marginal relevance
    search_kwargs={'k':6,'lambda_mult':0.4} # 'k' : number of similari documents to retrieve , 'lambda_mult': to retriever the diverse documents and reduce redundancy
    )

In [26]:
retriever.invoke("What is AI?")

[Document(id='de2125d3-0d61-4f5e-b40a-d27ac026b9f7', metadata={'producer': 'Skia/PDF m123 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'chapter_1_Introductions', 'source': 'Intro_about_AI.pdf', 'total_pages': 13, 'page': 4, 'page_label': '5'}, page_content='● CustomerExpectations:MeetingcustomerexpectationsforAIperformanceandusabilitycanbechallengingduetohighexpectationsandevolvingneeds.● Cybersecurity:AIsystemsmaybevulnerabletocyberattacks,requiringrobustsecuritymeasurestoprotectagainstthreats.\n1.3GoalsofAI\n● AI canbeachievedbyreadingthebehavior of humans andusingtheresultstodevelopintelligent systems. For example, theylearn,makedecisionsandactincertain situations. Observing humans while problem-solving in simple tasksandusingitsresultstodevelopintelligentsystems.● The overall research goal of artificial intelligence is to createtechnologythatallows computers and machines to work intelligently. Thegeneral problemofsimulating(orcreating)intelligenceisbroken

## Step-7: Creating a RAG_Chain / Pipeline

In [29]:
from langchain_core.runnables import RunnableParallel ,RunnableSequence, RunnablePassthrough

parallel_chain = RunnableParallel({
    'context' : retriever,
    'input' :RunnablePassthrough()
})

In [30]:
rag_chain = parallel_chain | prompt_template | model

In [36]:
result = rag_chain.invoke("What are the AI techniques discussed.")

In [37]:
print(result.content)

 The AI techniques discussed in the provided context are:

1. Planning: This technique involves intelligent agents being able to set goals and achieve them. It requires the ability to envision the future, make predictions about how actions will change the state of the world, and make choices that maximize the utility (or "value") of the options available.

2. Learning: This is a fundamental concept of AI research since its inception. It is the study of algorithms that automatically improve through experience. Unsupervised learning is the ability to find patterns in a stream of input. Supervised learning includes both classification and numerical regression.

3. Reactive machines: These are the most basic form of AI applications. Examples of reactive machines are DeepBlue, IBM's chess-playing supercomputer, and the same computer that defeated the then-grandmaster of the world. AI teams do not use training sets to feed these machines or store subsequent data for future references. Based 

In [38]:
retriever.invoke("What are the AI techniques discussed.")

[Document(id='1c60aa87-0813-4585-9070-3f926a7c4831', metadata={'producer': 'Skia/PDF m123 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'chapter_1_Introductions', 'source': 'Intro_about_AI.pdf', 'total_pages': 13, 'page': 5, 'page_label': '6'}, page_content='● Planning:Intelligentagentsmustbeabletosetgoalsandachievethem.Theyneedawaytoenvisionthefuture-arepresentationofthestateoftheworldandmakepredictionsabouthowtheiractionswillchangeit-andbeabletomakechoicesthatmaximizetheutility(or"value")oftheoptionsavailable.Inclassicalplanningproblems,theagentcanassumethatitistheonlysystemactingintheworld,allowingtheagenttobecertainoftheconsequencesofitsactions.However,iftheagentisnottheonlyactor,itrequiresthattheagentreasonunderuncertainty.Itcallsforanagenttoassessitsenvironment,makepredictions,evaluateitspredictions,andadaptbasedonitsassessment.\n● Learning:Machinelearning,afundamentalconceptofAIresearchsincethefield\'sinception,isthestudyofcomputeralgorithmsthatautomati