In [1]:
## for loading document and saving it in vector store
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
## prompt
from langchain_core.prompts import PromptTemplate
## Hugging face enbeddings
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.chains import RetrievalQA


In [2]:
loader=PyPDFDirectoryLoader("cpp pdf")
doc_loader=loader.load()
splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
document=splitter.split_documents(doc_loader)

In [3]:
len(document)

179

In [4]:
### embedding using hugging face
huggingface_embeddings=HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",  
    ## we can also use
    #model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device':'cpu'},
    encode_kwargs={'normalize_embeddings':True}
)

  huggingface_embeddings=HuggingFaceBgeEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


In [5]:
import numpy as np
np.array(huggingface_embeddings.embed_query(document[0].page_content))

array([-5.20132780e-02, -2.86790859e-02,  2.10943137e-04, -8.24916437e-02,
       -3.62033170e-04,  6.96933549e-03, -2.42415089e-02,  3.04752495e-02,
        1.38576841e-02, -2.01304033e-02,  7.13156490e-03,  1.41586736e-02,
        2.67963894e-02, -2.59662606e-03,  6.35511875e-02,  5.68497069e-02,
       -2.54573952e-02,  6.65250868e-02,  4.12125066e-02,  8.08465993e-04,
       -1.02166273e-03, -8.05424079e-02,  7.58957397e-03, -7.14829341e-02,
        1.43424179e-02, -2.26393268e-02, -2.40932615e-03, -8.30480680e-02,
       -2.64927261e-02, -1.74554512e-01,  4.46340209e-03,  3.99443358e-02,
        7.08629610e-03,  3.28017026e-02, -8.99398234e-03, -5.41813672e-03,
        2.21159812e-02, -5.65193221e-02, -2.17574053e-02,  4.90733646e-02,
        5.07480390e-02, -7.11438339e-03,  9.51047614e-03, -2.21049646e-03,
       -5.47017492e-02, -2.70994231e-02, -3.08531765e-02, -1.04513555e-03,
        1.04262773e-02, -3.19889523e-02,  4.57741553e-03, -2.81249415e-02,
       -1.37352152e-02, -

In [6]:
print(np.array(huggingface_embeddings.embed_query(document[0].page_content)).shape)

(384,)


In [7]:
## creating vectorstore using FAISS
vector_db=FAISS.from_documents(documents=document,embedding=huggingface_embeddings)

In [8]:
## query using similarity search
query="WHAT IS THE PRINCIPLE OF OBJECT ORIENTED PROGRAMMING"
relevant_document=vector_db.similarity_search(query=query)
print(relevant_document[0].page_content)

1. Introduction to Project Management
2. Key Elements of Project Management
3. Importance of Project Management in IT
4. Benefits of Effective Project Management
5. Project Management vs. General Management
6. Project Management in IT Industry
7. Project Management Triangle
8. Project Life Cycle Phases
9. Project Management Methodologies
10. Duties and Responsibilities of Project Managers
11. Skills Required for Successful Project Managers
12. Project Manager's Role in Stakeholder Management
1. Principles of Object Oriented Programming
4
• OOP is a programming paradigm that encapsulates data and behavior into 
objects, enhancing security, scalability, and maintainability.
• Unlike POP , which divides problems into functions and relies on shared global 
data, OOP encapsulates data within objects, reducing interdependencies and 
improving data security.
• Core principles of OOP:
• Encapsulation: Combines data and functions, restricting external access 
using access specifiers.


In [9]:
### to get more relevant and accurate we use retriever
retriever=vector_db.as_retriever(search_type="similarity",search_kwargs={"k":3})  ## k=3 defines that we will select top 3 relevant document
print(retriever)

tags=['FAISS', 'HuggingFaceBgeEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001FCE8D884F0> search_kwargs={'k': 3}


### Using HUGGING FACE HUB(llm) to use retriever for getting relevant result

The Hugging Face Hub is an platform with over 350k models, 75k datasets, and 150k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.

### 1. querying about a topic directly from llm

In [None]:
import os
os.environ["HUGGINGFACEHUB_API_TOKEN"]="Hugging_Face_Token"

from langchain_community.llms import HuggingFaceHub

hf=HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    model_kwargs={"temperature":0.1,"max_length":500}
)

query="WHAT IS OBJECT ORIENTED PROGRAMMING?"
answer=hf.invoke(query)
print(answer)

  hf=HuggingFaceHub(


WHAT IS OBJECT ORIENTED PROGRAMMING?

Object-oriented programming (OOP) is a programming paradigm that uses "objects" to represent data and methods. An object is an instance of a class, which is a blueprint for creating objects. In OOP, objects have properties (data) and methods (functions), and they can interact with each other to perform tasks.

The main principles of OOP are:

1. Encapsulation: This is the practice of keeping the data and methods that operate on that data within a single unit, such as a class. This helps to hide the implementation details from the outside world and makes the code more modular and easier to maintain.
2. Inheritance: This is the ability of a class to inherit properties and methods from another class. This allows for code reuse and makes it easier to create new classes that are similar to existing ones.
3. Polymorphism: This is the ability of objects to take on multiple forms. In OOP, this is often achieved through method overloading and method overrid

### 2. Quesrying about topc using hugging face model installed locally

In [None]:
## this will give error as we don't have mistral locally installed in our device

from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

hf_local=HuggingFacePipeline.from_model_id(
    model_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    pipeline_kwargs={"temperature":0,"max_new_tokens":300}
)

llm_local=hf_local
llm_local.invoke(query)

OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3.
401 Client Error. (Request ID: Root=1-67d0790f-029cf30e3aba5acd04fb4731;5d245a66-9626-47a8-a5f9-aad848edbed1)

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/resolve/main/config.json.
Access to model mistralai/Mistral-7B-Instruct-v0.3 is restricted. You must have access to it and be authenticated to access it. Please log in.

### 3. Querying about topic using retriever it will ensure use of llm anf our pdf collectively

In [11]:
## prompt to send to retrieval

prompt_template=""" 
Use the following piece of context to answer the question asked.
Please try to provide the answer only based on context.
{context}
Question:{question}

Helpful Answers:
"""

In [12]:
prompt=PromptTemplate(template=prompt_template,input_variables=['context','question'])

## creating chain of llm,prompt and retriever

In [13]:
## forming chain b/w retriever,llm and prompt
retrieverQa=RetrievalQA.from_chain_type(
    llm=hf,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={'prompt':prompt}
)

In [14]:
query1='WHAT IS FUNCTION PROTOTYPE'
result=retrieverQa.invoke({'query':query})
print(result['result'])



 
Use the following piece of context to answer the question asked.
Please try to provide the answer only based on context.
1. Introduction to Project Management
2. Key Elements of Project Management
3. Importance of Project Management in IT
4. Benefits of Effective Project Management
5. Project Management vs. General Management
6. Project Management in IT Industry
7. Project Management Triangle
8. Project Life Cycle Phases
9. Project Management Methodologies
10. Duties and Responsibilities of Project Managers
11. Skills Required for Successful Project Managers
12. Project Manager's Role in Stakeholder Management
1. Principles of Object Oriented Programming
4
• OOP is a programming paradigm that encapsulates data and behavior into 
objects, enhancing security, scalability, and maintainability.
• Unlike POP , which divides problems into functions and relies on shared global 
data, OOP encapsulates data within objects, reducing interdependencies and 
improving data security.
• Core princi