In [1]:
# Importing the necessary packages
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
import textwrap



In [2]:
# This will load the PDF file
def load_pdf_data(file_path):
    # Creating a PyMuPDFLoader object with file_path
    loader = PyMuPDFLoader(file_path=file_path)
    
    # loading the PDF file
    docs = loader.load()
    
    # returning the loaded document
    return docs

In [3]:
# Responsible for splitting the documents into several chunks
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    
    # Initializing the RecursiveCharacterTextSplitter with
    # chunk_size and chunk_overlap
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )
    
    # Splitting the documents into chunks
    chunks = text_splitter.split_documents(documents=documents)
    
    # returning the document chunks
    return chunks

In [4]:
# function for loading the embedding model
def load_embedding_model(model_path, normalize_embedding=True):
    return HuggingFaceEmbeddings(
        model_name=model_path,
        model_kwargs={'device':'cpu'}, # here we will run the model with CPU only
        encode_kwargs = {
            'normalize_embeddings': normalize_embedding # keep True to compute cosine similarity
        }
    )


# Function for creating embeddings using FAISS
def create_embeddings(chunks, embedding_model, storing_path="vectorstore"):
    # Creating the embeddings using FAISS
    vectorstore = FAISS.from_documents(chunks, embedding_model)
    
    # Saving the model in current directory
    vectorstore.save_local(storing_path)
    return vectorstore

In [5]:
prompt = """
### System:
You are an AI Assistant that follows instructions extreamly well. \
Help as much as you can.

### User:
{prompt}

### Response:

"""

In [6]:
template = """
### System:
You are an respectful and honest assistant. You have to answer the user's \
questions using only the context provided to you. If you don't know the answer, \
just say you don't know. Don't try to make up an answer.

### Context:
{context}

### User:
{question}

### Response:
"""

In [7]:
# Creating the chain for Question Answering
def load_qa_chain(retriever, llm, prompt):
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever, # here we are using the vectorstore as a retriever
        chain_type="stuff",
        return_source_documents=True, # including source documents in output
        chain_type_kwargs={'prompt': prompt} # customizing the prompt
    )

In [8]:
# Prettifying the response
def get_response(query, chain):
    # Getting response from chain
    response = chain({'query': query})
    
    # Wrapping the text for better output in Jupyter Notebook
    wrapped_text = textwrap.fill(response['result'], width=100)
    print(wrapped_text)

In [11]:
#from lang_funcs import *
from langchain.llms import Ollama
from langchain import PromptTemplate

In [12]:
# Loading orca-mini from Ollama
llm = Ollama(model="llama3", temperature=0)

# Loading the Embedding Model
embed = load_embedding_model(model_path="all-MiniLM-L6-v2")

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange


In [14]:
# loading and splitting the documents
docs = load_pdf_data(file_path="Dee Learning Crash Course For Beginners _240611_180111.pdf")
documents = split_docs(documents=docs)

# creating vectorstore
vectorstore = create_embeddings(documents, embed)

# converting vectorstore to a retriever
retriever = vectorstore.as_retriever()

  return dynamo.is_compiling()
  or (hasattr(torch, "_dynamo") and torch._dynamo.is_compiling())


In [15]:
# Creating the prompt from the template which we created before
prompt = PromptTemplate.from_template(template)

# Creating the chain
chain = load_qa_chain(retriever, llm, prompt)

In [16]:
get_response("How is gradient calculated?", chain)

  warn_deprecated(
  return dynamo.is_compiling()
  or (hasattr(torch, "_dynamo") and torch._dynamo.is_compiling())


According to the provided context, the gradient (da and db) is calculated using the script
`find_derivatives(X, y, z)`.  The script calculates the derivative of the cost function with respect
to the input variables a and b as follows:  1. Calculate the error dz between the predicted values z
and the actual values y. 2. Calculate the derivative da by taking the dot product of the transpose
of X (X.T) and dz. 3. Calculate the derivative db by summing up the elements of dz.  The resultant
derivatives da and db are then returned to the calling function.


In [18]:
import time
while True:
    query = input("type query here.....>>>>>")
    start = time.time()
    response = get_response(query, chain)
    end = time.time()

    print(" ### RESPONSE ### ", response)
    print("TIME TAKEN ::: ", end-start)

type query here.....>>>>> hi


  return dynamo.is_compiling()
  or (hasattr(torch, "_dynamo") and torch._dynamo.is_compiling())


Hello! It seems you're starting a conversation. I'm here to help with any questions or topics you'd
like to discuss. What's on your mind?
 ### RESPONSE ###  None
TIME TAKEN :::  3.8132479190826416


type query here.....>>>>> Explain me Backpropogation


  return dynamo.is_compiling()
  or (hasattr(torch, "_dynamo") and torch._dynamo.is_compiling())


Backpropagation!  In our context, backpropagation is used to minimize the overall loss (cost) by
finding the optimum values of weights. The cost function we're using is the mean squared error
(MSE), which represents the difference between the predicted output (`ao`) and the actual output
(`y`).  The backpropagation process consists of two phases:  **Phase 1:**  * Calculate `dcost_dao`,
the derivative of the total cost with respect to the output (`ao`). This is done using the formula
`(1/m) * (ao - y)`. * Calculate `dao_dzo`, the derivative of `ao` with respect to `zo`. This is done
using the sigmoid derivative function. * Calculate `dzo_dwo`, the derivative of `zo` with respect to
the output weights (`w`). This is simply the transpose of the hidden layer activations (`ah.T`). *
Calculate `dcost_dwo`, the derivative of the total cost with respect to the output weights (`w`).
This is done by multiplying `dcost_dao`, `dao_dzo`, and `dzo_dwo`.  **Phase 2:**  * Calculate
`dcost_dzo`, the de

type query here.....>>>>> python code for binary search


  return dynamo.is_compiling()
  or (hasattr(torch, "_dynamo") and torch._dynamo.is_compiling())


I don't know. The provided context does not contain any information about binary search, which is a
specific algorithm used to find an element in a sorted array. If you're looking for Python code for
binary search, I'd be happy to help you with that! However, please provide more context or clarify
what you mean by "binary search" so I can better assist you.
 ### RESPONSE ###  None
TIME TAKEN :::  8.128325700759888


KeyboardInterrupt: Interrupted by user