In [1]:
import os
# Switch the working directory to the parent of the directory the kernel
# started in, so that relative paths such as 'data/' resolve from there.
# The target is resolved only once per kernel session: a bare
# os.chdir("../") climbs one level higher every time the cell is re-run.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath("..")
os.chdir(PROJECT_ROOT)

In [2]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [3]:
# Read the PDF files of a directory into documents
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``; only
            entries matching the glob ``*.pdf`` are selected.

    Returns:
        The result of ``DirectoryLoader.load()``, with each matched file
        read through ``PyPDFLoader``.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()
                             

In [4]:
# Load the PDFs from the 'data/' folder (relative to the current working directory).
extracted_data = load_pdf_file("data/")

In [5]:
# extracted_data

In [6]:
# Splitting the data into text chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` given to the splitter. Defaults to 500,
            the value that was previously hard-coded.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to
            20, the value that was previously hard-coded.

    Returns:
        The chunks produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [7]:
# Chunk the loaded documents and report how many chunks were produced.
text_chunks = text_split(extracted_data)
chunk_count = len(text_chunks)
print("Length of text chunks:", chunk_count)

Length of text chunks: 5860


In [8]:
# text_chunks

In [15]:
# Download the embeddings from Hugging Face
import sentence_transformers
from langchain_community.embeddings import HuggingFaceEmbeddings
def download_hugging_face_embeddings():
    """Create the Hugging Face embedding model wrapper.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    model_name = 'sentence-transformers/all-MiniLM-L6-v2'
    return HuggingFaceEmbeddings(model_name=model_name)

In [16]:
# Instantiate the embedding model once; the same object is passed to the
# vector store both when inserting chunks and when loading the index.
embeddings = download_hugging_face_embeddings()

In [31]:
from dotenv import load_dotenv
# Populate the process environment from a .env file, then read the Pinecone
# key from it. Fail immediately with a clear message when the key is missing,
# rather than passing None on to the Pinecone client.
load_dotenv()
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
if not PINECONE_API_KEY:
    raise RuntimeError(
        "PINECONE_API_KEY is not set; define it in the environment or in a .env file."
    )

In [32]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec

# Pinecone client authenticated with the API key read earlier.
pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "medbot"

# Create the serverless index only when it does not exist yet:
# create_index() errors out if an index with this name is already present,
# which would make the notebook fail on every run after the first.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # Must equal the embedding vector size; the all-MiniLM-L6-v2 model
        # used in this notebook produces 384-dimensional vectors.
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

In [33]:
# Write the key back into the process environment — presumably so that
# libraries reading PINECONE_API_KEY from the environment can find it (confirm).
# Raises TypeError if PINECONE_API_KEY is None, since os.environ only accepts strings.
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

In [35]:
from langchain.vectorstores import Pinecone as LangchainPinecone
# Embed each chunk with `embeddings` and insert the vectors into the Pinecone
# index named by `index_name`.
# NOTE(review): re-running this cell presumably uploads the chunks again —
# confirm whether that duplicates vectors before executing it twice.
docsearch = LangchainPinecone.from_documents(
    embedding=embeddings, index_name=index_name, documents=text_chunks
)

In [36]:
# Attach to the existing index by name (no documents are passed here).
# This rebinds `docsearch`, replacing whatever it referred to before.
docsearch = LangchainPinecone.from_existing_index(
    embedding=embeddings, index_name=index_name
)

In [37]:
# Similarity-search retriever over the vector store, requesting k=3 results per query.
retrieval_settings = {"k": 3}
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs=retrieval_settings,
)

In [38]:
# Read the Hugging Face token from the environment and fail with an explicit
# message when it is absent; assigning None to os.environ would otherwise
# raise an opaque "TypeError: str expected, not NoneType".
HUGGINGFACEHUB_API_KEY = os.environ.get('HUGGINGFACEHUB_API_KEY')
if not HUGGINGFACEHUB_API_KEY:
    raise RuntimeError(
        "HUGGINGFACEHUB_API_KEY is not set; define it in the environment or in a .env file."
    )
os.environ["HUGGINGFACEHUB_API_KEY"] = HUGGINGFACEHUB_API_KEY

In [49]:
# The string literal below holds a disabled alternative setup
# (HuggingFaceHub with Meta-Llama-3-8B). It is evaluated and discarded,
# so it has no effect on the notebook.
'''
from langchain_community.llms import HuggingFaceHub

llm = HuggingFaceHub(
    repo_id="meta-llama/Meta-Llama-3-8B",
    model_kwargs={"temperature": 0.4, "max_new_tokens": 500},
    huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_KEY"]
)
'''
from langchain_huggingface import HuggingFaceEndpoint

# LLM client for the hosted Mistral-7B-Instruct endpoint; the token is read
# from the HUGGINGFACEHUB_API_KEY environment variable.
MISTRAL_ENDPOINT_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.1"
generation_settings = {"temperature": 0.4, "max_new_tokens": 500}
llm = HuggingFaceEndpoint(
    endpoint_url=MISTRAL_ENDPOINT_URL,
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_KEY"),
    **generation_settings,
)



In [50]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System message for the question-answering chain. Adjacent string literals
# are concatenated verbatim, so each fragment must carry its own trailing
# space. "{context}" is the template placeholder for the retrieved text.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Two-message chat template: the system instructions followed by the
# user's question, supplied through the "{input}" placeholder.
chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

In [51]:
# Build the answering chain from the LLM and the prompt, then wrap it with
# the retriever to form the full retrieval chain.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(
    retriever,
    question_answer_chain,
)

In [56]:
# Run one question through the retrieval chain and print the "answer" field.
query = "can you solve the navier stokes eqn"
response = rag_chain.invoke({"input": query})
print(response["answer"])



?

The Navier-Stokes equations are a set of partial differential equations that describe the motion of fluids, such as air and water. They are used to model the behavior of fluids in a wide range of applications, including weather forecasting, aircraft design, and oceanography. The equations are based on Newton's laws of motion and the principles of conservation of momentum and energy. They describe the motion of fluids in terms of the velocity, pressure, and density of the fluid.

The Navier-Stokes equations are a set of partial differential equations that describe the motion of fluids, such as air and water. They are used to model the behavior of fluids in a wide range of applications, including weather forecasting, aircraft design, and oceanography. The equations are based on Newton's laws of motion and the principles of conservation of momentum and energy. They describe the motion of fluids in terms of the velocity, pressure, and density of the fluid.

The Navier-Stokes equations a