In [None]:
from langchain.document_loaders import PyMuPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
import os

In [2]:
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan; only files matching ``*.pdf``
            are handed to the loader.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory,
        with each file parsed by ``PyMuPDFLoader``. In the recorded run
        this is a list with one ``Document`` per PDF page.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyMuPDFLoader).load()

In [4]:
# Read every PDF that sits in the sibling ``data`` directory.
extracted_data = load_pdf_file(data="../data")

In [None]:
# Number of loaded Document objects; the recorded output (637) equals the
# `total_pages` value in the PDF metadata shown in the next cell.
len(extracted_data)

637

In [6]:
# Peek at the first Document: its metadata dict plus `page_content`
# (empty for this first page in the recorded run).
extracted_data[0]

Document(metadata={'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'creator': '', 'creationdate': '2004-12-18T17:00:02-05:00', 'source': '..\\data\\Medical_book.pdf', 'file_path': '..\\data\\Medical_book.pdf', 'total_pages': 637, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2004-12-18T16:15:31-06:00', 'trapped': '', 'modDate': "D:20041218161531-06'00'", 'creationDate': "D:20041218170002-05'00'", 'page': 0}, page_content='')

In [7]:
# Split the data into text chunks
def text_split(extracted_data):
    """Break loaded documents into small overlapping chunks.

    Args:
        extracted_data: The documents to split, as produced by
            ``load_pdf_file``.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents``
        using ``chunk_size=500`` and ``chunk_overlap=20``.
    """
    splitter_settings = {"chunk_size": 500, "chunk_overlap": 20}
    splitter = RecursiveCharacterTextSplitter(**splitter_settings)
    return splitter.split_documents(extracted_data)

In [8]:
# Chunk the loaded pages and report how many chunks were produced.
text_chunks = text_split(extracted_data)
chunk_total = len(text_chunks)
print("Length of Text Chunks", chunk_total)

Length of Text Chunks 5777


In [11]:
from langchain.embeddings import HuggingFaceEmbeddings

def download_hugging_face_embeddings():
    """Build the HuggingFace sentence-embedding wrapper.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model. Presumably the
        weights are fetched on first use; that happens inside the
        library, not in this function.
    """
    model_id = 'sentence-transformers/all-MiniLM-L6-v2'
    return HuggingFaceEmbeddings(model_name=model_id)

# Instantiate the HuggingFace embedding model.
# NOTE(review): a later cell rebinds `embeddings` to OpenAIEmbeddings(), so
# this object is not the one used for retrieval further down.
embeddings = download_hugging_face_embeddings()

In [6]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment so the
# API keys can be read via os.environ below (returned True in the recorded run).
load_dotenv()

True

In [7]:
# Pull both API keys out of the environment; each is None when the variable is unset.
PINECONE_API_KEY, OPENAI_API_KEY = (
    os.environ.get(variable)
    for variable in ('PINECONE_API_KEY', 'OPENAI_API_KEY')
)

In [None]:
# Switch to OpenAI embeddings for everything below. This rebinds `embeddings`,
# replacing the HuggingFace model instantiated earlier in the notebook.
# The statement must start at column 0: with stray leading spaces the cell
# fails with "IndentationError: unexpected indent".
embeddings = OpenAIEmbeddings()


In [13]:
# Smoke test: embed a short query to confirm the embedding client works.
result = embeddings.embed_query("Hello")

In [14]:
# Length of the embedding vector returned above (1536 in the recorded run).
len(result)

1536

In [9]:
# Make the Pinecone key available through the process environment
# (presumably where the Pinecone integration looks for it — confirm).
# os.environ only accepts str values, so a missing key (None from
# os.environ.get) would otherwise surface as an opaque TypeError here.
if not PINECONE_API_KEY:
    raise RuntimeError(
        "PINECONE_API_KEY is not set; add it to your .env file or environment."
    )
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

In [13]:
from langchain_pinecone import PineconeVectorStore

In [10]:
# Name of the Pinecone index that the vector-store cells below refer to.
index_name = "medicalbot"

In [None]:
# Indexing step, intentionally left commented out: it would pass `text_chunks`
# and `embeddings` to PineconeVectorStore.from_documents for `index_name`.
# Presumably it was run once to populate the index — confirm before re-enabling.
# docsearch = PineconeVectorStore.from_documents(
#     text_chunks,
#     index_name=index_name,
#     embedding=embeddings, 
# )

In [14]:
# Attach to the existing Pinecone index rather than re-indexing the chunks.
# NOTE(review): `embeddings` should be the same model the index was built with — confirm.
doc = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)

In [15]:
# Similarity-search retriever configured with k=3.
search_options = {"k": 3}
retriever = doc.as_retriever(
    search_type='similarity',
    search_kwargs=search_options,
)

In [16]:
# Run the retriever on its own to inspect what it returns before wiring it into a chain.
retrieved_doc = retriever.invoke('What is the symptoms of a headache?')

In [17]:
# Display the retrieved Documents (metadata and page_content) for the query above.
retrieved_doc

 Document(id='df0d9834-2d2a-4902-a70e-537560c82b0c', metadata={'author': '', 'creationDate': "D:20041218170002-05'00'", 'creationdate': '2004-12-18T17:00:02-05:00', 'creator': '', 'file_path': '/content/data/Medical_book.pdf', 'format': 'PDF 1.5', 'keywords': '', 'modDate': "D:20041218161531-06'00'", 'moddate': '2004-12-18T16:15:31-06:00', 'page': 265.0, 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': '/content/data/Medical_book.pdf', 'subject': '', 'title': '', 'total_pages': 637.0, 'trapped': ''}, page_content='• painful, bluish or purplish fingers or toes\n• puffy or swollen eyelids, face, feet, or lower legs\n• changes in the color of the face\n• skin rash, itching, or hives\n• yellow eyes or skin\n• severe or continuing headache\n• sore throat and fever, with or without chills\n• breathing problems or wheezing\n• tightness in the chest\n• dizziness\n• unusual tiredness or weakness\n• weight gain\nIn addition, patients taking anticoagulant drugs\nshould check with their physician

In [18]:
from langchain_openai import OpenAI

# OpenAI LLM used to generate answers: temperature 0.4, max_tokens=100.
# NOTE(review): the 100-token cap may cut off longer answers — confirm it is intended.
llm = OpenAI(temperature=0.4, max_tokens=100)

In [19]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the question-answering chain. The trailing
# `{context}` placeholder is left unformatted here so the prompt template
# can fill it in later.
system_prompt = " ".join((
    "You are an assistant for question-answering tasks.",
    "Use the following pieces of retrieved context to answer the question.",
    "If you don't know the answer, say that you don't know.",
    "Use three sentences maximum and keep the answer concise.",
)) + "\n\n" + "{context}"

In [21]:
# Two-message chat template: the system instructions, then the user's
# question supplied through the `{input}` placeholder.
chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

In [23]:
# First build the chain that combines the LLM with the prompt ...
question_answer_chain = create_stuff_documents_chain(
    llm,
    prompt,
)
# ... then put the retriever in front of it to form the full RAG chain.
rag_chain = create_retrieval_chain(
    retriever,
    question_answer_chain,
)

In [26]:
# Ask the RAG chain a question and print only the generated answer text.
headache_query = {"input": "What is the symptoms of a headache?"}
response = rag_chain.invoke(headache_query)
print(response["answer"])





In [29]:
# Second sample question; the recorded answer shows the model declining to guess.
stats_query = {"input": "What is stats?"}
response = rag_chain.invoke(stats_query)
print(response["answer"])


I'm sorry, I don't know what you mean by "stats." Can you provide more context or clarify your question?
