In [14]:
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from pinecone import Pinecone
from langchain.document_loaders import DirectoryLoader,PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.llms import CTransformers
import pypdf

In [15]:
def load_pdf(data):
    loader= DirectoryLoader(data,loader_cls=PyPDFLoader,glob='*.pdf')
    
    documents=loader.load()
    return documents

In [17]:
extracted_data=load_pdf('data/')

In [18]:
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=50)
    text_chunks=text_splitter.split_documents(extracted_data)

    return text_chunks

In [19]:
text_chunks=text_split(extracted_data)
print(len(text_chunks))


5961


In [20]:
def huggingface_embeddings():
    embeddings=HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')
    return embeddings

In [21]:
embeddings= huggingface_embeddings()

In [None]:
embeddings

HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [None]:
query_result=embeddings.embed_query('Hello world')
print(len(query_result))

384


In [22]:
import os
from dotenv import load_dotenv
load_dotenv()
from pinecone import ServerlessSpec

PINECONE_API_KEY=os.environ.get('PINECONE_API_KEY')

In [23]:
pc =Pinecone(api_key=PINECONE_API_KEY)

In [24]:
index_name='medical-chatbot'

if not  pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,
        metric='cosine',
        spec=ServerlessSpec(cloud='aws',region='us-east-1'),
    )

index= pc.Index(index_name)


In [28]:
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document
vector_store=PineconeVectorStore(index=index,embedding=embeddings)\

vector_store.add_documents(text_chunks)

['78e27816-be4b-4b39-8197-c3a1e0d58dad',
 'a2c1d4fc-ee23-4891-83ed-cbab56671503',
 '1d40f4c1-43ec-4a39-9747-6d77926b33a8',
 '1885e5ee-ec7d-4345-973e-da1c7b34bdc5',
 '789dc72c-3d51-410f-a040-d62384c538f0',
 '8a5ce589-3cf1-4d9c-8f1a-b038c50aa7b6',
 '26f7cb1b-ccac-4df8-93a8-a5936da5ff6a',
 'c49d13f3-af5f-4bce-9463-693f3e6eeadc',
 '86edae4f-1bc5-4eed-8736-9aaba26b0438',
 'd789daeb-ad0b-4294-b282-384a94375b96',
 'c224bd35-f171-4647-b707-c1f8fe507afc',
 '7d4a5944-64f0-43c0-a0c6-4de9da851cf2',
 '9f5ccd77-815b-4e35-931a-4cc4d5995840',
 '618558c3-70ef-4d8f-89af-fb0028a58cf7',
 '41ba1897-068e-48fa-b5bd-beec1c0f2e85',
 '357d4118-c6e1-462d-9994-d92d96d72cfe',
 '71078a47-becd-4865-b943-60e05dedbe83',
 '1f073ecf-6ee9-43de-92df-b280c8b4a980',
 '7e6f9ec1-f25a-42ec-b0c4-a2db55f8b159',
 '38eb3afd-99d7-47f7-a428-310c1953cd31',
 'f94806e6-7ed8-4dcb-bf32-5d4dc5274229',
 'dbe9ac14-af13-40e6-a6da-c6651e06f6df',
 '839f4d09-d08e-4d56-9e60-9cd2c3660961',
 'b378dfe2-0b23-4870-a9ff-0db2b11b651c',
 'd7ecf7df-7392-

In [29]:
vector_store=PineconeVectorStore(index=index,embedding=embeddings)

results= vector_store.similarity_search(
    'what is Blood urea nitrogen (BUN)',
    k=2,

)

In [None]:
for res in results:
    print(f"*{res.page_content} [{res.metadata}]")

*Blood urea nitrogen test
Definition
The blood urea nitrogen (BUN) test measures the
level of urea nitrogen in a sample of the patient’s blood.
Urea is a substance that is formed in the liver when the
body breaks down protein. Urea then circulates in the
blood in the form of urea nitrogen. In healthy people,
most urea nitrogen is filtered out by the kidneys and
leaves the body in the urine. If the patient’s kidneys are
not functioning properly or if the body is using large [{'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 548.0, 'page_label': '549', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'data\\Gale Encyclopedia of Medicine Vol. 1 (A-B).pdf', 'total_pages': 637.0}]
*are carried out of the body. At the same time, electrolytes
and other chemicals are added to the blood. The purified,
chemically-balanced blood is then returned to the body.
KEY TERMS
Blood urea nitrogen (BUN)—A waste product that
is formed in the l

In [36]:
from langchain_core.prompts import PromptTemplate

prompt=PromptTemplate(
 template= """
use the following pieces of information to answer the user's question.
If you dont know the  answer,just say that you dont know, dont try to make up the answer.

Context:{context}
Question:{input}

Only return the helpful answer below and nothing else.

Helpful answer:  
""",
input_variables=['context','input']
)

In [37]:
from langchain_community.llms import CTransformers
llm = CTransformers(model='model/llama-2-7b-chat.ggmlv3.q4_0.bin',
                    model_type='llama',
                    config={'max_new_tokens':512,
                            'temperature':0.3})

In [43]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

retriever= vector_store.as_retriever(search_kwargs={'k':2})

question_answer_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(retriever, question_answer_chain)

llm_response= chain.invoke({"input": 'what is acne'})

In [44]:
print(llm_response['answer'])

Acne is a common skin disease characterized by pimples on the face, chest, and back. It occurs when the pores of the skin become clogged with oil, dead skin cells, and bacteria.
