In [17]:
# Show the kernel's current working directory.
%pwd

'c:\\Users\\DELL\\Desktop\\ChatBot\\Chat_bot_LLm_Pinecone_Langchain_Flask_Aws\\analysis'

In [18]:
import os
os.chdir("../")
%pwd

'c:\\Users\\DELL\\Desktop\\ChatBot\\Chat_bot_LLm_Pinecone_Langchain_Flask_Aws'

In [10]:
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


In [19]:
def load_pdf_files(data):
    """Load the PDF files found under the ``data`` directory.

    ``data`` is handed to ``DirectoryLoader`` as the directory to scan; files
    matching the ``*.pdf`` glob are parsed with ``PyPDFLoader``.

    Returns the result of ``DirectoryLoader.load()`` for those files.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()



In [20]:
# Read the PDFs from the "data" folder (path is relative to the current cwd).
extracted_data = load_pdf_files(data="data")

In [26]:
from typing import  List 
from langchain.schema import Document

def filter_to_minimal_docs(docs: List[Document]) -> List[Document]:
    """Return new documents that keep only the text and the ``source`` metadata.

    For every input document a fresh ``Document`` is built with the same
    ``page_content`` and a metadata dict reduced to ``{"source": ...}``.
    When the input metadata has no ``source`` key the value is ``None``.
    The input documents themselves are not modified.
    """
    return [
        Document(
            page_content=original.page_content,
            metadata={"source": original.metadata.get("source")},
        )
        for original in docs
    ]
        




In [27]:
# Strip each loaded document down to its text plus the "source" metadata.
minimal_data = filter_to_minimal_docs(docs=extracted_data)

In [28]:
# Create chunks from the loaded documents.
def text_split(minimal_docs, chunk_size=1000, chunk_overlap=20):
    """Split documents into overlapping text chunks.

    Args:
        minimal_docs: Documents to split; passed to ``split_documents``.
        chunk_size: ``chunk_size`` given to ``RecursiveCharacterTextSplitter``.
            Defaults to 1000, the value that used to be hard-coded.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to
            20, the value that used to be hard-coded.

    Returns:
        The chunks returned by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(minimal_docs)

In [29]:
# Chunk the trimmed documents and report how many chunks were produced.
texts_chunk = text_split(minimal_docs=minimal_data)
print(len(texts_chunk))

3006


In [30]:
#embedding data for vector calculations 
from langchain.embeddings import HuggingFaceEmbeddings
def download_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Build the HuggingFace embedding model used to vectorise text chunks.

    Args:
        model_name: Model identifier passed to ``HuggingFaceEmbeddings``.
            Defaults to the model that used to be hard-coded here.
            NOTE: the Pinecone index in this notebook is created with
            ``dimension=384``; a replacement model must produce vectors of
            that size or the index dimension has to change with it.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with ``model_name``.
        No ``model_kwargs`` are passed, so device selection is left to the
        library default.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [35]:
# Instantiate the embedding model once; the Pinecone cells below reuse `embeddings`.
embeddings = download_embeddings()

In [32]:
import os
from dotenv import load_dotenv

# Copy variables from a local .env file into the process environment.
load_dotenv()


def _require_env(name: str) -> str:
    """Return the environment variable ``name`` or fail with a clear message.

    Previously a missing key surfaced as an opaque
    ``TypeError: str expected, not NoneType`` when ``None`` was written back
    into ``os.environ``.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name!r} is not set; "
            "add it to your .env file or export it before running this notebook."
        )
    return value


# Both values are read from os.environ, so they are already visible to any
# library that looks them up there; no write-back is needed.
PINECONE_API_KEY = _require_env("PINECONE_API_KEY")
OPEN_API_KEY = _require_env("OPEN_API_KEY")


In [33]:
from pinecone import Pinecone

# Create the Pinecone client with the key loaded from the environment.
pinecone_api_key = PINECONE_API_KEY
pc = Pinecone(api_key=pinecone_api_key)

In [None]:
from pinecone import ServerlessSpec
from langchain_pinecone import PineconeVectorStore

index_name = "chatbot"

# Vector size of the index; must match the output size of `embeddings`.
EMBEDDING_DIM = 384

# True  -> drop any existing index and re-embed every chunk (the behaviour this
#          cell always had; use it after changing the data or the dimension).
# False -> keep an existing index and only attach to it, avoiding a destructive
#          delete and a full re-upload on every re-run of the notebook.
REBUILD_INDEX = True

if REBUILD_INDEX and pc.has_index(index_name):
    pc.delete_index(index_name)
    print(f"Deleted existing index: {index_name}")

# Evaluated once, after the optional delete, and reused for both decisions
# below so creation and population always go together.
index_is_new = not pc.has_index(index_name)

if index_is_new:
    pc.create_index(
        name=index_name,
        dimension=EMBEDDING_DIM,  # embedding dimensions
        metric="cosine",  # cosine similarity
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Embed each chunk and upload it into the freshly created index.
    docsearch = PineconeVectorStore.from_documents(
        documents=texts_chunk,
        embedding=embeddings,
        index_name=index_name,
    )
    print(f"Created new index: {index_name} with dimension {EMBEDDING_DIM}")
else:
    # The index is already populated: attach to it instead of uploading the
    # same chunks again.
    docsearch = PineconeVectorStore.from_existing_index(
        index_name=index_name,
        embedding=embeddings,
    )
    print(f"Reusing existing index: {index_name}")

Created new index: chatbot with dimension 384


In [None]:
# Attach to the index that already exists in Pinecone. This call connects to
# the stored vectors; `embeddings` is supplied so that queries can be embedded.
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)

In [37]:
# Similarity search over the vector store, asking for 3 results per query.
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)


In [38]:
# Smoke-test the retriever on a sample question and display what it returns.
retrieved_docs = retriever.invoke("What is Acne?")
retrieved_docs

[Document(id='c50d9421-1388-46b2-9c10-9818f0f22311', metadata={'source': 'data\\Medical_book.pdf'}, page_content='Acne is a skin condition that occurs when pores or\nhair follicles become blocked. This allows a waxy\nmaterial, sebum, to collect inside the pores or follicles.\nNormally, sebum flows out onto the skin and hair to\nform a protective coating, but when it cannot get out,\nsmall swellings develop on the skin surface. Bacteria\nand dead skin cells can also collect that can cause\ninflammation. Swellings that are small and not\ninflamed are whiteheads or blackheads. When they\nbecome inflamed, they turn into pimples. Pimples that\nfill with pus are called pustules.\nAcne cannot be cured, but acne drugs can help clear\nthe skin. Benzoyl peroxide and tretinoin work by mildly\nirritating the skin. This encourages skin cells to slough\noff, which helps open blocked pores. Benzoyl peroxide\nalso kills bacteria, which helps prevent whiteheads and\nblackheads from turning into pimples

In [39]:
from langchain_openai import ChatOpenAI
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Chat model used to generate answers. The key is passed explicitly from
# OPEN_API_KEY, which an earlier cell reads from the environment.
chatModel = ChatOpenAI(
    model="gpt-4o",
    api_key=OPEN_API_KEY
)

In [41]:
# System message for the chat prompt: the instruction sentences are joined
# with single spaces, followed by a blank line and the `{context}` placeholder.
system_prompt = "\n\n".join(
    [
        " ".join(
            [
                "You are an Medical assistant for question-answering tasks.",
                "Use the following pieces of retrieved context to answer the question.",
                "If you don't know the answer, say that you don't know.",
                "Use three sentences maximum and keep the answer concise.",
            ]
        ),
        "{context}",
    ]
)


# Two-message chat template: the system instructions (with `{context}`) and
# the user's question, supplied as `{input}`.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [42]:
# First build the chain that passes the documents plus the prompt to the chat
# model, then wrap it with the retriever to form the full RAG chain.
question_answer_chain = create_stuff_documents_chain(llm=chatModel, prompt=prompt)
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [43]:
# Ask the RAG chain a question and print only the "answer" field of the result.
response = rag_chain.invoke({"input": "what is Acromegaly and gigantism?"})
print(response["answer"])

Acromegaly is a disorder characterized by the abnormal release of growth hormone (GH) from the pituitary gland, causing increased growth in bone and soft tissue, along with other body disturbances. In children whose bony growth plates have not closed, this results in exceptional long bone growth, known as gigantism, leading to unusual height. The difference between the two lies mainly in the timing: gigantism occurs before bone growth plates close, while acromegaly occurs after.


In [None]:
# Same query pattern for a second question; `response` is overwritten.
response = rag_chain.invoke({"input": "what is Acne?"})
print(response["answer"])

In [44]:
# Follow-up question about treatment; `response` is overwritten again.
response = rag_chain.invoke({"input": "what is the Treatment of Acne?"})
print(response["answer"])

Acne treatment involves reducing sebum production, removing dead skin cells, and killing bacteria using topical drugs and oral medications. Treatment choices depend on the severity of acne, with mild cases often treated with topical agents like tretinoin, benzoyl peroxide, or salicylic acid. More severe cases may require topical antibiotics or isotretinoin, with improvement typically seen in two to four weeks for mild cases and two or more months for moderate to severe cases.
