In [6]:
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEndpoint
from langchain.chains import RetrievalQA

In [2]:
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [3]:

def read_pdf(data):
    loader = PyPDFDirectoryLoader(
        path=data,
        glob="*.pdf",
    )
    
    documents = loader.load()
    return documents


In [4]:

data = r"D:\Ai_generative\lec7\data"
extracted_data=read_pdf(data)


KeyboardInterrupt: 

In [45]:
extracted_data

'D:\\Ai_generative\\lec7\\data'

In [47]:
#Create text chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [49]:
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 5859


In [62]:
text_chunks[323]

Document(metadata={'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'creator': 'PyPDF', 'creationdate': '2004-12-18T17:00:02-05:00', 'moddate': '2004-12-18T16:15:31-06:00', 'source': 'D:\\Ai_generative\\lec7\\data\\Medical_book.pdf', 'total_pages': 637, 'page': 42, 'page_label': '43'}, page_content='reasoning part of the brain. Different parts of the brain-\nstem have different functions such as the control of breath-\ning and muscle coordination. Large tumors that impact the\nbrain stem can result in headaches, walking difficulties\n(gait ataxia) and involuntary shaking movements of the\nmuscles (tremors). In rare cases when an acoustic neuro-\nma remains undiagnosed and untreated it can cause nau-\nsea, vomiting, lethargy and eventually coma, respiratory')

In [66]:
text_chunks[323].page_content

'reasoning part of the brain. Different parts of the brain-\nstem have different functions such as the control of breath-\ning and muscle coordination. Large tumors that impact the\nbrain stem can result in headaches, walking difficulties\n(gait ataxia) and involuntary shaking movements of the\nmuscles (tremors). In rare cases when an acoustic neuro-\nma remains undiagnosed and untreated it can cause nau-\nsea, vomiting, lethargy and eventually coma, respiratory'

In [51]:
#download embedding model
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [52]:
embeddings = download_hugging_face_embeddings()


  from .autonotebook import tqdm as notebook_tqdm


In [69]:
embeddings


HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [67]:

persist_directory="medical-chatbot"
docsearch=Chroma.from_documents(text_chunks, embeddings,persist_directory=persist_directory)

In [70]:

persist_directory="medical-chatbot"
docsearch=Chroma.from_documents(text_chunks, embeddings,persist_directory=persist_directory)
vectordb = Chroma(persist_directory=persist_directory,embedding_function=embeddings)

In [71]:
vectordb

<langchain_chroma.vectorstores.Chroma at 0x28f460f72d0>

In [73]:

query = "What are Allergies"

docs=vectordb.similarity_search(query, k=3)

print("Result", docs)

Result [Document(id='98d3698c-ebc6-40ca-9c3e-1d1a5d5206ac', metadata={'total_pages': 637, 'page_label': '136', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'moddate': '2004-12-18T16:15:31-06:00', 'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'source': 'D:\\Ai_generative\\lec7\\data\\Medical_book.pdf', 'page': 135}, page_content='Purpose\nAllergy is a reaction of the immune system. Nor-\nmally, the immune system responds to foreign microor-\nganisms and particles, like pollen or dust, by producing\nspecific proteins called antibodies that are capable of\nbinding to identifying molecules, or antigens, on the\nforeign organisms. This reaction between antibody and\nantigen sets off a series of reactions designed to protect\nthe body from infection. Sometimes, this same series of'), Document(id='f662b09b-ceee-4a04-b099-10a699f993c7', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'page': 129, 'total_pages': 637, 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00

In [75]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [76]:


PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

In [None]:
#meta-llama/Llama-3.1-8B-Instruct
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

In [7]:

model_id = "meta-llama/Llama-3.1-8B-Instruct"

llm = HuggingFaceEndpoint(
    repo_id=model_id,
    huggingfacehub_api_token="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",
    task="text-generation",
    temperature=0.7,
    do_sample=True,
    # Add these parameters to prevent the device_map issue
    max_new_tokens=512,  # Limit response length
    top_p=0.9,  # Nucleus sampling
)

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
llm("what skin disease")

  llm("what skin disease")


" is caused by a fungus that infects the skin after an injury\nA. ringworm\nB. athlete's foot\nC. dermatophytosis\nD. all of the above\nAnswer: D\nExplanation: A. Ringworm is a skin disease caused by a fungus that infects the skin after an injury. The correct answer is ringworm. B. Athlete's foot is a fungal infection that is also caused by a fungus, but it is not caused by an injury. C. Dermatophytosis is the scientific name for ringworm, so it is also a correct answer. D. All of the above is a correct answer because all three options are correct. The best answer is D. (Skill 1b) (Skill 1b)"

In [149]:
retriever=vectordb.as_retriever()

In [150]:
qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                  chain_type="stuff",
                                  retriever=retriever,
                                  return_source_documents=True)

In [151]:
qa_chain.invoke("what skin disease")

{'query': 'what skin disease',
 'result': '  atopic dermatitis  and  acne\nUnhelpful Answers:  eczema,  psoriasis,  athlete’s foot,  acne vulgaris,  acne\nThe correct answer is  atopic dermatitis  and  acne. The question asks for a skin disease, and the two diseases listed as helpful answers are indeed skin diseases. The other diseases listed as unhelpful answers are either symptoms ( eczema ) or a specific type of acne ( acne vulgaris ), or a separate fungal infection ( athlete’s foot ), or a separate skin disease ( psoriasis ). Therefore, the helpful answers are the ones that are most closely related to the question. The correct answer is  atopic dermatitis  and  acne. \n\nNote: The question is asking for a skin disease, and the two diseases listed as helpful answers are indeed skin diseases. The other diseases listed as unhelpful answers are either symptoms, a specific type of acne, or a separate fungal infection. Therefore, the helpful answers are the ones that are most closely rel

In [None]:
model_name = "meta-llama/Llama-3.1-8B-Instruct"


In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "gpt2"  # مثلاً موديل محلي متاح عندك

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if device=="cuda" else -1)

# مثال لكيفية استخدام pipeline مع LangChain
from langchain_huggingface import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=pipe)


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Device set to use cpu


In [13]:
from transformers import AutoModelForCausalLM, AutoTokenizer,pipeline
import torch

model_name = "gpt2"  # مثلاً موديل محلي متاح عندك

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Set pad token if not set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    temperature=0.7,
    top_k=50,
    no_repeat_ngram_size=2,
    pad_token_id=tokenizer.pad_token_id
)

llm = HuggingFacePipeline(pipeline=pipe)
# Decode and print
llm("what ai")

Device set to use cpu


"what aiony, how can you believe the guy who created it?\n\n[The guy you want to go to, the one who was willing to give up a good thing for what he believes, is the person who will pay you back. And he'll give you his money, and you'll be his friend. It's the same with the internet. I mean, you know, if you're a guy, then you see people who are willing – all these guys who're willing.\n:\n... and they're the ones who make all this money. They're not the people that are going to buy your stuff. Those are the guys that give the money to you. If you don't get the $2.5 million I've been asking for, they'll tell you that it's not worth it. That's a lie. The people I know who do this are paid off with a percentage of what the market pays me. So I'd rather be the victim of"

In [5]:
import torch

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA devices: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    print(f"Current device: {torch.cuda.current_device()}")
    print(f"Device name: {torch.cuda.get_device_name(0)}")

PyTorch version: 2.7.0+cu118
CUDA available: False
CUDA devices: 0
