In [None]:
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

def get_pdf_text(pdf_path):
    """
    Reads text from a PDF file.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        str: Combined text content from the PDF file.
    """
    text = ""
    pdf_reader = PdfReader(pdf_path)
    for page in pdf_reader.pages:
        if page.extract_text():  # Handle cases where text extraction may fail
            text += page.extract_text()
    return text

def get_text_chunks(text):
    """
    Splits a large text into smaller chunks for processing.

    Args:
        text (str): The input text to split.

    Returns:
        list: List of text chunks.
    """
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    return text_splitter.split_text(text)

def get_vectorstore(text_chunks):
    """
    Creates a FAISS vector store for semantic search.

    Args:
        text_chunks (list): List of text chunks.

    Returns:
        FAISS: A FAISS vector store.
    """
    embeddings = HuggingFaceEmbeddings(model_name="hkunlp/instructor-large")
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)

def get_conversation_chain(vectorstore):
    """
    Sets up a conversational retrieval chain using the LLaMA-3.2-3B-Instruct model.

    Args:
        vectorstore (FAISS): The vector store for document retrieval.

    Returns:
        ConversationalRetrievalChain: A LangChain conversation chain.
    """
    model_name = "meta-llama/Llama-3.2-3B-Instruct"

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype="float32"
    )

    # Create a Hugging Face pipeline
    llm_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    # Wrap pipeline in LangChain-compatible LLM
    llm = HuggingFacePipeline(pipeline=llm_pipeline)

    # Set up memory for conversational chain
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory
    )

def main(pdf_path, question):
    """
    Main function to process a PDF and answer a question.

    Args:
        pdf_path (str): Path to the PDF file.
        question (str): The question to ask about the PDF content.
    """
    print("Reading PDF...")
    raw_text = get_pdf_text(pdf_path)

    print("Splitting text into chunks...")
    text_chunks = get_text_chunks(raw_text)

    print("Creating vector store...")
    vectorstore = get_vectorstore(text_chunks)

    print("Setting up conversation chain...")
    conversation_chain = get_conversation_chain(vectorstore)

    print("Generating response...")
    response = conversation_chain.run(question)

    # Check if response is a string or dictionary and print accordingly
    if isinstance(response, str):
        print("Response:")
        print(response)
    else:
        print("Response:")
        print(response.get('result', 'No result found'))  # In case it's a dictionary

if __name__ == "__main__":
    # Replace 'path_to_pdf.pdf' with the actual PDF file path
    pdf_path = "/content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf"

    # Replace 'your_question_here' with the actual question
    question = "What is the book about?"

    main(pdf_path, question)


Reading PDF...
Splitting text into chunks...
Creating vector store...


  embeddings = HuggingFaceEmbeddings(model_name="hkunlp/instructor-large")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Setting up conversation chain...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu
  llm = HuggingFacePipeline(pipeline=llm_pipeline)
  memory = ConversationBufferMemory(
  response = conversation_chain.run(question)


Generating response...


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Response:
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

My name is Percy Jackson. 
I'm twelve years old. Until a few months ago, I was a 
boarding student at Yancy Academy, a private school for 
troubled kids in upstate New York. 
Am I a troubled kid? 
Yeah. You could say that. 
[1] I could start at any point in my short miserable life 
to prove it, but things really started going bad last May, 
when our sixth-grade class took a field trip to Manhattan— 
twenty-eight mental-case kids and two teachers on a yellow 
school bus, heading to the Metropolitan Museum of Art to 
look at ancient Greek and Roman stuff. 
I know—it sounds like torture. Most Yancy field trips 
were. 
But Mr. Brunner, our Latin teacher, was leading this 
trip, so I had hopes. 
Mr. Brunner was this middle-aged guy in a motorized 
wheelchair. He had thinning hair and a scruffy beard and a 
frayed tweed 

https://github.com/scorpionTaj/Chat-with-LLaMA-2-and-PDFs/issues

https://github.com/scorpionTaj/Chat-with-LLaMA-2-and-PDFs/issues/1

شغال جيد

In [None]:
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from concurrent.futures import ThreadPoolExecutor

def get_pdf_text(pdf_path):
    """
    Reads text from a PDF file.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        str: Combined text content from the PDF file.
    """
    text = ""
    pdf_reader = PdfReader(pdf_path)
    for page in pdf_reader.pages:
        if page.extract_text():  # Handle cases where text extraction may fail
            text += page.extract_text()
    return text

def get_text_chunks(text):
    """
    Splits a large text into smaller chunks for processing.

    Args:
        text (str): The input text to split.

    Returns:
        list: List of text chunks.
    """
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    return text_splitter.split_text(text)

def get_vectorstore(text_chunks):
    """
    Creates a FAISS vector store for semantic search.

    Args:
        text_chunks (list): List of text chunks.

    Returns:
        FAISS: A FAISS vector store.
    """
    embeddings = HuggingFaceEmbeddings(model_name="hkunlp/instructor-large")
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)

def get_conversation_chain(vectorstore):
    """
    Sets up a conversational retrieval chain using the LLaMA-3.2-3B-Instruct model.

    Args:
        vectorstore (FAISS): The vector store for document retrieval.

    Returns:
        ConversationalRetrievalChain: A LangChain conversation chain.
    """
    model_name = "meta-llama/Llama-3.2-3B-Instruct"

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype="float32"
    )

    # Create a Hugging Face pipeline
    llm_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    # Wrap pipeline in LangChain-compatible LLM
    llm = HuggingFacePipeline(pipeline=llm_pipeline)

    # Set up memory for conversational chain
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory
    )

def chat_processor(chat_data):
    """
    Processes the chat and gets a response from the conversation chain.

    Args:
        chat_data (dict): The chat data containing the question.

    Returns:
        str: The generated response.
    """
    try:
        question = chat_data['question']
        # Generate response using the conversation chain
        response = conversation_chain.run(question)
        return response
    except Exception as e:
        print(f"Error in chat_processor: {str(e)}")
        return "Error occurred while generating response."

def main(pdf_path, question):
    """
    Main function to process a PDF and answer a question.

    Args:
        pdf_path (str): Path to the PDF file.
        question (str): The question to ask about the PDF content.
    """
    print("Reading PDF...")
    raw_text = get_pdf_text(pdf_path)

    print("Splitting text into chunks...")
    text_chunks = get_text_chunks(raw_text)

    print("Creating vector store...")
    vectorstore = get_vectorstore(text_chunks)

    print("Setting up conversation chain...")
    global conversation_chain
    conversation_chain = get_conversation_chain(vectorstore)

    print("Generating response...")
    # Run the chat processing in parallel
    with ThreadPoolExecutor(max_workers=2) as executor:
        responses = list(executor.map(chat_processor, [{'question': question}]))

    # Only print the last response
    print("Response:")
    print(responses[-1])

if __name__ == "__main__":
    # Replace 'path_to_pdf.pdf' with the actual PDF file path
    pdf_path = "/content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf"

    # Replace 'your_question_here' with the actual question
    question = "What is the name of the hero in the book?"

    main(pdf_path, question)


Reading PDF...
Splitting text into chunks...
Creating vector store...


  embeddings = HuggingFaceEmbeddings(model_name="hkunlp/instructor-large")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Setting up conversation chain...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu
  llm = HuggingFacePipeline(pipeline=llm_pipeline)
  memory = ConversationBufferMemory(
  response = conversation_chain.run(question)


Generating response...


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Response:
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

RICK RIORDAN 
MIRAMAX BOOKS 
HYPERION BOOKS FOR CHILDREN 
NEW YORK Copyright © 2005 by Rick Riordan 
All rights reserved. No part of this book may be reproduced or transmitted in any 
form or by any means, electronic or mechanical, including photocopying, recording, 
or by any information storage and retrieval system, without written permission from 
the publisher. For information address Hyperion Books for Children, 
114 Fifth Avenue, New York, New York 10011-5690. 
First Edition 
5 7 9 10 8 6 
Printed in the United States of America 
Library of Congress Cataloging-in-Publication Data on file. 
ISBN 0-7868-5629-7 (hardcover) 
Reinforced binding 
Visit www.hyperionbooksforchildren.com To Haley, 
who heard the story first 1 · I Accidentally Vaporize My Pre-algebra Teacher 1 
2 · Three Old Ladies Knit the Socks of De

شغال

In [None]:
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from concurrent.futures import ThreadPoolExecutor

def get_pdf_text(pdf_path):
    """
    Reads text from a PDF file.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        str: Combined text content from the PDF file.
    """
    text = ""
    pdf_reader = PdfReader(pdf_path)
    for page in pdf_reader.pages:
        if page.extract_text():  # Handle cases where text extraction may fail
            text += page.extract_text()
    return text

def get_text_chunks(text):
    """
    Splits a large text into smaller chunks for processing.

    Args:
        text (str): The input text to split.

    Returns:
        list: List of text chunks.
    """
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    return text_splitter.split_text(text)

def get_vectorstore(text_chunks):
    """
    Creates a FAISS vector store for semantic search.

    Args:
        text_chunks (list): List of text chunks.

    Returns:
        FAISS: A FAISS vector store.
    """
    embeddings = HuggingFaceEmbeddings(model_name="hkunlp/instructor-large")
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)

def get_conversation_chain(vectorstore):
    """
    Sets up a conversational retrieval chain using the LLaMA-3.2-3B-Instruct model.

    Args:
        vectorstore (FAISS): The vector store for document retrieval.

    Returns:
        ConversationalRetrievalChain: A LangChain conversation chain.
    """
    model_name = "meta-llama/Llama-3.2-3B-Instruct"

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype="float32"
    )

    # Create a Hugging Face pipeline
    llm_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    # Wrap pipeline in LangChain-compatible LLM
    llm = HuggingFacePipeline(pipeline=llm_pipeline)

    # Set up memory for conversational chain
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory
    )

def chat_processor(chat_data):
    """
    Processes the chat and gets a response from the conversation chain.

    Args:
        chat_data (dict): The chat data containing the question.

    Returns:
        str: The generated response.
    """
    try:
        question = chat_data['question']
        # Generate response using the conversation chain
        response = conversation_chain.run(question)
        return response
    except Exception as e:
        print(f"Error in chat_processor: {str(e)}")
        return "Error occurred while generating response."

def main(pdf_path, question):
    """
    Main function to process a PDF and answer a question.

    Args:
        pdf_path (str): Path to the PDF file.
        question (str): The question to ask about the PDF content.
    """
    print("Reading PDF...")  # This can be kept to show process, but content is not printed.
    raw_text = get_pdf_text(pdf_path)

    print("Splitting text into chunks...")  # The splitting process is kept, no text is shown.
    text_chunks = get_text_chunks(raw_text)

    print("Creating vector store...")  # Creating the vector store, no text is displayed.
    vectorstore = get_vectorstore(text_chunks)

    print("Setting up conversation chain...")  # Setting up the chain without showing intermediate content.
    global conversation_chain
    conversation_chain = get_conversation_chain(vectorstore)

    print("Generating response...")  # This is kept to show process, but no intermediate text is printed.
    # Run the chat processing in parallel
    with ThreadPoolExecutor(max_workers=2) as executor:
        responses = list(executor.map(chat_processor, [{'question': question}]))

    # Only print the last response
    print("Response:")
    print(responses[-1])

if __name__ == "__main__":
    # Replace 'path_to_pdf.pdf' with the actual PDF file path
    pdf_path = "/content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf"

    # Replace 'your_question_here' with the actual question
    question = "Who is Percy in the book and what does he do?"

    main(pdf_path, question)


Reading PDF...
Splitting text into chunks...
Creating vector store...


  embeddings = HuggingFaceEmbeddings(model_name="hkunlp/instructor-large")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Setting up conversation chain...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu
  llm = HuggingFacePipeline(pipeline=llm_pipeline)
  memory = ConversationBufferMemory(
  response = conversation_chain.run(question)


Generating response...


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Response:
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

My name is Percy Jackson. 
I'm twelve years old. Until a few months ago, I was a 
boarding student at Yancy Academy, a private school for 
troubled kids in upstate New York. 
Am I a troubled kid? 
Yeah. You could say that. 
[1] I could start at any point in my short miserable life 
to prove it, but things really started going bad last May, 
when our sixth-grade class took a field trip to Manhattan— 
twenty-eight mental-case kids and two teachers on a yellow 
school bus, heading to the Metropolitan Museum of Art to 
look at ancient Greek and Roman stuff. 
I know—it sounds like torture. Most Yancy field trips 
were. 
But Mr. Brunner, our Latin teacher, was leading this 
trip, so I had hopes. 
Mr. Brunner was this middle-aged guy in a motorized 
wheelchair. He had thinning hair and a scruffy beard and a 
frayed tweed 

شغال

In [None]:
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from concurrent.futures import ThreadPoolExecutor

def get_pdf_text(pdf_path):
    """
    Reads text from a PDF file.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        str: Combined text content from the PDF file.
    """
    text = ""
    pdf_reader = PdfReader(pdf_path)
    for page in pdf_reader.pages:
        if page.extract_text():  # Handle cases where text extraction may fail
            text += page.extract_text()
    return text

def get_text_chunks(text):
    """
    Splits a large text into smaller chunks for processing.

    Args:
        text (str): The input text to split.

    Returns:
        list: List of text chunks.
    """
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    return text_splitter.split_text(text)

def get_vectorstore(text_chunks):
    """
    Creates a FAISS vector store for semantic search.

    Args:
        text_chunks (list): List of text chunks.

    Returns:
        FAISS: A FAISS vector store.
    """
    embeddings = HuggingFaceEmbeddings(model_name="hkunlp/instructor-large")
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)

def get_conversation_chain(vectorstore):
    """
    Sets up a conversational retrieval chain using the LLaMA-3.2-3B-Instruct model.

    Args:
        vectorstore (FAISS): The vector store for document retrieval.

    Returns:
        ConversationalRetrievalChain: A LangChain conversation chain.
    """
    model_name = "meta-llama/Llama-3.2-3B-Instruct"

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype="float32"
    )

    # Create a Hugging Face pipeline
    llm_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    # Wrap pipeline in LangChain-compatible LLM
    llm = HuggingFacePipeline(pipeline=llm_pipeline)

    # Set up memory for conversational chain
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory
    )

def chat_processor(chat_data):
    """
    Processes the chat and gets a response from the conversation chain.

    Args:
        chat_data (dict): The chat data containing the question.

    Returns:
        str: The generated response.
    """
    try:
        question = chat_data['question']
        # Generate response using the conversation chain
        response = conversation_chain.run(question)
        return response
    except Exception as e:
        print(f"Error in chat_processor: {str(e)}")
        return "Error occurred while generating response."

def main(pdf_path, question):
    """
    Main function to process a PDF and answer a question.

    Args:
        pdf_path (str): Path to the PDF file.
        question (str): The question to ask about the PDF content.
    """
    print("Reading PDF...")  # This can be kept to show process, but content is not printed.
    raw_text = get_pdf_text(pdf_path)

    print("Splitting text into chunks...")  # The splitting process is kept, no text is shown.
    text_chunks = get_text_chunks(raw_text)

    print("Creating vector store...")  # Creating the vector store, no text is displayed.
    vectorstore = get_vectorstore(text_chunks)

    print("Setting up conversation chain...")  # Setting up the chain without showing intermediate content.
    global conversation_chain
    conversation_chain = get_conversation_chain(vectorstore)

    print("Generating response...")  # This is kept to show process, but no intermediate text is printed.
    # Run the chat processing in parallel
    with ThreadPoolExecutor(max_workers=2) as executor:
        responses = list(executor.map(chat_processor, [{'question': question}]))

    # Only print the last response
    print("Response:")
    print(responses[-1])

if __name__ == "__main__":
    # Replace 'path_to_pdf.pdf' with the actual PDF file path
    pdf_path = "/content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf"

    # Replace 'your_question_here' with the actual question
    question = "How did Percy's story begin?"

    main(pdf_path, question)


Reading PDF...
Splitting text into chunks...
Creating vector store...


  embeddings = HuggingFaceEmbeddings(model_name="hkunlp/instructor-large")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Setting up conversation chain...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu
  llm = HuggingFacePipeline(pipeline=llm_pipeline)
  memory = ConversationBufferMemory(
  response = conversation_chain.run(question)


Generating response...


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Response:
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

My name is Percy Jackson. 
I'm twelve years old. Until a few months ago, I was a 
boarding student at Yancy Academy, a private school for 
troubled kids in upstate New York. 
Am I a troubled kid? 
Yeah. You could say that. 
[1] I could start at any point in my short miserable life 
to prove it, but things really started going bad last May, 
when our sixth-grade class took a field trip to Manhattan— 
twenty-eight mental-case kids and two teachers on a yellow 
school bus, heading to the Metropolitan Museum of Art to 
look at ancient Greek and Roman stuff. 
I know—it sounds like torture. Most Yancy field trips 
were. 
But Mr. Brunner, our Latin teacher, was leading this 
trip, so I had hopes. 
Mr. Brunner was this middle-aged guy in a motorized 
wheelchair. He had thinning hair and a scruffy beard and a 
frayed tweed 

In [None]:
from groq import Groq

client = Groq()
completion = client.chat.completions.create(
    model="llama3-8b-8192",
    messages=[],
    temperature=1,
    max_tokens=1024,
    top_p=1,
    stream=True,
    stop=None,
)

for chunk in completion:
    print(chunk.choices[0].delta.content or "", end="")


ModuleNotFoundError: No module named 'groq'

In [None]:
ها أنا قد كتبت لك برنامج بسيط بلغة بايثون لإنشاء شات مع الكتب والكود، يستخدم نماذج hkunlp/instructor-large و meta-llama/Llama-3.2-3B-Instruct، و يتم تشغيله على المعالج فقط بدون GPU. يمكنك تشغيله في كولاب جوجل.

```python
# استيراد المكتبات الضرورية
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# تحميل النماذج
model1 = AutoModelForSeq2SeqLM.from_pretrained("hkunlp/instructor-large")
tokenizer1 = AutoTokenizer.from_pretrained("hkunlp/instructor-large")

model2 = AutoModelForSeq2SeqLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
tokenizer2 = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")

# ضبط Device على المعالج فقط
device = torch.device("cpu")

# نقل النماذج إلى المعالج
model1.to(device)
model2.to(device)

def chat_with_book(question, model, tokenizer):
    # معالجة السؤال
    inputs = tokenizer.encode_plus(
        question,
        add_special_tokens=True,
        max_length=512,
        return_attention_mask=True,
        return_tensors="pt"
    )

    # تمرير السؤال من خلال النموذج
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=512
    )

    # معالجة الإجابة
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return answer

# تقديم سؤال
question = input("ادخل سؤالك: ")

# الحصول على إجابة من النموذج الأول
answer1 = chat_with_book(question, model1, tokenizer1)

# الحصول على إجابة من النموذج الثاني
answer2 = chat_with_book(question, model2, tokenizer2)

# طباعة الإجابات
print("الإجابة من النموذج الأول: ", answer1)
print("الإجابة من النموذج الثاني: ", answer2)
```

In [None]:
# تثبيت المكتبات اللازمة
#!pip install transformers sentence-transformers accelerate torch PyPDF2

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pathlib import Path
import os
import PyPDF2

# التحقق من توفر المعالج (CPU)
if not torch.cuda.is_available():
    print("سيتم استخدام المعالج (CPU) نظرًا لعدم توفر معالج الرسوميات (GPU).")
else:
    print("تم الكشف عن معالج الرسوميات (GPU)، ولكن سيتم استخدام المعالج (CPU) حسب طلبك.")

# تحديد مسار الكتاب (يمكنك تغييره)
book_path = 'book.pdf'  # استبدل هذا بالمسار الفعلي للكتاب PDF الخاص بك

# 1. معالجة الكتاب: استخراج النص وتقسيمه إلى فقرات (تعديل لملفات PDF)
def load_and_split_book(file_path):
    if not os.path.exists(file_path):
        print(f"لم يتم العثور على الملف {file_path}")
        return []
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            text = ""
            for page_num in range(len(pdf_reader.pages)):
                page = pdf_reader.pages[page_num]
                text += page.extract_text()
            paragraphs = text.split('\n\n')  # تقسيم النص إلى فقرات بناءً على فاصل سطرين فارغين
            return paragraphs
    except Exception as e:
         print(f"حدث خطأ أثناء معالجة ملف PDF: {e}")
         return []

# 2. تحميل النموذج instructor-large للتمثيل النصي
print("جاري تحميل نموذج instructor-large...")
instructor_model = SentenceTransformer('hkunlp/instructor-large', device='cpu')
print("تم تحميل نموذج instructor-large.")

# 3. إنشاء تمثيلات نصية (embeddings) للفقرات
def generate_embeddings(paragraphs, model):
    embeddings = model.encode(paragraphs, show_progress_bar=True)
    return embeddings

# 4. تحميل نموذج Llama-3.2-3B-Instruct للدردشة
print("جاري تحميل نموذج Llama-3.2-3B-Instruct...")
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3-8B-Instruct")
llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3-8B-Instruct", device_map="cpu",low_cpu_mem_usage=True)
print("تم تحميل نموذج Llama-3.2-3B-Instruct.")

# 5. وظيفة البحث عن الفقرات الأكثر صلة بالسؤال
def search_relevant_paragraphs(query, embeddings, paragraphs, model, top_k=5):
    query_embedding = model.encode(query)
    similarities = torch.nn.functional.cosine_similarity(torch.tensor(query_embedding).unsqueeze(0), torch.tensor(embeddings))
    top_indices = torch.topk(similarities, k=top_k).indices
    return [paragraphs[i] for i in top_indices]

# 6. وظيفة إنشاء الإجابة باستخدام Llama-3.2-3B-Instruct
def generate_answer(query, context, tokenizer, model):
    prompt = f"Based on the following context, answer the question: Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=200,do_sample=True, top_k=50, top_p=0.95)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# 7. الواجهة الرئيسية للدردشة
def chat_with_book(book_path):
    paragraphs = load_and_split_book(book_path)

    if not paragraphs:
        print("لا يمكن بدء الدردشة، الرجاء التأكد من وجود الكتاب.")
        return

    embeddings = generate_embeddings(paragraphs, instructor_model)

    while True:
        query = input("اطرح سؤالك (أو اكتب 'خروج' للخروج): ")
        if query.lower() == 'خروج':
            break

        relevant_paragraphs = search_relevant_paragraphs(query, embeddings, paragraphs, instructor_model)
        context = " ".join(relevant_paragraphs)

        answer = generate_answer(query, context, llama_tokenizer, llama_model)
        print("الإجابة:", answer)

# بدء الدردشة
if __name__ == "__main__":
  chat_with_book(book_path)

In [None]:
# تثبيت المكتبات اللازمة
#!pip install transformers sentence-transformers accelerate torch PyPDF2

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pathlib import Path
import os
import PyPDF2

# التحقق من توفر المعالج (CPU)
if not torch.cuda.is_available():
    print("سيتم استخدام المعالج (CPU) نظرًا لعدم توفر معالج الرسوميات (GPU).")
else:
    print("تم الكشف عن معالج الرسوميات (GPU)، ولكن سيتم استخدام المعالج (CPU) حسب طلبك.")

# تحديد مسار الكتاب (يمكنك تغييره)
book_path = '/content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf'  # استبدل هذا بالمسار الفعلي للكتاب PDF الخاص بك

# 1. معالجة الكتاب: استخراج النص وتقسيمه إلى فقرات (تعديل لملفات PDF)
def load_and_split_book(file_path):
    if not os.path.exists(file_path):
        print(f"لم يتم العثور على الملف {file_path}")
        return []
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            text = ""
            for page_num in range(len(pdf_reader.pages)):
                page = pdf_reader.pages[page_num]
                text += page.extract_text()
            paragraphs = text.split('\n\n')  # تقسيم النص إلى فقرات بناءً على فاصل سطرين فارغين
            return paragraphs
    except Exception as e:
         print(f"حدث خطأ أثناء معالجة ملف PDF: {e}")
         return []

# 2. تحميل النموذج instructor-large للتمثيل النصي
print("جاري تحميل نموذج instructor-large...")
instructor_model = SentenceTransformer('hkunlp/instructor-large')
print("تم تحميل نموذج instructor-large.")

# 3. إنشاء تمثيلات نصية (embeddings) للفقرات
def generate_embeddings(paragraphs, model):
    embeddings = model.encode(paragraphs, show_progress_bar=True)
    return embeddings

# 4. تحميل نموذج Llama-3.2-3B-Instruct للدردشة
print("جاري تحميل نموذج Llama-3.2-3B-Instruct...")
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", device_map="auto", torch_dtype="float32", low_cpu_mem_usage=True)
print("تم تحميل نموذج Llama-3.2-3B-Instruct.")

# 5. وظيفة البحث عن الفقرات الأكثر صلة بالسؤال
def search_relevant_paragraphs(query, embeddings, paragraphs, model, top_k=5):
    query_embedding = model.encode(query)
    similarities = torch.nn.functional.cosine_similarity(torch.tensor(query_embedding).unsqueeze(0), torch.tensor(embeddings))
    top_indices = torch.topk(similarities, k=top_k).indices
    return [paragraphs[i] for i in top_indices]

# 6. وظيفة إنشاء الإجابة باستخدام Llama-3.2-3B-Instruct
def generate_answer(query, context, tokenizer, model):
    prompt = f"Based on the following context, answer the question: Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=200,do_sample=True, top_k=40, top_p=0.90)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# 7. الواجهة الرئيسية للدردشة
def chat_with_book(book_path):
    paragraphs = load_and_split_book(book_path)

    if not paragraphs:
        print("لا يمكن بدء الدردشة، الرجاء التأكد من وجود الكتاب.")
        return

    embeddings = generate_embeddings(paragraphs, instructor_model)

    while True:
        query = input("اطرح سؤالك (أو اكتب 'خروج' للخروج): ")
        if query.lower() == 'خروج':
            break

        relevant_paragraphs = search_relevant_paragraphs(query, embeddings, paragraphs, instructor_model)
        context = " ".join(relevant_paragraphs)

        answer = generate_answer(query, context, llama_tokenizer, llama_model)
        print("الإجابة:", answer)

# بدء الدردشة
if __name__ == "__main__":
  chat_with_book(book_path)

سيتم استخدام المعالج (CPU) نظرًا لعدم توفر معالج الرسوميات (GPU).
جاري تحميل نموذج instructor-large...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


تم تحميل نموذج instructor-large.
جاري تحميل نموذج Llama-3.2-3B-Instruct...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



تم تحميل نموذج Llama-3.2-3B-Instruct.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

اطرح سؤالك (أو اكتب 'خروج' للخروج): Who is the hero of the story?


RuntimeError: selected index k out of range

In [None]:
!huggingface-cli login




    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: read)

In [None]:
# تثبيت المكتبات اللازمة
#!pip install transformers sentence-transformers accelerate torch PyPDF2

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pathlib import Path
import os
import PyPDF2

# التحقق من توفر المعالج (CPU)
if not torch.cuda.is_available():
    print("سيتم استخدام المعالج (CPU) نظرًا لعدم توفر معالج الرسوميات (GPU).")
else:
    print("تم الكشف عن معالج الرسوميات (GPU)، ولكن سيتم استخدام المعالج (CPU) حسب طلبك.")

# تحديد مسار الكتاب (يمكنك تغييره)
book_path = '/content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf'  # استبدل هذا بالمسار الفعلي للكتاب PDF الخاص بك

# 1. معالجة الكتاب: استخراج النص وتقسيمه إلى فقرات (تعديل لملفات PDF)
def load_and_split_book(file_path):
    if not os.path.exists(file_path):
        print(f"لم يتم العثور على الملف {file_path}")
        return []
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            text = ""
            for page_num in range(len(pdf_reader.pages)):
                page = pdf_reader.pages[page_num]
                text += page.extract_text()
            paragraphs = text.split('\n\n')  # تقسيم النص إلى فقرات بناءً على فاصل سطرين فارغين
            return paragraphs
    except Exception as e:
         print(f"حدث خطأ أثناء معالجة ملف PDF: {e}")
         return []

# 2. تحميل النموذج instructor-large للتمثيل النصي
print("جاري تحميل نموذج instructor-large...")
instructor_model = SentenceTransformer('hkunlp/instructor-large', device='cpu')
print("تم تحميل نموذج instructor-large.")

# 3. إنشاء تمثيلات نصية (embeddings) للفقرات
def generate_embeddings(paragraphs, model):
    embeddings = model.encode(paragraphs, show_progress_bar=True)
    return embeddings

# 4. تحميل نموذج Llama-3.2-3B-Instruct للدردشة
print("جاري تحميل نموذج Llama-3.2-3B-Instruct...")
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", device_map="auto", torch_dtype="float32", low_cpu_mem_usage=True)
print("تم تحميل نموذج Llama-3.2-3B-Instruct.")

# 5. وظيفة البحث عن الفقرات الأكثر صلة بالسؤال (تعديل لحل الخطأ)
def search_relevant_paragraphs(query, embeddings, paragraphs, model, top_k=5):
    num_paragraphs = len(paragraphs)
    if num_paragraphs == 0:
      return [] # إرجاع قائمة فارغة إذا لم يكن هناك فقرات
    if num_paragraphs < top_k:
        top_k = num_paragraphs # تعديل قيمة k لتتناسب مع عدد الفقرات إذا كانت أقل منها

    query_embedding = model.encode(query)
    similarities = torch.nn.functional.cosine_similarity(torch.tensor(query_embedding).unsqueeze(0), torch.tensor(embeddings))
    top_indices = torch.topk(similarities, k=top_k).indices
    return [paragraphs[i] for i in top_indices]

# 6. وظيفة إنشاء الإجابة باستخدام Llama-3.2-3B-Instruct
def generate_answer(query, context, tokenizer, model):
    prompt = f"Based on the following context, answer the question: Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=200,do_sample=True, top_k=50, top_p=0.95)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# 7. الواجهة الرئيسية للدردشة
def chat_with_book(book_path):
    paragraphs = load_and_split_book(book_path)

    if not paragraphs:
        print("لا يمكن بدء الدردشة، الرجاء التأكد من وجود الكتاب.")
        return

    embeddings = generate_embeddings(paragraphs, instructor_model)

    while True:
        query = input("اطرح سؤالك (أو اكتب 'خروج' للخروج): ")
        if query.lower() == 'خروج':
            break

        relevant_paragraphs = search_relevant_paragraphs(query, embeddings, paragraphs, instructor_model)
        context = " ".join(relevant_paragraphs)

        answer = generate_answer(query, context, llama_tokenizer, llama_model)
        print("الإجابة:", answer)

# بدء الدردشة
if __name__ == "__main__":
  chat_with_book(book_path)

سيتم استخدام المعالج (CPU) نظرًا لعدم توفر معالج الرسوميات (GPU).
جاري تحميل نموذج instructor-large...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


تم تحميل نموذج instructor-large.
جاري تحميل نموذج Llama-3.2-3B-Instruct...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



تم تحميل نموذج Llama-3.2-3B-Instruct.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

اطرح سؤالك (أو اكتب 'خروج' للخروج): Who is the hero of the story?


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


ValueError: Input length of input_ids is 2585, but `max_length` is set to 200. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.

In [None]:
# تثبيت المكتبات اللازمة
#!pip install transformers sentence-transformers accelerate torch PyPDF2

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pathlib import Path
import os
import PyPDF2

# التحقق من توفر المعالج (CPU)
if not torch.cuda.is_available():
    print("سيتم استخدام المعالج (CPU) نظرًا لعدم توفر معالج الرسوميات (GPU).")
else:
    print("تم الكشف عن معالج الرسوميات (GPU)، ولكن سيتم استخدام المعالج (CPU) حسب طلبك.")

# تحديد مسار الكتاب (يمكنك تغييره)
book_path = '/content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf'  # استبدل هذا بالمسار الفعلي للكتاب PDF الخاص بك

# 1. معالجة الكتاب: استخراج النص وتقسيمه إلى فقرات (تعديل لملفات PDF)
def load_and_split_book(file_path):
    if not os.path.exists(file_path):
        print(f"لم يتم العثور على الملف {file_path}")
        return []
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            text = ""
            for page_num in range(len(pdf_reader.pages)):
                page = pdf_reader.pages[page_num]
                text += page.extract_text()
            paragraphs = text.split('\n\n')  # تقسيم النص إلى فقرات بناءً على فاصل سطرين فارغين
            return paragraphs
    except Exception as e:
         print(f"حدث خطأ أثناء معالجة ملف PDF: {e}")
         return []

# 2. تحميل النموذج instructor-large للتمثيل النصي
print("جاري تحميل نموذج instructor-large...")
instructor_model = SentenceTransformer('hkunlp/instructor-large')
print("تم تحميل نموذج instructor-large.")

# 3. إنشاء تمثيلات نصية (embeddings) للفقرات
def generate_embeddings(paragraphs, model):
    embeddings = model.encode(paragraphs, show_progress_bar=True)
    return embeddings

# 4. تحميل نموذج Llama-3.2-3B-Instruct للدردشة
print("جاري تحميل نموذج Llama-3.2-3B-Instruct...")
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", device_map="auto", torch_dtype="float32", low_cpu_mem_usage=True)
print("تم تحميل نموذج Llama-3.2-3B-Instruct.")

# 5. وظيفة البحث عن الفقرات الأكثر صلة بالسؤال (تعديل لحل الخطأ)
def search_relevant_paragraphs(query, embeddings, paragraphs, model, top_k=5):
    num_paragraphs = len(paragraphs)
    if num_paragraphs == 0:
        return []  # إرجاع قائمة فارغة إذا لم يكن هناك فقرات
    if num_paragraphs < top_k:
        top_k = num_paragraphs  # تعديل قيمة k لتتناسب مع عدد الفقرات إذا كانت أقل منها

    query_embedding = model.encode(query)
    similarities = torch.nn.functional.cosine_similarity(torch.tensor(query_embedding).unsqueeze(0), torch.tensor(embeddings))
    top_indices = torch.topk(similarities, k=top_k).indices
    return [paragraphs[i] for i in top_indices]

# 6. وظيفة إنشاء الإجابة باستخدام Llama-3.2-3B-Instruct (تعديل لاستخدام max_new_tokens)
def generate_answer(query, context, tokenizer, model):
    prompt = f"Based on the following context, answer the question: Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=300, do_sample=True, top_k=50, top_p=0.95)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# 7. الواجهة الرئيسية للدردشة
def chat_with_book(book_path):
    paragraphs = load_and_split_book(book_path)

    if not paragraphs:
        print("لا يمكن بدء الدردشة، الرجاء التأكد من وجود الكتاب.")
        return

    embeddings = generate_embeddings(paragraphs, instructor_model)

    while True:
        query = input("اطرح سؤالك (أو اكتب 'خروج' للخروج): ")
        if query.lower() == 'خروج':
            break

        relevant_paragraphs = search_relevant_paragraphs(query, embeddings, paragraphs, instructor_model)
        context = " ".join(relevant_paragraphs)

        answer = generate_answer(query, context, llama_tokenizer, llama_model)
        print("الإجابة:", answer)

# بدء الدردشة
if __name__ == "__main__":
    chat_with_book(book_path)

سيتم استخدام المعالج (CPU) نظرًا لعدم توفر معالج الرسوميات (GPU).
جاري تحميل نموذج instructor-large...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


تم تحميل نموذج instructor-large.
جاري تحميل نموذج Llama-3.2-3B-Instruct...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



تم تحميل نموذج Llama-3.2-3B-Instruct.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

اطرح سؤالك (أو اكتب 'خروج' للخروج): Who is the hero of the story?


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [None]:
# تثبيت المكتبات اللازمة
#!pip install transformers sentence-transformers accelerate torch PyPDF2

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pathlib import Path
import os
import PyPDF2

# التحقق من توفر المعالج (CPU)
if not torch.cuda.is_available():
    print("سيتم استخدام المعالج (CPU) نظرًا لعدم توفر معالج الرسوميات (GPU).")
else:
    print("تم الكشف عن معالج الرسوميات (GPU)، ولكن سيتم استخدام المعالج (CPU) حسب طلبك.")

# تحديد مسار الكتاب (يمكنك تغييره)
book_path = '/content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf'  # استبدل هذا بالمسار الفعلي للكتاب PDF الخاص بك

# 1. معالجة الكتاب: استخراج النص وتقسيمه إلى فقرات (تعديل لملفات PDF)
def load_and_split_book(file_path):
    if not os.path.exists(file_path):
        print(f"لم يتم العثور على الملف {file_path}")
        return []
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            text = ""
            for page_num in range(len(pdf_reader.pages)):
                page = pdf_reader.pages[page_num]
                text += page.extract_text()
            paragraphs = text.split('\n\n')  # تقسيم النص إلى فقرات بناءً على فاصل سطرين فارغين
            return paragraphs
    except Exception as e:
         print(f"حدث خطأ أثناء معالجة ملف PDF: {e}")
         return []

# 2. تحميل النموذج instructor-large للتمثيل النصي
print("جاري تحميل نموذج instructor-large...")
instructor_model = SentenceTransformer('hkunlp/instructor-large')
print("تم تحميل نموذج instructor-large.")

# 3. إنشاء تمثيلات نصية (embeddings) للفقرات
def generate_embeddings(paragraphs, model):
    embeddings = model.encode(paragraphs, show_progress_bar=True)
    return embeddings

# 4. تحميل نموذج Llama-3.2-3B-Instruct للدردشة
print("جاري تحميل نموذج Llama-3.2-3B-Instruct...")
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", device_map="auto", torch_dtype="float32", low_cpu_mem_usage=True)
print("تم تحميل نموذج Llama-3.2-3B-Instruct.")

# 5. وظيفة البحث عن الفقرات الأكثر صلة بالسؤال (تعديل لحل الخطأ)
def search_relevant_paragraphs(query, embeddings, paragraphs, model, top_k=5):
    num_paragraphs = len(paragraphs)
    if num_paragraphs == 0:
        return []  # إرجاع قائمة فارغة إذا لم يكن هناك فقرات
    if num_paragraphs < top_k:
        top_k = num_paragraphs  # تعديل قيمة k لتتناسب مع عدد الفقرات إذا كانت أقل منها

    query_embedding = model.encode(query)
    similarities = torch.nn.functional.cosine_similarity(torch.tensor(query_embedding).unsqueeze(0), torch.tensor(embeddings))
    top_indices = torch.topk(similarities, k=top_k).indices
    return [paragraphs[i] for i in top_indices]

# 6. وظيفة إنشاء الإجابة باستخدام Llama-3.2-3B-Instruct (تعديل لاستخدام max_new_tokens)
def generate_answer(query, context, tokenizer, model):
    prompt = f"Based on the following context, answer the question: Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=30, do_sample=True, top_k=40, top_p=0.85)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# 7. الواجهة الرئيسية للدردشة
def chat_with_book(book_path):
    paragraphs = load_and_split_book(book_path)

    if not paragraphs:
        print("لا يمكن بدء الدردشة، الرجاء التأكد من وجود الكتاب.")
        return

    embeddings = generate_embeddings(paragraphs, instructor_model)

    while True:
        query = input("اطرح سؤالك (أو اكتب 'خروج' للخروج): ")
        if query.lower() == 'خروج':
            break

        relevant_paragraphs = search_relevant_paragraphs(query, embeddings, paragraphs, instructor_model)
        context = " ".join(relevant_paragraphs)

        answer = generate_answer(query, context, llama_tokenizer, llama_model)
        print("الإجابة:", answer)

# بدء الدردشة
if __name__ == "__main__":
    chat_with_book(book_path)

In [None]:
# تثبيت المكتبات اللازمة
#!pip install transformers sentence-transformers accelerate torch PyPDF2

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pathlib import Path
import os
import PyPDF2

# التحقق من توفر المعالج (CPU)
if not torch.cuda.is_available():
    print("سيتم استخدام المعالج (CPU) نظرًا لعدم توفر معالج الرسوميات (GPU).")
else:
    print("تم الكشف عن معالج الرسوميات (GPU)، ولكن سيتم استخدام المعالج (CPU) حسب طلبك.")

# تحديد مسار الكتاب (يمكنك تغييره)
book_path = '/content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf'  # استبدل هذا بالمسار الفعلي للكتاب PDF الخاص بك

# 1. معالجة الكتاب: استخراج النص وتقسيمه إلى فقرات (تعديل لملفات PDF)
def load_and_split_book(file_path):
    if not os.path.exists(file_path):
        print(f"لم يتم العثور على الملف {file_path}")
        return []
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            text = ""
            for page_num in range(len(pdf_reader.pages)):
                page = pdf_reader.pages[page_num]
                text += page.extract_text()
            paragraphs = text.split('\n\n')  # تقسيم النص إلى فقرات بناءً على فاصل سطرين فارغين
            return paragraphs
    except Exception as e:
         print(f"حدث خطأ أثناء معالجة ملف PDF: {e}")
         return []

# 2. تحميل النموذج instructor-large للتمثيل النصي
print("جاري تحميل نموذج instructor-large...")
instructor_model = SentenceTransformer('hkunlp/instructor-large', device='cpu')
print("تم تحميل نموذج instructor-large.")

# 3. إنشاء تمثيلات نصية (embeddings) للفقرات
def generate_embeddings(paragraphs, model):
    embeddings = model.encode(paragraphs, show_progress_bar=True)
    return embeddings

# 4. تحميل نموذج Llama-3.2-3B-Instruct للدردشة (نموذج أصغر)
print("جاري تحميل نموذج Llama-3.2-3B-Instruct...")
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", device_map="auto", torch_dtype="bfloat32", low_cpu_mem_usage=True)
print("تم تحميل نموذج Llama-3.2-3B-Instruct.")

# 5. وظيفة البحث عن الفقرات الأكثر صلة بالسؤال (تعديل لحل الخطأ)
def search_relevant_paragraphs(query, embeddings, paragraphs, model, top_k=5):
    num_paragraphs = len(paragraphs)
    if num_paragraphs == 0:
        return []  # إرجاع قائمة فارغة إذا لم يكن هناك فقرات
    if num_paragraphs < top_k:
        top_k = num_paragraphs  # تعديل قيمة k لتتناسب مع عدد الفقرات إذا كانت أقل منها

    query_embedding = model.encode(query)
    similarities = torch.nn.functional.cosine_similarity(torch.tensor(query_embedding).unsqueeze(0), torch.tensor(embeddings))
    top_indices = torch.topk(similarities, k=top_k).indices
    return [paragraphs[i] for i in top_indices]

# 6. وظيفة إنشاء الإجابة باستخدام Llama-3.2-3B-Instruct (تعديل لاستخدام max_new_tokens)
def generate_answer(query, context, tokenizer, model):
    prompt = f"Based on the following context, answer the question: Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=30, do_sample=True, top_k=40, top_p=0.9)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# 7. الواجهة الرئيسية للدردشة
def chat_with_book(book_path):
    paragraphs = load_and_split_book(book_path)

    if not paragraphs:
        print("لا يمكن بدء الدردشة، الرجاء التأكد من وجود الكتاب.")
        return

    embeddings = generate_embeddings(paragraphs, instructor_model)

    while True:
        query = input("اطرح سؤالك (أو اكتب 'خروج' للخروج): ")
        if query.lower() == 'خروج':
            break

        relevant_paragraphs = search_relevant_paragraphs(query, embeddings, paragraphs, instructor_model)
        context = " ".join(relevant_paragraphs)

        answer = generate_answer(query, context, llama_tokenizer, llama_model)
        print("الإجابة:", answer)

# بدء الدردشة
if __name__ == "__main__":
    chat_with_book(book_path)

AttributeError: partially initialized module 'torch' has no attribute 'nn' (most likely due to a circular import)

In [None]:
!pip uninstall torch -y
!pip install torch torchvision torchaudio

Found existing installation: torch 2.5.1+cu121
Uninstalling torch-2.5.1+cu121:
  Successfully uninstalled torch-2.5.1+cu121
Collecting torch
  Downloading torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuf

In [None]:
# تثبيت وإعادة تثبيت PyTorch
!pip uninstall torch -y
!pip install torch torchvision torchaudio

# تثبيت المكتبات اللازمة
!pip install transformers sentence-transformers accelerate PyPDF2

استمر اكتر من نص ساعة

In [None]:


import torch
import torch.nn.functional as F  # استيراد F من torch.nn.functional
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pathlib import Path
import os
import PyPDF2

# التحقق من إصدار PyTorch
print("إصدار PyTorch:", torch.__version__)

# التحقق من توفر المعالج (CPU)
if not torch.cuda.is_available():
    print("سيتم استخدام المعالج (CPU) نظرًا لعدم توفر معالج الرسوميات (GPU).")
else:
    print("تم الكشف عن معالج الرسوميات (GPU)، ولكن سيتم استخدام المعالج (CPU) حسب طلبك.")

# تحديد مسار الكتاب (يمكنك تغييره)
book_path = '/content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf'  # استبدل هذا بالمسار الفعلي للكتاب PDF الخاص بك

# 1. معالجة الكتاب: استخراج النص وتقسيمه إلى فقرات (تعديل لملفات PDF)
def load_and_split_book(file_path):
    if not os.path.exists(file_path):
        print(f"لم يتم العثور على الملف {file_path}")
        return []
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            text = ""
            for page_num in range(len(pdf_reader.pages)):
                page = pdf_reader.pages[page_num]
                text += page.extract_text()
            paragraphs = text.split('\n\n')  # تقسيم النص إلى فقرات بناءً على فاصل سطرين فارغين
            return paragraphs
    except Exception as e:
         print(f"حدث خطأ أثناء معالجة ملف PDF: {e}")
         return []

# 2. تحميل النموذج instructor-large للتمثيل النصي
print("جاري تحميل نموذج instructor-large...")
instructor_model = SentenceTransformer('hkunlp/instructor-large', device='cpu')
print("تم تحميل نموذج instructor-large.")

# 3. إنشاء تمثيلات نصية (embeddings) للفقرات
def generate_embeddings(paragraphs, model):
    embeddings = model.encode(paragraphs, show_progress_bar=True)
    return embeddings

# 4. تحميل نموذج Llama-3.2-3B-Instruct للدردشة (نموذج أصغر)
print("جاري تحميل نموذج Llama-3.2-3B-Instruct...")
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", device_map="auto", torch_dtype="auto", low_cpu_mem_usage=True)
print("تم تحميل نموذج Llama-3.2-3B-Instruct.")
print("تم تحميل نموذج Llama-3.2-3B-Instruct.")

# 5. وظيفة البحث عن الفقرات الأكثر صلة بالسؤال (تعديل لحل الخطأ)
def search_relevant_paragraphs(query, embeddings, paragraphs, model, top_k=5):
    num_paragraphs = len(paragraphs)
    if num_paragraphs == 0:
        return []  # إرجاع قائمة فارغة إذا لم يكن هناك فقرات
    if num_paragraphs < top_k:
        top_k = num_paragraphs  # تعديل قيمة k لتتناسب مع عدد الفقرات إذا كانت أقل منها

    query_embedding = model.encode(query)
    similarities = F.cosine_similarity(torch.tensor(query_embedding).unsqueeze(0), torch.tensor(embeddings)) # استخدم F بدلاً من torch.nn.functional
    top_indices = torch.topk(similarities, k=top_k).indices
    return [paragraphs[i] for i in top_indices]

# 6. وظيفة إنشاء الإجابة باستخدام Llama-3.2-3B-Instruct (تعديل لاستخدام max_new_tokens)
def generate_answer(query, context, tokenizer, model):
    prompt = f"Based on the following context, answer the question: Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=30, do_sample=True, top_k=40, top_p=0.90)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# 7. الواجهة الرئيسية للدردشة
def chat_with_book(book_path):
    paragraphs = load_and_split_book(book_path)

    if not paragraphs:
        print("لا يمكن بدء الدردشة، الرجاء التأكد من وجود الكتاب.")
        return

    embeddings = generate_embeddings(paragraphs, instructor_model)

    while True:
        query = input("اطرح سؤالك (أو اكتب 'خروج' للخروج): ")
        if query.lower() == 'خروج':
            break

        relevant_paragraphs = search_relevant_paragraphs(query, embeddings, paragraphs, instructor_model)
        context = " ".join(relevant_paragraphs)

        answer = generate_answer(query, context, llama_tokenizer, llama_model)
        print("الإجابة:", answer)

# بدء الدردشة
if __name__ == "__main__":
    chat_with_book(book_path)

إصدار PyTorch: 2.5.1+cu124
سيتم استخدام المعالج (CPU) نظرًا لعدم توفر معالج الرسوميات (GPU).
جاري تحميل نموذج instructor-large...
تم تحميل نموذج instructor-large.
جاري تحميل نموذج Llama-3.2-3B-Instruct...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

تم تحميل نموذج Llama-3.2-3B-Instruct.
تم تحميل نموذج Llama-3.2-3B-Instruct.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

اطرح سؤالك (أو اكتب 'خروج' للخروج): Who is the hero of the story?


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


aشغال مدة كبيرة

In [None]:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pathlib import Path
import os
import PyPDF2

# التحقق من إصدار PyTorch
print("إصدار PyTorch:", torch.__version__)

# التحقق من توفر المعالج (CPU)
if not torch.cuda.is_available():
    print("سيتم استخدام المعالج (CPU) نظرًا لعدم توفر معالج الرسوميات (GPU).")
else:
    print("تم الكشف عن معالج الرسوميات (GPU)، ولكن سيتم استخدام المعالج (CPU) حسب طلبك.")

# تحديد مسار الكتاب (يمكنك تغييره)
book_path = '/content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf'  # استبدل هذا بالمسار الفعلي للكتاب PDF الخاص بك

# 1. معالجة الكتاب: استخراج النص وتقسيمه إلى فقرات (تعديل لملفات PDF)
def load_and_split_book(file_path):
    if not os.path.exists(file_path):
        print(f"لم يتم العثور على الملف {file_path}")
        return []
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            text = ""
            for page_num in range(len(pdf_reader.pages)):
                page = pdf_reader.pages[page_num]
                text += page.extract_text()
            paragraphs = text.split('\n\n')  # تقسيم النص إلى فقرات بناءً على فاصل سطرين فارغين
            return paragraphs
    except Exception as e:
         print(f"حدث خطأ أثناء معالجة ملف PDF: {e}")
         return []

# 2. تحميل النموذج instructor-large للتمثيل النصي
print("جاري تحميل نموذج instructor-large...")
instructor_model = SentenceTransformer('hkunlp/instructor-large', device='cpu')
print("تم تحميل نموذج instructor-large.")

# 3. إنشاء تمثيلات نصية (embeddings) للفقرات
def generate_embeddings(paragraphs, model):
    embeddings = model.encode(paragraphs, show_progress_bar=True)
    return embeddings

# 4. تحميل نموذج Llama-3.2-3B-Instruct للدردشة (نموذج أصغر)
print("جاري تحميل نموذج Llama-3.2-3B-Instruct...")
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", device_map="auto", torch_dtype=torch.float16, low_cpu_mem_usage=True) # تحميل النموذج بدقة float16
print("تم تحميل نموذج Llama-3.2-3B-Instruct.")

# 5. وظيفة البحث عن الفقرات الأكثر صلة بالسؤال (تعديل لحل الخطأ)
def search_relevant_paragraphs(query, embeddings, paragraphs, model, top_k=3): # تقليل top_k هنا
    num_paragraphs = len(paragraphs)
    if num_paragraphs == 0:
        return []  # إرجاع قائمة فارغة إذا لم يكن هناك فقرات
    if num_paragraphs < top_k:
        top_k = num_paragraphs  # تعديل قيمة k لتتناسب مع عدد الفقرات إذا كانت أقل منها

    query_embedding = model.encode(query)
    similarities = F.cosine_similarity(torch.tensor(query_embedding).unsqueeze(0), torch.tensor(embeddings))
    top_indices = torch.topk(similarities, k=top_k).indices
    return [paragraphs[i] for i in top_indices]

# 6. وظيفة إنشاء الإجابة باستخدام Llama-3.2-3B-Instruct (تعديل لاستخدام max_new_tokens)
def generate_answer(query, context, tokenizer, model):
    prompt = f"Based on the following context, answer the question: Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt")
    max_context_length = 512 # تحديد الطول الأقصى للسياق
    if len(inputs['input_ids'][0]) > max_context_length:
      inputs['input_ids'] = inputs['input_ids'][:, :max_context_length] # قطع السياق إذا تجاوز الحد الأقصى
      inputs['attention_mask'] = inputs['attention_mask'][:, :max_context_length]
    outputs = model.generate(**inputs, max_new_tokens=50, do_sample=False, top_k=30, top_p=0.85) # تعطيل do_sample, تقليل max_new_tokens , وتقليل top_k و top_p
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# 7. الواجهة الرئيسية للدردشة
def chat_with_book(book_path):
    paragraphs = load_and_split_book(book_path)

    if not paragraphs:
        print("لا يمكن بدء الدردشة، الرجاء التأكد من وجود الكتاب.")
        return

    embeddings = generate_embeddings(paragraphs, instructor_model)

    while True:
        query = input("اطرح سؤالك (أو اكتب 'خروج' للخروج): ")
        if query.lower() == 'خروج':
            break

        relevant_paragraphs = search_relevant_paragraphs(query, embeddings, paragraphs, instructor_model)
        context = " ".join(relevant_paragraphs)

        answer = generate_answer(query, context, llama_tokenizer, llama_model)
        print("الإجابة:", answer)

# بدء الدردشة
if __name__ == "__main__":
    chat_with_book(book_path)

إصدار PyTorch: 2.5.1+cu124
سيتم استخدام المعالج (CPU) نظرًا لعدم توفر معالج الرسوميات (GPU).
جاري تحميل نموذج instructor-large...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


تم تحميل نموذج instructor-large.
جاري تحميل نموذج Llama-3.2-3B-Instruct...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

تم تحميل نموذج Llama-3.2-3B-Instruct.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

اطرح سؤالك (أو اكتب 'خروج' للخروج): Who is the hero of the story?


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


/usr/local/lib/python3.10/dist-packages/transformers/generation/configuration_utils.py:628: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.6` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.
  warnings.warn(
/usr/local/lib/python3.10/dist-packages/transformers/generation/configuration_utils.py:633: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.85` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.
  warnings.warn(
/usr/local/lib/python3.10/dist-packages/transformers/generation/configuration_utils.py:650: UserWarning: `do_sample` is set to `False`. However, `top_k` is set to `30` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_k`.
  warnings.warn(
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.

In [None]:
!pip install InstructorEmbedding

Collecting InstructorEmbedding
  Downloading InstructorEmbedding-1.0.1-py2.py3-none-any.whl.metadata (20 kB)
Downloading InstructorEmbedding-1.0.1-py2.py3-none-any.whl (19 kB)
Installing collected packages: InstructorEmbedding
Successfully installed InstructorEmbedding-1.0.1


In [None]:
!pip install pymupdf

Collecting pymupdf
  Downloading pymupdf-1.25.1-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Downloading pymupdf-1.25.1-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.0/20.0 MB[0m [31m37.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymupdf
Successfully installed pymupdf-1.25.1


https://github.com/mjanputra/chatPDF

In [2]:
!pip install sentence_transformers PyPDF2

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [5]:
!huggingface-cli login




    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: read).
The token `read` has been saved to /root/.cache/huggingface/stored_tokens
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate w

شغال سريع على الفيجا

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
from PyPDF2 import PdfReader
import torch
import numpy as np

# Setup device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running on: {device}")

# Load embedding and language models
embedding_model = SentenceTransformer('hkunlp/instructor-large', device=device)
language_model_name = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(language_model_name)
language_model = AutoModelForCausalLM.from_pretrained(
   language_model_name,
   device_map="auto",
   torch_dtype=torch.float16
).to(device)

# Clean text
def clean_text(text):
   text = text.replace('\n', ' ')
   text = ' '.join(text.split())
   return text

# Extract text from PDF
def extract_text_from_pdf(pdf_path):
   reader = PdfReader(pdf_path)
   text = ""
   for page in reader.pages:
       text += page.extract_text()
   return clean_text(text)

# Split text into chunks
def split_text(text, chunk_size=300):
   sentences = text.split('.')
   chunks = []
   current_chunk = ""
   for sentence in sentences:
       if len(current_chunk) + len(sentence) < chunk_size:
           current_chunk += sentence + "."
       else:
           chunks.append(current_chunk)
           current_chunk = sentence + "."
   if current_chunk:
       chunks.append(current_chunk)
   return chunks

# Create embeddings
def create_embeddings(chunks):
   return embedding_model.encode(chunks, convert_to_tensor=True)

# Search relevant chunks
def search_relevant_chunks(question, chunks, embeddings, top_k=3):
   question_embedding = embedding_model.encode(question, convert_to_tensor=True)
   similarities = torch.matmul(embeddings, question_embedding.T).cpu().numpy()
   top_indices = np.argsort(similarities, axis=0)[-top_k:][::-1]
   return [chunks[idx] for idx in top_indices.flatten()]

# Create prompt
def create_prompt(relevant_chunks, question):
   context = "\n".join(relevant_chunks)
   return f"""Use the following information from the book to answer the question. If the information is not in the provided text, say so clearly.

Text from book:
{context}

Question: {question}
Answer: """

# Chat function
def chat_with_books(pdf_path):
   print("Loading and analyzing book...")
   book_text = extract_text_from_pdf(pdf_path)
   chunks = split_text(book_text)
   print("Text split into chunks.")

   print("Creating embeddings...")
   embeddings = create_embeddings(chunks)
   print("Embeddings created!")

   print("Welcome! Type 'exit' to end chat.")
   while True:
       user_input = input("\nYou: ")
       if user_input.lower() == "exit":
           print("Goodbye!")
           break

       relevant_chunks = search_relevant_chunks(user_input, chunks, embeddings)
       prompt = create_prompt(relevant_chunks, user_input)

       inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device)
       outputs = language_model.generate(
           **inputs,
           max_length=1024,
           temperature=0.7,
           top_p=0.9,
           pad_token_id=tokenizer.eos_token_id,
           do_sample=True
       )

       response = tokenizer.decode(outputs[:, inputs["input_ids"].shape[-1]:][0], skip_special_tokens=True)
       print(f"\nModel: {response}")

# Run program
if __name__ == "__main__":
   pdf_path = "/content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf"  # Replace with actual book path
   chat_with_books(pdf_path)

Running on: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading and analyzing book...
Text split into chunks.
Creating embeddings...
Embeddings created!
Welcome! Type 'exit' to end chat.

You: What is the story about?


  similarities = torch.matmul(embeddings, question_embedding.T).cpu().numpy()



Model:  The story is not provided in the text. The provided text only includes book information and some character descriptions. The actual story itself is not given. 

(Note: The text only provides a list of book titles and a brief description of two characters, but does not include the plot or content of the book.) 

Please let me know if I can help you with anything else!

You: What do you know about the story presented to you?

Model: 1 point
You know that the narrator is a student on a field trip. That is, the narrator is likely a student, probably in elementary school. The narrator is not sure if he is in trouble. The narrator has a bad history with field trips. There is a teacher, Mr. Brunner, who is presenting information about the stele. The narrator is able to recognize the picture on the stele. Mr. Brunner is pointing to the picture, and the narrator is trying to pay attention to what he is saying. The narrator is trying to listen to Mr. Brunner's explanation, but is distra

KeyboardInterrupt: Interrupted by user

شغال جيد جدا

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
from PyPDF2 import PdfReader
import torch
import numpy as np

# Setup device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running on: {device}")

# Load embedding and language models
embedding_model = SentenceTransformer('hkunlp/instructor-large', device=device)
language_model_name = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(language_model_name)
language_model = AutoModelForCausalLM.from_pretrained(
   language_model_name,
   device_map="auto",
   torch_dtype=torch.float16
).to(device)

# Extract text from PDF with improved cleaning
def extract_text_from_pdf(pdf_path):
   reader = PdfReader(pdf_path)
   full_text = ""
   for page in reader.pages:
       text = page.extract_text()
       # Basic cleaning of extracted text
       text = text.replace('\n', ' ')
       text = text.replace('  ', ' ')  # Remove double spaces
       text = ' '.join(text.split())
       full_text += text + " "
   return full_text

# Improved text splitting function
def split_text(text, chunk_size=1000):
   words = text.split()
   chunks = []
   current_chunk = []
   current_length = 0

   for word in words:
       if current_length + len(word) > chunk_size:
           chunks.append(' '.join(current_chunk))
           current_chunk = [word]
           current_length = len(word)
       else:
           current_chunk.append(word)
           current_length += len(word) + 1  # +1 for space

   if current_chunk:
       chunks.append(' '.join(current_chunk))

   return chunks

# Create embeddings
def create_embeddings(chunks):
   return embedding_model.encode(chunks, convert_to_tensor=True)

# Improved search function with debugging
def search_relevant_chunks(question, chunks, embeddings, top_k=3):
   question_embedding = embedding_model.encode(question, convert_to_tensor=True)
   similarities = torch.matmul(embeddings, question_embedding.T).cpu().numpy()
   top_indices = np.argsort(similarities, axis=0)[-top_k:][::-1]
   selected_chunks = [chunks[idx] for idx in top_indices.flatten()]

   # Debug print
   print("\nRelevant excerpts found:")
   for i, chunk in enumerate(selected_chunks, 1):
       print(f"\nExcerpt {i}:\n{chunk[:200]}...")

   return selected_chunks

# Improved prompt creation
def create_prompt(relevant_chunks, question):
   context = "\n\n".join(relevant_chunks)
   return f"""Based on the following excerpt from 'The Lightning Thief', please answer the question. If the information is not directly stated in the excerpt, please say so.

Excerpt:
{context}

Question: {question}
Answer (based only on the excerpt above): """

# Improved chat function
def chat_with_books(pdf_path):
   print("Loading and analyzing book...")
   try:
       book_text = extract_text_from_pdf(pdf_path)
       print(f"Successfully extracted {len(book_text)} characters of text")

       chunks = split_text(book_text)
       print(f"Split into {len(chunks)} chunks")

       print("Creating embeddings...")
       embeddings = create_embeddings(chunks)
       print("Embeddings created successfully!")

       print("\nWelcome! Type 'exit' to end chat.")
       print("Type 'debug' to see the first chunk of text.")

       while True:
           user_input = input("\nYou: ").strip()

           if user_input.lower() == "exit":
               print("Goodbye!")
               break

           if user_input.lower() == "debug":
               print("\nFirst chunk of text:")
               print(chunks[0][:500])
               continue

           relevant_chunks = search_relevant_chunks(user_input, chunks, embeddings)
           prompt = create_prompt(relevant_chunks, user_input)

           inputs = tokenizer(
               prompt,
               return_tensors="pt",
               truncation=True,
               max_length=1024
           ).to(device)

           outputs = language_model.generate(
               **inputs,
               max_length=1024,
               temperature=0.1,
               top_p=0.9,
               do_sample=True,
               pad_token_id=tokenizer.eos_token_id,
               num_return_sequences=1
           )

           response = tokenizer.decode(
               outputs[:, inputs["input_ids"].shape[-1]:][0],
               skip_special_tokens=True
           )
           print(f"\nModel: {response}")

   except Exception as e:
       print(f"An error occurred: {str(e)}")
       import traceback
       print(traceback.format_exc())

# Run program
if __name__ == "__main__":
   pdf_path = "/content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf"  # Replace with actual book path
   chat_with_books(pdf_path)

Running on: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading and analyzing book...
Successfully extracted 9480 characters of text
Split into 10 chunks
Creating embeddings...
Embeddings created successfully!

Welcome! Type 'exit' to end chat.
Type 'debug' to see the first chunk of text.

You: What is going on in the story presented to you /content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf


  similarities = torch.matmul(embeddings, question_embedding.T).cpu().numpy()



Relevant excerpts found:

Excerpt 1:
believe that none of this ever happened. But if you recognize yourself in these pages—if you feel something stirring inside—stop reading immediately. You might be one of us. And once you know that, it...

Excerpt 2:
up, he tricked his dad, Kronos, into barfing up his brothers and sisters—" "Eeew!" said one of the girls behind me. "—and so there was this big fight between the gods and the Titans," I continued, "an...

Excerpt 3:
Up in Smoke 93 8 · We Capture a Flag 107 9 · I Am Offered a Quest 127 10 · I Ruin a Perfectly Good Bus 149 11 · We Visit the Garden Gnome Emporium 168 12 · We Get Advice from a Poodle 188 13 · I Plung...


You: Print me the full text of the fifth page /content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf

Relevant excerpts found:

Excerpt 1:
believe that none of this ever happened. But if you recognize yourself in these pages—if you feel something stirring inside—stop reading immediately. You might be one of us. And once you

KeyboardInterrupt: Interrupted by user