In [None]:
# Install every third-party package the RAG cell below imports, in one place.
# gpt4all is imported by that cell, and sentence-transformers is the backend
# HuggingFaceEmbeddings loads, so both must be present on a fresh runtime.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install pymupdf langchain-community faiss-cpu gpt4all sentence-transformers

In [None]:
import faiss
import numpy as np
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain import PromptTemplate, LLMChain
from gpt4all import GPT4All

# Load the PDF document
loader = PyMuPDFLoader("/content/q.pdf")
data = loader.load()

# Merge the loaded pages into a single text
text = " ".join([page.page_content for page in data])

# Split the text into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=100)
chunks = text_splitter.split_text(text)

# Fail early with a readable message instead of an IndexError further down
# when the PDF yields no extractable text (e.g. a scanned document).
if not chunks:
    raise ValueError("No text could be extracted from the PDF; nothing to index.")

# Initialise the embedding model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Embed all chunks in one batched call (the first chunk is no longer embedded
# twice, and the encoder can batch instead of being called once per chunk).
chunk_vectors = embeddings.embed_documents(chunks)
chunk_vectors_np = np.array(chunk_vectors).astype('float32')

# Create the Faiss index; the dimensionality is read from the embedding matrix.
# `first_chunk_embedding` and `d` are kept as notebook-level names.
first_chunk_embedding = chunk_vectors[0]
d = chunk_vectors_np.shape[1]

index = faiss.IndexFlatL2(d)
index.add(chunk_vectors_np)

# Save the index to a file (optional)
faiss.write_index(index, "book_index.bin")

# Load the GPT4All model from a local GGUF file.
# NOTE(review): this file is fetched by a `wget` cell further down the
# notebook; on a fresh runtime that cell has to run before this one.
model = GPT4All(model_name="/content/Sky-T1-32B-Preview-Q4_K_M.gguf")

# Template used to generate the answer: the retrieved book text goes into
# {context} and the user's question into {question}.
template = """
Given the following text from a book:

{context}

Using only the provided text, directly answer the question: {question}.
Do not use any outside information or prior knowledge.
"""

# Wrap the template so it can be filled in with prompt.format(...)
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

# Function that produces an answer for a question
def generate_answer(query, k_value=5, max_tokens_value=300):
    """Answer `query` using the most similar book chunks as context.

    Relies on the notebook-level `embeddings`, `index`, `chunks`, `prompt`
    and `model` objects.

    Args:
        query: The question text; it is embedded and used for the search.
        k_value: Maximum number of chunks to retrieve. Clamped to the number
            of vectors stored in the index.
        max_tokens_value: Passed to `model.generate` as `max_tokens`.

    Returns:
        Whatever `model.generate` returns for the filled-in prompt.

    Raises:
        ValueError: If `k_value` is smaller than 1.
    """
    if k_value < 1:
        raise ValueError("k_value must be at least 1")

    # Turn the query text into a (1, d) float32 matrix for Faiss
    query_vector = embeddings.embed_query(query)
    query_matrix = np.array([query_vector]).astype('float32')

    # Faiss pads the result with -1 when fewer than k vectors exist; indexing
    # chunks[-1] would then silently repeat the last chunk in the context.
    # Clamp k to the index size and drop any negative ids.
    k = min(k_value, index.ntotal)
    if k > 0:
        _, I = index.search(query_matrix, k=k)
        hits = [i for i in I[0] if i >= 0]
    else:
        hits = []

    # Build the context from the matching chunks
    context = " ".join(chunks[i] for i in hits)

    # Generate the answer with GPT4All
    with model.chat_session():
        prompt_with_instruction = prompt.format(context=context, question=query)
        answer = model.generate(prompt_with_instruction, max_tokens=max_tokens_value)

    return answer

# مثال على استخدام الدالة (تم التعديل)
question = "What is the book about?"
answer = generate_answer(question)

print(f"Question: {question}")
print(f"Answer: {answer}")

In [None]:
# Fetch the Sky-T1-32B-Preview Q4_K_M GGUF weights. `-c` resumes an interrupted
# download and does nothing once the file is complete, so re-running this cell
# does not store a second multi-gigabyte copy as `*.gguf.1`.
!wget -c https://huggingface.co/bartowski/Sky-T1-32B-Preview-GGUF/resolve/main/Sky-T1-32B-Preview-Q4_K_M.gguf

In [None]:
# Download the prebuilt llama.cpp release b4456 for Ubuntu x64 (zip archive).
!wget https://github.com/ggerganov/llama.cpp/releases/download/b4456/llama-b4456-bin-ubuntu-x64.zip

In [None]:
# Unpack the downloaded llama.cpp release archive into the current directory.
!unzip llama-b4456-bin-ubuntu-x64.zip

In [None]:
# Start llama-cli from /content/build/bin without any arguments
# (presumably the location the release zip was unpacked to — confirm).
!/content/build/bin/llama-cli


In [None]:
# Start llama-cli relative to the current directory.
# NOTE(review): this only resolves when the working directory contains
# llama-cli; the `%cd /content/build/bin` happens in the following cell.
!./llama-cli

In [None]:
# Switch to the directory holding the prebuilt binaries and call llama-cli
# with -h to list its options.
%cd /content/build/bin
!./llama-cli -h

In [None]:
# Go back to /content so the relative paths in the following cells start there.
%cd /content

In [None]:
# Clone the llama.cpp sources and make the checkout the working directory.
# NOTE(review): `%cd llama.cpp` is relative, so re-running this cell in the
# same session resolves against wherever the previous run left the kernel.
!git clone https://github.com/ggerganov/llama.cpp
%cd llama.cpp

In [None]:
# Configure llama.cpp into ./build, then compile it with the Release config.
# NOTE(review): configured with default options only; a later cell passes
# `-ngl 33` to llama-cli — confirm whether a GPU-enabled build is intended.
!cmake -B build
!cmake --build build --config Release

In [None]:
# Download the llama-cpp-python 0.3.4 wheel (cu124 / cp310 / linux_x86_64
# according to the file name).
# NOTE(review): no visible cell installs or imports this wheel.
!wget https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu124/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl

In [None]:
# Install the GPT4All Python bindings. The RAG cells import gpt4all, so it has
# to be installed before they run.
!pip install gpt4all

In [None]:
from gpt4all import GPT4All
# Smoke test: load the local GGUF file and ask one trivial question inside a
# chat session.
# NOTE(review): the previous "4.66GB" remark does not fit this file — a later
# cell splits it into four shards with a 5G maximum each.
model = GPT4All("/content/Sky-T1-32B-Preview-Q4_K_M.gguf") # loads the local Sky-T1 GGUF model
with model.chat_session():
    print(model.generate("what is 2+4=?", max_tokens=124))

In [None]:
# Template only (placeholder paths). It was pasted without the leading `!`,
# which is a SyntaxError in a code cell, so it is kept as a comment.
# Run it from the llama.cpp checkout; the converter takes the model directory
# as a positional argument and the target file via --outfile:
#   !python convert_hf_to_gguf.py path/to/your/model --outfile path/to/output/model.gguf


In [None]:
# Template call of llama-gguf-split with placeholder paths.
# NOTE(review): the working invocation further down passes the input file and
# the output prefix positionally instead of -m/-o — check against `-h`.
!./build/bin/llama-gguf-split -m /path/to/your/model.gguf -o /path/to/output/directory


In [None]:
# Call the locally built llama-gguf-split with -h to list its options.
!/content/llama.cpp/build/bin/llama-gguf-split -h

In [None]:
# First attempt at splitting the model.
# NOTE(review): `N(5G)` looks like help-text notation pasted verbatim; the
# next cell repeats the command with a plain `5G`.
!/content/llama.cpp/build/bin/llama-gguf-split --split-max-size N(5G) /content/Sky-T1-32B-Preview-Q4_K_M.gguf /content

In [None]:
# Split the model into shards of at most 5G each. The output prefix is
# `/content`; the upload cells below read the shards as
# /content-0000X-of-00004.gguf.
!/content/llama.cpp/build/bin/llama-gguf-split --split-max-size 5G /content/Sky-T1-32B-Preview-Q4_K_M.gguf /content

In [None]:
# Template only (PATH_TO_MODEL is a placeholder). It was pasted without the
# leading `!`, which is a SyntaxError in a code cell, so it is kept as a
# comment; the next cell runs the concrete command.
#   !./build/bin/llama-cli -m PATH_TO_MODEL -p "Building a website can be done in 10 steps:" -ngl 32

In [None]:
# Run the locally built llama-cli on the full (unsplit) model with a
# completion-style prompt.
!/content/llama.cpp/build/bin/llama-cli -m /content/Sky-T1-32B-Preview-Q4_K_M.gguf -p "Building a website can be done in 10 steps:"

In [None]:
# Prompt used in the next cell: "Who is Napoleon Bonaparte?"

In [None]:
# Ask the full (unsplit) model a factual question through llama-cli.
!/content/llama.cpp/build/bin/llama-cli -m /content/Sky-T1-32B-Preview-Q4_K_M.gguf -p "Who is Napoleon Bonaparte?"

In [None]:
import os

from huggingface_hub import login

# Do not paste the access token into the notebook: saved notebooks get shared
# and committed. The token is read from the HF_TOKEN environment variable; when
# it is unset, token=None makes login() ask for it interactively.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
from huggingface_hub import upload_file

# Template upload call: the local path, the name inside the repo and the repo
# id below are all placeholders.
upload_file(
    path_or_fileobj="/path/to/your/model_file",
    path_in_repo="model_file.bin",
    repo_id="your_username/your_repo_name",
)

In [None]:
# Target Hugging Face repo id for the uploads below: rakmik/Sky-T1q4part

In [None]:
import os

from huggingface_hub import login

# Do not hardcode the access token in the notebook: saved notebooks get shared
# and committed. The token is read from the HF_TOKEN environment variable; when
# it is unset, token=None makes login() ask for it interactively.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
from huggingface_hub import upload_file

# Upload shard 1 of 4 to the rakmik/Sky-T1q4part repo under the same file name.
upload_file(
    path_or_fileobj="/content-00001-of-00004.gguf",
    path_in_repo="content-00001-of-00004.gguf",
    repo_id="rakmik/Sky-T1q4part",
)

In [None]:
from huggingface_hub import upload_file

# Upload shard 2 of 4 to the rakmik/Sky-T1q4part repo under the same file name.
upload_file(
    path_or_fileobj="/content-00002-of-00004.gguf",
    path_in_repo="content-00002-of-00004.gguf",
    repo_id="rakmik/Sky-T1q4part",
)

In [None]:
from huggingface_hub import upload_file

# Upload shard 3 of 4 to the rakmik/Sky-T1q4part repo under the same file name.
upload_file(
    path_or_fileobj="/content-00003-of-00004.gguf",
    path_in_repo="content-00003-of-00004.gguf",
    repo_id="rakmik/Sky-T1q4part",
)

In [None]:
from huggingface_hub import upload_file

# Upload shard 4 of 4 to the rakmik/Sky-T1q4part repo under the same file name.
upload_file(
    path_or_fileobj="/content-00004-of-00004.gguf",
    path_in_repo="content-00004-of-00004.gguf",
    repo_id="rakmik/Sky-T1q4part",
)

In [None]:
# Run llama-cli against the first shard of the split model.
# NOTE(review): presumably llama.cpp picks up the remaining shards from the
# first one — confirm that all four files sit next to each other.
!/content/llama.cpp/build/bin/llama-cli -m /content-00001-of-00004.gguf -p "Who is Napoleon Bonaparte?"

In [None]:
# Template only (example model path). It was pasted without the leading `!`,
# which is a SyntaxError in a code cell, so it is kept as a comment.
#   !llama-cli -m ./models/llama-7b/ggml-model-q4_0.gguf -c 512 -b 1024 -n 256 --keep 48

In [None]:
# Template only (PATH_TO_MODEL is a placeholder). It was pasted without the
# leading `!`, which is a SyntaxError in a code cell, so it is kept as a
# comment; a later cell runs the same flags against the real model.
#   !./bin/llama-cli -m "PATH_TO_MODEL" -p "Hi you how are you" -n 50 -e -ngl 33 -t 4

In [None]:
# Start llama-cli on the first shard without passing a prompt.
!/content/llama.cpp/build/bin/llama-cli -m /content-00001-of-00004.gguf

In [None]:
# Run the split model with an explicit prompt plus the -n / -e / -ngl / -t
# options (see `llama-cli -h` for their exact meaning).
!/content/llama.cpp/build/bin/llama-cli -m "/content-00001-of-00004.gguf" -p "Hi you how are you" -n 50 -e -ngl 33 -t 4

In [None]:

import faiss
import numpy as np
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain import PromptTemplate, LLMChain
from gpt4all import GPT4All

# Load the PDF document
loader = PyMuPDFLoader("/content/The_Lightning_Thief_-_Percy_Jackson_1-10.pdf")
data = loader.load()

# Merge the loaded pages into a single text
text = " ".join([page.page_content for page in data])

# Split the text into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=150)
chunks = text_splitter.split_text(text)

# Fail early with a readable message instead of an IndexError further down
# when the PDF yields no extractable text (e.g. a scanned document).
if not chunks:
    raise ValueError("No text could be extracted from the PDF; nothing to index.")

# Initialise the embedding model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Embed all chunks in one batched call (the first chunk is no longer embedded
# twice, and the encoder can batch instead of being called once per chunk).
chunk_vectors = embeddings.embed_documents(chunks)
chunk_vectors_np = np.array(chunk_vectors).astype('float32')

# Create the Faiss index; the dimensionality is read from the embedding matrix.
# `first_chunk_embedding` and `d` are kept as notebook-level names.
first_chunk_embedding = chunk_vectors[0]
d = chunk_vectors_np.shape[1]

index = faiss.IndexFlatL2(d)
index.add(chunk_vectors_np)

# Save the index to a file (optional)
faiss.write_index(index, "book_index.bin")

# Load the GPT4All model from a local GGUF file.
model = GPT4All(model_name="/content/Sky-T1-32B-Preview-Q4_K_M.gguf")

# Template used to generate the answer: the user's question goes into
# {question} and the retrieved document text into {context}.
# NOTE(review): the <|start_header_id|>/<|eot_id|> markers look like
# Llama-3-style chat markup, and generate_answer also sends this prompt inside
# model.chat_session() — confirm the loaded model expects these tokens and
# that the chat template is not applied twice.
prompt_template = """
<|start_header_id|>user<|end_header_id|>
You are an assistant for answering questions using provided context.
You are given the extracted parts of a long document and a question. Provide a conversational answer.
If you don't know the answer, just say "I do not know." Don't make up an answer.
Question: {question}
Context: {context}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

# Wrap the template so it can be filled in with prompt.format(...)
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Function that produces an answer for a question
def generate_answer(query, k_value=3, max_tokens_value=300):
    """Answer `query` using the most similar document chunks as context.

    Relies on the notebook-level `embeddings`, `index`, `chunks`, `prompt`
    and `model` objects.

    Args:
        query: The question text; it is embedded and used for the search.
        k_value: Maximum number of chunks to retrieve. Clamped to the number
            of vectors stored in the index.
        max_tokens_value: Passed to `model.generate` as `max_tokens`.

    Returns:
        Whatever `model.generate` returns for the filled-in prompt.

    Raises:
        ValueError: If `k_value` is smaller than 1.
    """
    if k_value < 1:
        raise ValueError("k_value must be at least 1")

    # Turn the query text into a (1, d) float32 matrix for Faiss
    query_vector = embeddings.embed_query(query)
    query_matrix = np.array([query_vector]).astype('float32')

    # Faiss pads the result with -1 when fewer than k vectors exist; indexing
    # chunks[-1] would then silently repeat the last chunk in the context.
    # Clamp k to the index size and drop any negative ids.
    k = min(k_value, index.ntotal)
    if k > 0:
        _, I = index.search(query_matrix, k=k)
        hits = [i for i in I[0] if i >= 0]
    else:
        hits = []

    # Build the context from the matching chunks
    context = " ".join(chunks[i] for i in hits)

    # Generate the answer with GPT4All
    with model.chat_session():
        answer = model.generate(prompt.format(context=context, question=query), max_tokens=max_tokens_value)

    return answer

# مثال على استخدام الدالة (تم التعديل)
question = "who is Grover?"
answer = generate_answer(question)

print(f"Question: {question}")
print(f"Answer: {answer}")

In [None]:
# Install PyMuPDF, used by PyMuPDFLoader. The first cell of the notebook
# already runs this same install.
!pip install pymupdf

In [None]:
# Install langchain-community (loaders and embeddings used by the RAG cells).
# The first cell of the notebook already runs this same install.
!pip install langchain-community

In [None]:
# Install the CPU build of Faiss. The first cell of the notebook already runs
# this same install.
!pip install faiss-cpu

In [None]:
# Same three installs as the first cell of the notebook, repeated here.
!pip install pymupdf
!pip install langchain-community
!pip install faiss-cpu