In [1]:
!pip install torch
!pip install git+https://github.com/huggingface/transformers
!pip install git+https://github.com/huggingface/accelerate
!pip install huggingface_hub
!pip install sentencepiece
!pip install bitsandbytes


!pip install haystack-ai duckduckgo-api-haystack transformers sentence-transformers datasets

Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-9020u_sg
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-9020u_sg
  Resolved https://github.com/huggingface/transformers to commit 919220dab1e29f4d04eacd61a197a45a4fec2613
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Building wheels for collected packages: transformers
  Building wheel for transformers (pyproject.toml) ... done
  Created wheel for transformers: filename=transformers-4.48.0.dev0-py3-none-any.whl size=10328720 sha256=b421c1a15ae9f01ae2f58cbf0556f39be3a736c56dd5999da7415406748a3482
  Stored in directory: /tmp/pip-ephem-wheel-cache-od0q86bm/wheels/c0/14/d6/6c9a5582d2ac191ec0a483be151a4495fe1eb2a6706ca49f1b
Successfully built transformer

In [3]:
# Interactive Hugging Face Hub login: prompts for an access token and stores it locally.
# NOTE(review): presumably needed so the "meta-llama/Llama-3.2-3B-Instruct" weights
# used later in this notebook can be downloaded — confirm the model's access requirements.
!huggingface-cli login



    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: read).
The token `read` has been saved to /root/.cache/huggingface/stored_tokens
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate w

In [4]:
from datasets import load_dataset
from haystack import Document, Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.builders import PromptBuilder
import torch
from haystack.components.generators import HuggingFaceLocalGenerator

# Load the dataset and wrap every record in a Haystack Document,
# then create the in-memory store that will hold them.
dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
docs = [
    Document(content=record["content"], meta=record["meta"])
    for record in dataset
]
document_store = InMemoryDocumentStore()

# Document embedder; warm_up() is called before the embedder is run.
doc_embedder = SentenceTransformersDocumentEmbedder(
    model="sentence-transformers/all-MiniLM-L6-v2"
)
doc_embedder.warm_up()

# Compute embeddings for the documents and persist the embedded
# documents in the store.
docs_with_embeddings = doc_embedder.run(docs)
document_store.write_documents(docs_with_embeddings["documents"])

# Configure the local text-generation model.
generator = HuggingFaceLocalGenerator(
    model="meta-llama/Llama-3.2-3B-Instruct",
    huggingface_pipeline_kwargs={
        # Forwarded to the underlying Hugging Face pipeline: automatic device
        # placement, with weights loaded as bfloat16.
        "device_map": "auto",
        "torch_dtype": torch.bfloat16,
    },
    # Upper bound on generated tokens. The previous value of 20 contradicted
    # the stated intent of allowing longer answers and cut replies off after
    # a few words; this is only a cap on the number of new tokens.
    generation_kwargs={"max_new_tokens": 128},
)
# Prepare the generator before it is used in the pipeline.
generator.warm_up()

# Set up the query-side pipeline components.
# The query embedder uses the same model as the document embedder, so query
# and document vectors are produced by the same encoder.
text_embedder = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
# Retrieve several documents per query. The previous top_k=1 contradicted the
# stated intent of retrieving more relevant documents, and the prompt template
# below already iterates over a list of documents.
retriever = InMemoryEmbeddingRetriever(document_store, top_k=3)

# Prompt template in the Llama 3 chat format: a single user turn carrying the
# instructions, the retrieved documents and the question, followed by the
# assistant header so the model continues with its answer.
# NOTE(review): the template hard-codes <|begin_of_text|>; confirm the
# tokenizer does not prepend a second one when the prompt is tokenized.
prompt_template = """
<|begin_of_text|><|start_header_id|>user<|end_header_id|>

Please answer the following question using only the information provided in the documentation.
If the answer is not in the documentation, respond with 'Information not available in the database'.

Documents:
{% for document in documents %}
{{document.content}}
{% endfor %}

Question: {{query}}<|eot_id|>

<|start_header_id|>assistant<|end_header_id|>
"""

prompt_builder = PromptBuilder(template=prompt_template)

# Create the pipeline and register each component under the name that the
# connections and the run() inputs refer to.
pipe = Pipeline()
for component_name, component in (
    ("text_embedder", text_embedder),
    ("retriever", retriever),
    ("prompt_builder", prompt_builder),
    ("llm", generator),
):
    pipe.add_component(component_name, component)

# Wire the components together, in order:
# query embedding -> retriever -> prompt builder (documents input) -> LLM.
for sender, receiver in (
    ("text_embedder", "retriever"),
    ("retriever", "prompt_builder.documents"),
    ("prompt_builder", "llm"),
):
    pipe.connect(sender, receiver)

# Helper for asking the pipeline a single question
def get_answer(query):
    """Run ``query`` through the module-level ``pipe`` and return the first reply.

    The same query string is passed to the text embedder (as ``text``) and to
    the prompt builder (as ``query``).

    Args:
        query: The question to answer.

    Returns:
        The first element of the ``replies`` list produced by the ``llm``
        component.
    """
    pipeline_inputs = {
        "text_embedder": {"text": query},
        "prompt_builder": {"query": query},
    }
    outputs = pipe.run(pipeline_inputs)
    replies = outputs["llm"]["replies"]
    return replies[0]

# Example usage: ask one question and print the generated answer.
query = (
    "In what year did an Arab force led by the Muslim general "
    "Muawiyah I invade Rhodes?"
)
answer = get_answer(query)
print(answer)

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Device set to use cpu


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.




653.
