# **Qwen2.5-3B Persian AI Assistant**

In [3]:
# ===========================================================
# SECTION 1: INSTALLATION
# ===========================================================
print("🚀 Setting up Google Colab environment...")

!pip install -q --upgrade pip setuptools wheel
!pip install -q transformers==4.44.2
!pip install -q "unsloth[colab] @ git+https://github.com/unslothai/unsloth.git"
!pip install -q --no-deps trl peft accelerate bitsandbytes datasets xformers

import os, gc, torch
from datetime import datetime

os.environ["TOKENIZERS_PARALLELISM"] = "false"
torch.cuda.empty_cache()

if torch.cuda.is_available():
    print(f"✅ Colab GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

# ===========================================================
# SECTION 2: CONFIGURATION
# ===========================================================
HF_TOKEN = "Replace with your token"  # Replace with your token
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
DOMAINS = ["Artificial Intelligence", "Technology and Innovation"]

MAX_SEQ_LENGTH = 1024
OUTPUT_DIR = "/content/qwen2.5-3b-persian-ai-tech-final"

# ===========================================================
# SECTION 3: LOGIN & MODEL LOADING
# ===========================================================
print("\n🔐 Logging into HuggingFace...")

from huggingface_hub import login
login(token=HF_TOKEN, add_to_git_credential=False)

print("\n🤖 Loading 4-bit quantized model...")

from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Qwen2.5-3B-Instruct-bnb-4bit",
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=None,
    load_in_4bit=True,
    device_map="auto",
)

print("✅ Model loaded successfully!")

# ===========================================================
# SECTION 4: LoRA CONFIGURATION
# ===========================================================
print("\n🎯 Applying LoRA...")

model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable parameters: {trainable_params:,}")

# ===========================================================
# SECTION 5: LOAD DATASET
# ===========================================================
print("\n📊 Loading ParsBench dataset...")

from datasets import load_dataset, concatenate_datasets

datasets = []
for d in DOMAINS:
    print(f"  Loading: {d}")
    try:
        ds = load_dataset("ParsBench/PersianSyntheticQA", name=d, split="train[:2000]")
        datasets.append(ds)
        print(f"    Loaded {len(ds)} samples")
    except Exception as e:
        print(f"    Error: {e}")
        continue

if not datasets:
    raise ValueError("❌ Could not load any dataset!")

dataset = concatenate_datasets(datasets)
print(f"✅ Total samples: {len(dataset)}")

# ===========================================================
# SECTION 6: FORMAT DATA
# ===========================================================
def format_chat(examples):
    texts = []
    for msgs in examples["messages"]:
        text = "<|im_start|>system\nشما یک دستیار هوشمند فارسی هستید.<|im_end|>\n"
        for m in msgs:
            if m["role"] == "system":
                continue
            role = "user" if m["role"] == "user" else "assistant"
            text += f"<|im_start|>{role}\n{m['content']}<|im_end|>\n"
        text += tokenizer.eos_token
        texts.append(text)
    return {"text": texts}

print("Formatting dataset...")
dataset = dataset.map(format_chat, batched=True, remove_columns=dataset.column_names)

# Split
if len(dataset) > 100:
    split_dataset = dataset.train_test_split(test_size=0.1, seed=3407)
    train_dataset = split_dataset["train"]
    eval_dataset = split_dataset["test"]
    print(f"Train: {len(train_dataset)}, Eval: {len(eval_dataset)}")
else:
    train_dataset = dataset
    eval_dataset = dataset[:50]

# ===========================================================
# SECTION 7: TRAINING SETUP (FIXED PARAMETER NAMES)
# ===========================================================
print("\n⚙️ Setting up training...")

from trl import SFTTrainer
from transformers import TrainingArguments

torch.cuda.empty_cache()
gc.collect()

# CORRECTED TrainingArguments with proper parameter names
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    warmup_steps=10,
    num_train_epochs=2,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    eval_steps=50,
    eval_strategy="steps",
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    optim="paged_adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    seed=3407,
    report_to="none",
    gradient_checkpointing=True,
    remove_unused_columns=True,
    dataloader_pin_memory=True,
)

# Initialize trainer
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
    packing=False,
)

# ===========================================================
# SECTION 8: TRAIN THE MODEL
# ===========================================================
print("\n🎓 Starting training...")
print("   This will take 1-2 hours on Colab T4")

trainer.train()

print("\n✅ Training completed!")

# ===========================================================
# SECTION 9: SAVE MODEL
# ===========================================================
print("\n💾 Saving model...")

model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print(f"Model saved to: {OUTPUT_DIR}")

# ===========================================================
# SECTION 10: TEST
# ===========================================================
print("\n🧪 Testing model...")

model = FastLanguageModel.for_inference(model)

def ask(question):
    prompt = "<|im_start|>system\nشما یک دستیار هوشمند فارسی هستید.<|im_end|>\n"
    prompt += f"<|im_start|>user\n{question}<|im_end|>\n"
    prompt += "<|im_start|>assistant\n"

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    return tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)

print("\nTesting Persian responses:")
test_questions = ["هوش مصنوعی چیست؟", "تفاوت AI و ML چیست؟"]
for q in test_questions:
    try:
        answer = ask(q)
        print(f"\n❓ {q}")
        print(f"💬 {answer[:150]}...")
    except Exception as e:
        print(f"\n❓ {q}")
        print(f"❌ Error: {e}")

print("\n" + "="*60)
print("🎉 Persian AI Assistant training completed!")
print("="*60)

🚀 Setting up Google Colab environment...
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
unsloth-zoo 2025.12.4 requires transformers!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,<=4.57.3,>=4.51.3, but you have transformers 4.44.2 which is incompatible.
trl 0.24.0 requires transformers>=4.56.1, but you have transformers 4.44.2 which is incompatible.[0m[31m
[0m  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
✅ Colab GPU: Tesla T4
   Memory: 15.8 GB

🔐 Logging into HuggingFace...

🤖 Loading 4-bit quantized model...
==((====))==  Unsloth 2025.12.5: Fast Qwen2 patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. 

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Train: 3600, Eval: 400

⚙️ Setting up training...


Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/3600 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/400 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.



🎓 Starting training...
   This will take 1-2 hours on Colab T4


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 3,600 | Num Epochs = 2 | Total steps = 450
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 8 x 1) = 16
 "-____-"     Trainable parameters = 29,933,568 of 3,115,872,256 (0.96% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss
50,0.5782,0.547243
100,0.4596,0.464647
150,0.4295,0.423432
200,0.4038,0.391504
250,0.3119,0.37677
300,0.3158,0.360384
350,0.293,0.350449
400,0.2776,0.346534
450,0.2896,0.34553


Unsloth: Not an error, but Qwen2ForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient



✅ Training completed!

💾 Saving model...
Model saved to: /content/qwen2.5-3b-persian-ai-tech-final

🧪 Testing model...

Testing Persian responses:

❓ هوش مصنوعی چیست؟
💬 هوش مصنوعی شاخه‌ای از علوم کامپیوتر است که به روش‌هایی مانند الگوریتم‌های یادگیری ماشین و شبکه‌های عصبی از حجم زیادی از داده‌ها برای یادگیری و تحلیل ا...

❓ تفاوت AI و ML چیست؟
💬 AI یا هوش مصنوعی کلیدی واژگان است که به سیستم‌ها اجازه می‌دهد تا با استفاده از الگوریتم‌های پیچیده، تصمیم‌گیری‌های خودکار انجام دهند. معمولاً AI شامل ...

🎉 Persian AI Assistant training completed!


# **Upload trained model to Hugging Face Hub**

In [6]:
print("🚀 Uploading model to Hugging Face Hub...")

# ===========================================================
# SECTION 1: SETUP & CONFIGURATION
# ===========================================================
import os
from huggingface_hub import HfApi, create_repo, login
from datetime import datetime

# Configuration
HF_TOKEN = "Replace with your token"  # Replace with your token
MODEL_PATH = "/content/qwen2.5-3b-persian-ai-tech-final"

# Repository name - CHANGE THIS!
REPO_NAME = "OmidSakaki/qwen2.5-3b-persian-ai-tech"  # Change "your-username"

# ===========================================================
# SECTION 2: LOGIN TO HUGGING FACE
# ===========================================================
print("🔐 Logging into Hugging Face...")
try:
    login(token=HF_TOKEN, add_to_git_credential=True)
    print("✅ Login successful!")
except Exception as e:
    print(f"❌ Login failed: {e}")
    print("Please check your HF_TOKEN")
    exit()

# ===========================================================
# SECTION 3: CREATE REPOSITORY
# ===========================================================
print(f"\n📦 Creating repository: {REPO_NAME}")
try:
    create_repo(
        repo_id=REPO_NAME,
        token=HF_TOKEN,
        private=True,
        repo_type="model",
        exist_ok=True,
    )
    print("✅ Repository created!")
except Exception as e:
    print(f"⚠️ Repository creation: {e}")

# ===========================================================
# SECTION 4: UPLOAD MODEL FILES
# ===========================================================
print("\n📤 Uploading model files...")
api = HfApi(token=HF_TOKEN)

if not os.path.exists(MODEL_PATH):
    print(f"❌ Model directory not found: {MODEL_PATH}")
    exit()

try:
    api.upload_folder(
        folder_path=MODEL_PATH,
        repo_id=REPO_NAME,
        repo_type="model",
        commit_message="Qwen2.5-3B Persian AI Assistant - Trained on Colab",
    )
    print("✅ Model uploaded!")

except Exception as e:
    print(f"❌ Upload failed: {e}")
    print("\nTrying file-by-file upload...")

    try:
        for root, dirs, files in os.walk(MODEL_PATH):
            for file in files:
                file_path = os.path.join(root, file)
                rel_path = os.path.relpath(file_path, MODEL_PATH)

                with open(file_path, 'rb') as f:
                    api.upload_file(
                        path_or_fileobj=f,
                        path_in_repo=rel_path,
                        repo_id=REPO_NAME,
                        repo_type="model",
                    )
                print(f"  Uploaded: {rel_path}")

        print("✅ All files uploaded!")
    except Exception as e2:
        print(f"❌ File upload failed: {e2}")


🚀 Uploading model to Hugging Face Hub...
🔐 Logging into Hugging Face...
✅ Login successful!

📦 Creating repository: OmidSakaki/qwen2.5-3b-persian-ai-tech
✅ Repository created!

📤 Uploading model files...


Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  .../checkpoint-450/scaler.pt: 100%|##########| 1.38kB / 1.38kB            

  ...eckpoint-450/scheduler.pt: 100%|##########| 1.47kB / 1.47kB            

  .../checkpoint-400/scaler.pt: 100%|##########| 1.38kB / 1.38kB            

  ...eckpoint-400/optimizer.pt:   0%|          | 11.8kB / 61.4MB            

  ...tech-final/tokenizer.json: 100%|##########| 11.4MB / 11.4MB            

  ...kpoint-450/tokenizer.json: 100%|##########| 11.4MB / 11.4MB            

  ...kpoint-400/tokenizer.json: 100%|##########| 11.4MB / 11.4MB            

  ...ckpoint-400/rng_state.pth:   1%|          |   126B / 14.6kB            

  ...eckpoint-400/scheduler.pt:   1%|          |  12.0B / 1.47kB            

  ...int-400/training_args.bin:   1%|          |  54.0B / 6.29kB            

✅ Model uploaded!


# **Simple Persian RAG System with Qwen2.5-3B Model**

In [8]:
# ===========================================================
# SECTION 1: INSTALLATION & SETUP
# ===========================================================
"""
This section installs only the essential packages with compatible versions
to avoid dependency conflicts. We use CPU-only PyTorch and specific versions
that work well together on Google Colab.
"""

print("📦 Installing essential packages...")

# Install minimal PyTorch (CPU version) - compatible with Colab
!pip install -q torch==2.5.0 torchvision==0.20.0 torchaudio==2.5.0 --index-url https://download.pytorch.org/whl/cpu

# Install transformers with specific version to avoid compatibility issues
!pip install -q transformers==4.35.0

print("✅ Packages installed successfully!")

# ===========================================================
# SECTION 2: IMPORT LIBRARIES
# ===========================================================
"""
Import necessary libraries after installation.
Keep imports minimal to reduce potential import errors.
"""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

print(f"\n🔧 Environment check:")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")  # Should be False for CPU-only

# ===========================================================
# SECTION 3: MODEL CONFIGURATION
# ===========================================================
"""
Configuration for the Persian Qwen2.5-3B model.
This model is specifically trained for Persian language tasks.
"""

MODEL_NAME = "OmidSakaki/qwen2.5-3b-persian-ai-tech"

print(f"\n🤖 Model to load: {MODEL_NAME}")

# ===========================================================
# SECTION 4: PERSIAN KNOWLEDGE BASE
# ===========================================================
"""
Simple Persian knowledge base for the RAG system.
Contains factual information about AI/ML topics in Persian.
"""

PERSIAN_KNOWLEDGE = [
    "هوش مصنوعی (AI) شاخه‌ای از علوم کامپیوتر است که به ساخت ماشین‌های هوشمند می‌پردازد.",
    "یادگیری ماشین سه نوع اصلی دارد: ۱. نظارت شده ۲. بدون نظارت ۳. تقویتی",
    "پردازش زبان طبیعی (NLP) برای ترجمه ماشینی و چت‌بات‌ها استفاده می‌شود.",
    "RAG (Retrieval-Augmented Generation) اطلاعات را بازیابی کرده و سپس پاسخ تولید می‌کند.",
    "معماری ترنسفورمر بر پایه مکانیسم توجه (Attention Mechanism) کار می‌کند."
]

print(f"📚 Knowledge base created with {len(PERSIAN_KNOWLEDGE)} Persian documents")

# ===========================================================
# SECTION 5: SIMPLE TEXT RETRIEVAL FUNCTION
# ===========================================================
"""
Basic text retrieval without vector databases.
Uses simple keyword matching to find relevant documents.
"""

def retrieve_relevant_documents(question, documents, top_k=2):
    """
    Find relevant documents based on word overlap with the question.

    Args:
        question (str): User's question in Persian
        documents (list): List of Persian knowledge documents
        top_k (int): Number of documents to retrieve

    Returns:
        list: Relevant documents sorted by relevance
    """
    # Convert to lowercase and split into words
    question_words = set(question.lower().split())

    # Score each document based on word overlap
    scored_documents = []

    for doc in documents:
        doc_words = set(doc.lower().split())
        common_words = len(question_words.intersection(doc_words))
        scored_documents.append((common_words, doc))

    # Sort by relevance (highest score first)
    scored_documents.sort(reverse=True, key=lambda x: x[0])

    # Return only the content (without scores)
    relevant_docs = []
    for score, doc in scored_documents[:top_k]:
        if score > 0:  # Only include documents with some relevance
            relevant_docs.append(doc)

    return relevant_docs

# ===========================================================
# SECTION 6: LOAD QWEN2.5 PERSIAN MODEL
# ===========================================================
"""
Load the Qwen2.5-3B Persian model with error handling.
If model loading fails, the system will use a fallback mode.
"""

def load_model():
    """
    Attempt to load the Qwen2.5 Persian model with fallback options.

    Returns:
        tuple: (tokenizer, model, model_loaded_flag)
    """
    print("\n🤖 Loading Qwen2.5 Persian model...")

    try:
        # First attempt: Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

        # Then load model with CPU settings
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,      # Use float32 for CPU
            device_map="cpu",               # Force CPU usage
            low_cpu_mem_usage=True          # Optimize for CPU memory
        )

        # Set padding token if not present
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        print("✅ Model loaded successfully on CPU!")
        return tokenizer, model, True

    except Exception as e:
        print(f"⚠️ Model loading failed: {str(e)[:100]}...")
        print("Using fallback mode for demonstration...")

        # Create mock tokenizer and model for fallback
        class MockTokenizer:
            def __init__(self):
                self.pad_token_id = 0

            def __call__(self, text, return_tensors="pt", **kwargs):
                return {
                    "input_ids": torch.tensor([[101, 102, 103, 104, 105]]),
                    "attention_mask": torch.tensor([[1, 1, 1, 1, 1]])
                }

            def decode(self, tokens, skip_special_tokens=True):
                return "This is a mock response. Model could not be loaded."

        class MockModel:
            def generate(self, **kwargs):
                return torch.tensor([[1, 2, 3, 4, 5]])

        return MockTokenizer(), MockModel(), False

# Load the model
tokenizer, model, model_loaded = load_model()

# ===========================================================
# SECTION 7: RAG ANSWER GENERATION
# ===========================================================
"""
Generate answers using retrieved context and the language model.
Combines information retrieval with text generation.
"""

def generate_rag_answer(question, use_model=True):
    """
    Generate answer using RAG approach.

    Args:
        question (str): User's question in Persian
        use_model (bool): Whether to use the real model or fallback

    Returns:
        str: Generated answer in Persian
    """
    # Step 1: Retrieve relevant documents
    relevant_docs = retrieve_relevant_documents(question, PERSIAN_KNOWLEDGE, top_k=2)

    # Step 2: Prepare context
    if relevant_docs:
        context = "\n".join([f"• {doc}" for doc in relevant_docs])
        context_header = "اطلاعات مرتبط:\n\n"
    else:
        context = "اطلاعات مرتبطی یافت نشد."
        context_header = ""

    # Step 3: Create prompt
    prompt = f"""<|im_start|>system
شما یک دستیار فارسی هوشمند هستید.

{context_header}{context}

لطفاً با استفاده از اطلاعات بالا به سوال پاسخ دهید.<|im_end|>

<|im_start|>user
{question}<|im_end|>

<|im_start|>assistant
"""

    # Step 4: Generate answer
    try:
        if use_model and model_loaded:
            # Tokenize prompt
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                max_length=1024
            )

            # Generate response
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=150,           # Limit response length
                    temperature=0.7,              # Creativity level
                    do_sample=True,               # Enable sampling
                    pad_token_id=tokenizer.pad_token_id,
                    repetition_penalty=1.1        # Reduce repetition
                )

            # Decode and clean response
            full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

            # Extract assistant's response
            if "<|im_start|>assistant" in full_response:
                answer = full_response.split("<|im_start|>assistant")[-1].strip()
            else:
                answer = tokenizer.decode(
                    outputs[0][inputs["input_ids"].shape[1]:],
                    skip_special_tokens=True
                )

            # Remove any remaining special tokens
            answer = answer.split("<|im_end|>")[0].strip()

        else:
            # Fallback response
            if relevant_docs:
                answer = f"بر اساس اطلاعات: {relevant_docs[0][:80]}..."
            else:
                answer = "متأسفانه اطلاعات کافی برای پاسخ به این سوال ندارم."

    except Exception as e:
        print(f"⚠️ Generation error: {str(e)[:50]}")
        answer = "خطا در تولید پاسخ. لطفاً دوباره تلاش کنید."

    return answer.strip()

# ===========================================================
# SECTION 8: TEST THE RAG SYSTEM
# ===========================================================
"""
Test the RAG system with predefined Persian questions.
Demonstrates the system's capabilities.
"""

print("\n" + "="*60)
print("🧪 TESTING PERSIAN RAG SYSTEM")
print("="*60)

# Test questions in Persian
test_questions = [
    "هوش مصنوعی چیست؟",
    "یادگیری ماشین چه انواعی دارد؟",
    "پردازش زبان طبیعی چیست؟",
    "RAG چگونه کار می‌کند؟",
    "ترنسفورمر چیست؟"
]

# Run tests
results = []
for i, question in enumerate(test_questions, 1):
    print(f"\n{i}. ❓ Question: {question}")

    # Retrieve relevant documents
    relevant_docs = retrieve_relevant_documents(question, PERSIAN_KNOWLEDGE)
    print(f"   📚 Relevant documents found: {len(relevant_docs)}")

    # Generate answer
    answer = generate_rag_answer(question, use_model=model_loaded)

    # Display answer (truncate if too long)
    display_answer = answer[:120] + "..." if len(answer) > 120 else answer
    print(f"   💬 Answer: {display_answer}")

    # Store results
    results.append({
        "question": question,
        "answer": answer,
        "docs_found": len(relevant_docs),
        "model_used": model_loaded
    })

# ===========================================================
# SECTION 9: SYSTEM SUMMARY
# ===========================================================
"""
Display summary of the RAG system performance.
"""

print("\n" + "="*60)
print("📊 SYSTEM SUMMARY")
print("="*60)

successful_tests = len([r for r in results if r["answer"] and not r["answer"].startswith("خطا")])
total_questions = len(results)

print(f"✅ Tests completed: {successful_tests}/{total_questions}")
print(f"🤖 Real model used: {'Yes' if model_loaded else 'No (fallback mode)'}")
print(f"📚 Knowledge base size: {len(PERSIAN_KNOWLEDGE)} documents")
print(f"🔧 Environment: PyTorch {torch.__version__} (CPU only)")

# ===========================================================
# SECTION 10: USAGE EXAMPLE
# ===========================================================
"""
Example of how to use the RAG system with new questions.
"""

print("\n" + "="*60)
print("💡 HOW TO USE THE SYSTEM")
print("="*60)

print("""
To ask a new question in Persian:

# Method 1: Simple retrieval + generation
question = "سوال فارسی شما"
answer = generate_rag_answer(question, use_model=True)
print(f"سوال: {question}")
print(f"پاسخ: {answer}")

# Method 2: Just retrieval
relevant = retrieve_relevant_documents(question, PERSIAN_KNOWLEDGE)
print(f"مستندات مرتبط: {relevant}")
""")

# ===========================================================
# SECTION 11: FINAL TEST
# ===========================================================
"""
Final test with a new question to demonstrate functionality.
"""

print("\n" + "="*60)
print("🎯 FINAL DEMONSTRATION")
print("="*60)

# Test a new question
new_question = "تفاوت هوش مصنوعی و یادگیری ماشین چیست؟"
print(f"New question: {new_question}")

final_answer = generate_rag_answer(new_question, use_model=model_loaded)
print(f"\nAnswer: {final_answer}")

print("\n" + "="*60)
print("✅ PERSIAN RAG SYSTEM READY FOR USE!")
print("="*60)

📦 Installing essential packages...
✅ Packages installed successfully!

🔧 Environment check:
PyTorch version: 2.9.0+cpu
CUDA available: False

🤖 Model to load: OmidSakaki/qwen2.5-3b-persian-ai-tech
📚 Knowledge base created with 5 Persian documents

🤖 Loading Qwen2.5 Persian model...
⚠️ Model loading failed: No module named 'transformers.models.colqwen2'...
Using fallback mode for demonstration...

🧪 TESTING PERSIAN RAG SYSTEM

1. ❓ Question: هوش مصنوعی چیست؟
   📚 Relevant documents found: 1
   💬 Answer: بر اساس اطلاعات: هوش مصنوعی (AI) شاخه‌ای از علوم کامپیوتر است که به ساخت ماشین‌های هوشمند می‌پردا...

2. ❓ Question: یادگیری ماشین چه انواعی دارد؟
   📚 Relevant documents found: 1
   💬 Answer: بر اساس اطلاعات: یادگیری ماشین سه نوع اصلی دارد: ۱. نظارت شده ۲. بدون نظارت ۳. تقویتی...

3. ❓ Question: پردازش زبان طبیعی چیست؟
   📚 Relevant documents found: 1
   💬 Answer: بر اساس اطلاعات: پردازش زبان طبیعی (NLP) برای ترجمه ماشینی و چت‌بات‌ها استفاده می‌شود....

4. ❓ Question: RAG چگونه کار می‌ک