# **Qwen2.5-3B Persian AI Assistant**

In [None]:
# ===========================================================
# SECTION 1: INSTALLATION
# ===========================================================
print("üöÄ Setting up Google Colab environment...")

# Upgrade the packaging toolchain first, then install the training stack.
!pip install -q --upgrade pip setuptools wheel
# transformers is the only package in this cell pinned to an exact version.
!pip install -q transformers==4.44.2
# unsloth is installed from the current head of its GitHub repository (unpinned).
!pip install -q "unsloth[colab] @ git+https://github.com/unslothai/unsloth.git"
# The remaining libraries are unpinned and installed with --no-deps.
# NOTE(review): the SFTTrainer call further down passes tokenizer=,
# dataset_text_field= and max_seq_length= directly - confirm that the trl
# release that gets installed here still accepts those keyword arguments.
!pip install -q --no-deps trl peft accelerate bitsandbytes datasets xformers

import os, gc, torch
from datetime import datetime

os.environ["TOKENIZERS_PARALLELISM"] = "false"
torch.cuda.empty_cache()

if torch.cuda.is_available():
    print(f"‚úÖ Colab GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

# ===========================================================
# SECTION 2: CONFIGURATION
# ===========================================================
# Hugging Face access token. Prefer the HF_TOKEN environment variable so the
# secret does not have to be pasted into (and saved with) the notebook; the
# original placeholder stays as the fallback, so editing this line in place
# still works.
HF_TOKEN = os.environ.get("HF_TOKEN") or "Replace with your token"
# Base model identifier. NOTE(review): the loader below passes its own
# hard-coded 4-bit checkpoint name, so this constant is currently informational.
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
# Dataset configurations (one per domain) loaded in the dataset section.
DOMAINS = ["Artificial Intelligence", "Technology and Innovation"]

# Shared by model loading and the trainer.
MAX_SEQ_LENGTH = 1024
# Used both as the trainer's output_dir and as the final save location.
OUTPUT_DIR = "/content/qwen2.5-3b-persian-ai-tech-final"

# ===========================================================
# SECTION 3: LOGIN & MODEL LOADING
# ===========================================================
print("\nüîê Logging into HuggingFace...")

from huggingface_hub import login
login(token=HF_TOKEN, add_to_git_credential=False)

print("\nü§ñ Loading 4-bit quantized model...")

from unsloth import FastLanguageModel

# Load the 4-bit checkpoint together with its tokenizer.
# NOTE(review): the checkpoint name is hard-coded here; MODEL_NAME from the
# configuration section is not used by this call.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Qwen2.5-3B-Instruct-bnb-4bit",
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=None,  # presumably lets the loader choose the dtype - confirm in the unsloth docs
    load_in_4bit=True,
    device_map="auto",
)

print("‚úÖ Model loaded successfully!")

# ===========================================================
# SECTION 4: LoRA CONFIGURATION
# ===========================================================
print("\nüéØ Applying LoRA...")

# Wrap the loaded model with LoRA adapters on the seven listed projection
# modules; `model` is re-bound to the wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,  # same seed value as the split and the TrainingArguments below
)

# Count only the parameters that will receive gradients and report the total.
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable parameters: {trainable_params:,}")

# ===========================================================
# SECTION 5: LOAD DATASET
# ===========================================================
print("\nüìä Loading ParsBench dataset...")

from datasets import load_dataset, concatenate_datasets

# Collect the first 2000 training rows of every configured domain. Loading is
# best effort: a domain that fails is reported and skipped, and the run only
# aborts when no domain could be loaded at all.
datasets = []
for domain in DOMAINS:
    print(f"  Loading: {domain}")
    try:
        subset = load_dataset(
            "ParsBench/PersianSyntheticQA",
            name=domain,
            split="train[:2000]",
        )
        datasets.append(subset)
        print(f"    Loaded {len(subset)} samples")
    except Exception as e:
        # Report the failure and move on to the next domain.
        print(f"    Error: {e}")

if not datasets:
    raise ValueError("‚ùå Could not load any dataset!")

# Merge the per-domain subsets into the single dataset used from here on.
dataset = concatenate_datasets(datasets)
print(f"‚úÖ Total samples: {len(dataset)}")

# ===========================================================
# SECTION 6: FORMAT DATA
# ===========================================================
def format_chat(examples):
    """Render a batch of conversations as ChatML training strings.

    Every conversation starts with the fixed system turn defined below.
    Messages whose role is "system" are dropped, the role "user" is kept,
    and every other role is written as "assistant". The module-level
    tokenizer's EOS token is appended after the last turn.

    Args:
        examples: Batch mapping with a "messages" entry; each item is a list
            of dicts carrying "role" and "content".

    Returns:
        dict: {"text": [...]} with one rendered string per conversation.
    """
    system_turn = "<|im_start|>system\nÿ¥ŸÖÿß €å⁄© ÿØÿ≥ÿ™€åÿßÿ± ŸáŸàÿ¥ŸÖŸÜÿØ ŸÅÿßÿ±ÿ≥€å Ÿáÿ≥ÿ™€åÿØ.<|im_end|>\n"

    rendered = []
    for conversation in examples["messages"]:
        pieces = [system_turn]
        for message in conversation:
            speaker = message["role"]
            if speaker == "system":
                # The fixed system turn above replaces any system message in the data.
                continue
            if speaker != "user":
                speaker = "assistant"
            pieces.append(f"<|im_start|>{speaker}\n{message['content']}<|im_end|>\n")
        pieces.append(tokenizer.eos_token)
        rendered.append("".join(pieces))
    return {"text": rendered}

print("Formatting dataset...")
# Replace every original column with the single "text" column from format_chat.
dataset = dataset.map(format_chat, batched=True, remove_columns=dataset.column_names)

# Split
if len(dataset) > 100:
    # Hold out 10% for evaluation; the seed keeps the split reproducible.
    split_dataset = dataset.train_test_split(test_size=0.1, seed=3407)
    train_dataset = split_dataset["train"]
    eval_dataset = split_dataset["test"]
    print(f"Train: {len(train_dataset)}, Eval: {len(eval_dataset)}")
else:
    # Too small to split: train on everything and evaluate on the first rows.
    train_dataset = dataset
    # Slicing a Dataset (dataset[:50]) yields a plain dict of column lists, which
    # the trainer cannot consume; select() returns a Dataset with the same rows.
    eval_dataset = dataset.select(range(min(50, len(dataset))))

# ===========================================================
# SECTION 7: TRAINING SETUP
# ===========================================================
print("\n‚öôÔ∏è Setting up training...")

from trl import SFTTrainer
from transformers import TrainingArguments

torch.cuda.empty_cache()
gc.collect()

from unsloth import is_bfloat16_supported

# Mixed-precision mode must match the dtype the model was loaded with. The
# model is loaded with dtype=None, so pick bf16 on GPUs that support it and
# fp16 otherwise, instead of forcing fp16 on every GPU.
use_bf16 = is_bfloat16_supported()

# Training hyper-parameters. The effective batch size per optimizer step is
# per_device_train_batch_size * gradient_accumulation_steps = 16 per device.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    warmup_steps=10,
    num_train_epochs=2,
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=10,
    # Evaluate every 50 steps and checkpoint every 100; save_steps has to stay
    # a multiple of eval_steps for load_best_model_at_end to work.
    eval_steps=50,
    eval_strategy="steps",
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,  # lower eval_loss is better
    optim="paged_adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    seed=3407,
    report_to="none",
    gradient_checkpointing=True,
    remove_unused_columns=True,
    dataloader_pin_memory=True,
)

# Initialize the supervised fine-tuning trainer on the formatted splits.
# "text" is the single column produced by format_chat above, and the sequence
# limit is the same MAX_SEQ_LENGTH the model was loaded with.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
    packing=False,
)

# ===========================================================
# SECTION 8: TRAIN THE MODEL
# ===========================================================
print("\nüéì Starting training...")
print("   This will take 1-2 hours on Colab T4")

trainer.train()

print("\n‚úÖ Training completed!")

# ===========================================================
# SECTION 9: SAVE MODEL
# ===========================================================
print("\nüíæ Saving model...")

# Write the model and the tokenizer into OUTPUT_DIR.
# NOTE(review): OUTPUT_DIR is also the trainer's output_dir, so any trainer
# checkpoints presumably sit next to these files; the upload cell pushes this
# same path as a whole folder - confirm that is intended.
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print(f"Model saved to: {OUTPUT_DIR}")

# ===========================================================
# SECTION 10: TEST
# ===========================================================
print("\nüß™ Testing model...")

# Switch the model to unsloth's inference mode. The call is made for its side
# effect on `model`; the return value is deliberately not re-bound, because a
# release that returns nothing from this call would leave `model` set to None.
FastLanguageModel.for_inference(model)

def ask(question):
    """Generate the model's reply to a single user question.

    The prompt uses the same ChatML layout and system turn as the training
    data and ends with an open assistant turn for the model to complete.

    Args:
        question (str): User question, inserted verbatim into the user turn.

    Returns:
        str: The newly generated text only (prompt tokens are sliced off),
        decoded with special tokens skipped.
    """
    prompt = (
        "<|im_start|>system\nÿ¥ŸÖÿß €å⁄© ÿØÿ≥ÿ™€åÿßÿ± ŸáŸàÿ¥ŸÖŸÜÿØ ŸÅÿßÿ±ÿ≥€å Ÿáÿ≥ÿ™€åÿØ.<|im_end|>\n"
        f"<|im_start|>user\n{question}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    # Move the inputs to wherever the model lives instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # outputs[0] holds prompt + continuation; keep only the continuation.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

print("\nTesting Persian responses:")
test_questions = ["ŸáŸàÿ¥ ŸÖÿµŸÜŸàÿπ€å ⁄Ü€åÿ≥ÿ™ÿü", "ÿ™ŸÅÿßŸàÿ™ AI Ÿà ML ⁄Ü€åÿ≥ÿ™ÿü"]
# Ask every sample question; a failure is reported for that question only so
# the remaining questions still run.
for q in test_questions:
    try:
        answer = ask(q)
    except Exception as e:
        print(f"\n‚ùì {q}")
        print(f"‚ùå Error: {e}")
    else:
        print(f"\n‚ùì {q}")
        # Show only the first 150 characters of the reply.
        preview = answer[:150]
        print(f"üí¨ {preview}...")

print("\n" + "="*60)
print("üéâ Persian AI Assistant training completed!")
print("="*60)

# **Upload trained model to Hugging Face Hub**

In [None]:
print("üöÄ Uploading model to Hugging Face Hub...")

# ===========================================================
# SECTION 1: SETUP & CONFIGURATION
# ===========================================================
import os
from huggingface_hub import HfApi, create_repo, login
from datetime import datetime

# Configuration
# Prefer the HF_TOKEN environment variable so the secret does not have to be
# pasted into (and saved with) the notebook; the placeholder stays as fallback.
HF_TOKEN = os.environ.get("HF_TOKEN") or "Replace with your token"
# Local directory written by the training cell.
MODEL_PATH = "/content/qwen2.5-3b-persian-ai-tech-final"

# Target repository in "<namespace>/<repo-name>" form - change the namespace
# to your own Hugging Face username or organization.
REPO_NAME = "OmidSakaki/qwen2.5-3b-persian-ai-tech"

# ===========================================================
# SECTION 2: LOGIN TO HUGGING FACE
# ===========================================================
print("üîê Logging into Hugging Face...")
try:
    login(token=HF_TOKEN, add_to_git_credential=True)
except Exception as e:
    print(f"‚ùå Login failed: {e}")
    print("Please check your HF_TOKEN")
    # Stop this cell with a non-zero status. exit() would shut the notebook
    # kernel down (discarding the in-memory model) and, in a plain script,
    # report success; raising SystemExit only aborts the current run.
    raise SystemExit(1) from e
else:
    print("‚úÖ Login successful!")

# ===========================================================
# SECTION 3: CREATE REPOSITORY
# ===========================================================
print(f"\nüì¶ Creating repository: {REPO_NAME}")
# Create the private model repository. Failures are only reported, not raised,
# so the upload step below is still attempted afterwards.
try:
    create_repo(
        repo_id=REPO_NAME,
        token=HF_TOKEN,
        private=True,
        repo_type="model",
        exist_ok=True,  # presumably makes an already existing repo a non-error, so the message below may also appear for a pre-existing repo
    )
    print("‚úÖ Repository created!")
except Exception as e:
    print(f"‚ö†Ô∏è Repository creation: {e}")

# ===========================================================
# SECTION 4: UPLOAD MODEL FILES
# ===========================================================
print("\nüì§ Uploading model files...")
api = HfApi(token=HF_TOKEN)

# Require a directory, not just an existing path: a regular file here would
# make the folder upload fail and the per-file fallback walk nothing.
if not os.path.isdir(MODEL_PATH):
    print(f"‚ùå Model directory not found: {MODEL_PATH}")
    # Abort with a non-zero status; exit() would shut the notebook kernel down.
    raise SystemExit(1)

# NOTE(review): the whole directory is uploaded, including any trainer
# checkpoint sub-folders it may contain - confirm that is intended.
try:
    api.upload_folder(
        folder_path=MODEL_PATH,
        repo_id=REPO_NAME,
        repo_type="model",
        commit_message="Qwen2.5-3B Persian AI Assistant - Trained on Colab",
    )
    print("‚úÖ Model uploaded!")

except Exception as e:
    print(f"‚ùå Upload failed: {e}")
    print("\nTrying file-by-file upload...")

    # Fallback: push every file individually, keeping its relative location.
    # The first failing file ends the fallback and is reported below.
    try:
        for root, _dirs, files in os.walk(MODEL_PATH):
            for filename in files:
                file_path = os.path.join(root, filename)
                # Repository paths always use forward slashes.
                rel_path = os.path.relpath(file_path, MODEL_PATH).replace(os.sep, "/")

                api.upload_file(
                    path_or_fileobj=file_path,
                    path_in_repo=rel_path,
                    repo_id=REPO_NAME,
                    repo_type="model",
                )
                print(f"  Uploaded: {rel_path}")

        print("‚úÖ All files uploaded!")
    except Exception as e2:
        print(f"‚ùå File upload failed: {e2}")


# **Simple Persian RAG System with Qwen2.5-3B Model**

In [None]:
# ===========================================================
# SECTION 1: INSTALLATION & SETUP
# ===========================================================
"""
This section installs only the essential packages with compatible versions
to avoid dependency conflicts. We use CPU-only PyTorch and specific versions
that work well together on Google Colab.
"""

print("üì¶ Installing essential packages...")

# Install minimal PyTorch (CPU version) - compatible with Colab
!pip install -q torch==2.5.0 torchvision==0.20.0 torchaudio==2.5.0 --index-url https://download.pytorch.org/whl/cpu

# Install transformers with specific version to avoid compatibility issues
!pip install -q transformers==4.35.0

print("‚úÖ Packages installed successfully!")

# ===========================================================
# SECTION 2: IMPORT LIBRARIES
# ===========================================================
"""
Import necessary libraries after installation.
Keep imports minimal to reduce potential import errors.
"""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

print(f"\nüîß Environment check:")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")  # Should be False for CPU-only

# ===========================================================
# SECTION 3: MODEL CONFIGURATION
# ===========================================================
"""
Configuration for the Persian Qwen2.5-3B model.
This model is specifically trained for Persian language tasks.
"""

MODEL_NAME = "OmidSakaki/qwen2.5-3b-persian-ai-tech"

print(f"\nü§ñ Model to load: {MODEL_NAME}")

# ===========================================================
# SECTION 4: PERSIAN KNOWLEDGE BASE
# ===========================================================
"""
Simple Persian knowledge base for the RAG system.
Contains factual information about AI/ML topics in Persian.
"""

PERSIAN_KNOWLEDGE = [
    "ŸáŸàÿ¥ ŸÖÿµŸÜŸàÿπ€å (AI) ÿ¥ÿßÿÆŸá‚Äåÿß€å ÿßÿ≤ ÿπŸÑŸàŸÖ ⁄©ÿßŸÖŸæ€åŸàÿ™ÿ± ÿßÿ≥ÿ™ ⁄©Ÿá ÿ®Ÿá ÿ≥ÿßÿÆÿ™ ŸÖÿßÿ¥€åŸÜ‚ÄåŸáÿß€å ŸáŸàÿ¥ŸÖŸÜÿØ ŸÖ€å‚ÄåŸæÿ±ÿØÿßÿ≤ÿØ.",
    "€åÿßÿØ⁄Ø€åÿ±€å ŸÖÿßÿ¥€åŸÜ ÿ≥Ÿá ŸÜŸàÿπ ÿßÿµŸÑ€å ÿØÿßÿ±ÿØ: €±. ŸÜÿ∏ÿßÿ±ÿ™ ÿ¥ÿØŸá €≤. ÿ®ÿØŸàŸÜ ŸÜÿ∏ÿßÿ±ÿ™ €≥. ÿ™ŸÇŸà€åÿ™€å",
    "Ÿæÿ±ÿØÿßÿ≤ÿ¥ ÿ≤ÿ®ÿßŸÜ ÿ∑ÿ®€åÿπ€å (NLP) ÿ®ÿ±ÿß€å ÿ™ÿ±ÿ¨ŸÖŸá ŸÖÿßÿ¥€åŸÜ€å Ÿà ⁄Üÿ™‚Äåÿ®ÿßÿ™‚ÄåŸáÿß ÿßÿ≥ÿ™ŸÅÿßÿØŸá ŸÖ€å‚Äåÿ¥ŸàÿØ.",
    "RAG (Retrieval-Augmented Generation) ÿßÿ∑ŸÑÿßÿπÿßÿ™ ÿ±ÿß ÿ®ÿßÿ≤€åÿßÿ®€å ⁄©ÿ±ÿØŸá Ÿà ÿ≥Ÿæÿ≥ Ÿæÿßÿ≥ÿÆ ÿ™ŸàŸÑ€åÿØ ŸÖ€å‚Äå⁄©ŸÜÿØ.",
    "ŸÖÿπŸÖÿßÿ±€å ÿ™ÿ±ŸÜÿ≥ŸÅŸàÿ±ŸÖÿ± ÿ®ÿ± Ÿæÿß€åŸá ŸÖ⁄©ÿßŸÜ€åÿ≥ŸÖ ÿ™Ÿàÿ¨Ÿá (Attention Mechanism) ⁄©ÿßÿ± ŸÖ€å‚Äå⁄©ŸÜÿØ."
]

print(f"üìö Knowledge base created with {len(PERSIAN_KNOWLEDGE)} Persian documents")

# ===========================================================
# SECTION 5: SIMPLE TEXT RETRIEVAL FUNCTION
# ===========================================================
"""
Basic text retrieval without vector databases.
Uses simple keyword matching to find relevant documents.
"""

def retrieve_relevant_documents(question, documents, top_k=2):
    """Return the documents that share the most words with the question.

    Both texts are lower-cased and split on whitespace, and punctuation is
    stripped from the edges of every token, so "AI" matches "(AI)" and a word
    followed by "?" or the Persian question mark matches the bare word.
    Characters inside a token (e.g. a zero-width non-joiner or a hyphen) are
    left untouched.

    Args:
        question (str): User's question.
        documents (list): Knowledge-base documents (strings).
        top_k (int): Maximum number of documents to return; values <= 0
            yield an empty list.

    Returns:
        list: Up to top_k documents with at least one shared word, ordered by
        descending overlap. Ties keep their original order in `documents`.
    """
    import unicodedata  # local import keeps this function self-contained

    def _words(text):
        """Return the set of lower-cased tokens of text without edge punctuation."""
        words = set()
        for token in text.lower().split():
            start, end = 0, len(token)
            # Unicode categories starting with "P" cover ASCII and Persian punctuation.
            while start < end and unicodedata.category(token[start]).startswith("P"):
                start += 1
            while end > start and unicodedata.category(token[end - 1]).startswith("P"):
                end -= 1
            if start < end:
                words.add(token[start:end])
        return words

    if top_k <= 0:
        return []

    question_words = _words(question)
    if not question_words:
        # Nothing to match against, so no document can be relevant.
        return []

    # Score each document by the number of distinct shared words.
    scored = [(len(question_words & _words(doc)), doc) for doc in documents]

    # list.sort is stable, so equally scored documents keep their input order.
    scored.sort(key=lambda pair: pair[0], reverse=True)

    # Drop documents without any overlap even if they fall inside the top_k window.
    return [doc for score, doc in scored[:top_k] if score > 0]

# ===========================================================
# SECTION 6: LOAD QWEN2.5 PERSIAN MODEL
# ===========================================================
"""
Load the Qwen2.5-3B Persian model with error handling.
If model loading fails, the system will use a fallback mode.
"""

def load_model():
    """Load the tokenizer and model named by MODEL_NAME, or fall back to mocks.

    The real model is loaded on CPU in float32. If anything in that process
    raises, the error is reported (first 100 characters) and stand-in objects
    with fixed outputs are returned instead, so the rest of the notebook can
    still run.

    Returns:
        tuple: (tokenizer, model, model_loaded_flag); the flag is True only
        when the real tokenizer and model were loaded.
    """
    print("\nü§ñ Loading Qwen2.5 Persian model...")

    try:
        real_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        real_model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,
            device_map="cpu",
            low_cpu_mem_usage=True,
        )

        # Reuse the EOS token for padding when the tokenizer defines none.
        if real_tokenizer.pad_token is None:
            real_tokenizer.pad_token = real_tokenizer.eos_token

        print("‚úÖ Model loaded successfully on CPU!")
        return real_tokenizer, real_model, True
    except Exception as e:
        reason = str(e)[:100]
        print(f"‚ö†Ô∏è Model loading failed: {reason}...")
        print("Using fallback mode for demonstration...")

    # Reached only after a failed load: build the fixed-output stand-ins.
    class MockTokenizer:
        """Tokenizer stand-in that ignores its input."""

        def __init__(self):
            self.pad_token_id = 0

        def __call__(self, text, return_tensors="pt", **kwargs):
            ids = torch.tensor([[101, 102, 103, 104, 105]])
            mask = torch.tensor([[1, 1, 1, 1, 1]])
            return {"input_ids": ids, "attention_mask": mask}

        def decode(self, tokens, skip_special_tokens=True):
            return "This is a mock response. Model could not be loaded."

    class MockModel:
        """Model stand-in whose generate() always returns the same ids."""

        def generate(self, **kwargs):
            return torch.tensor([[1, 2, 3, 4, 5]])

    return MockTokenizer(), MockModel(), False

# Load the model
tokenizer, model, model_loaded = load_model()

# ===========================================================
# SECTION 7: RAG ANSWER GENERATION
# ===========================================================
"""
Generate answers using retrieved context and the language model.
Combines information retrieval with text generation.
"""

def generate_rag_answer(question, use_model=True):
    """Answer a question from retrieved context (RAG).

    Up to two documents are retrieved from PERSIAN_KNOWLEDGE and placed in the
    system turn of a ChatML prompt. The language model is used only when both
    `use_model` and the module-level `model_loaded` flag are true; otherwise a
    canned answer is built from the first retrieved document.

    Args:
        question (str): User's question in Persian.
        use_model (bool): Whether to use the real model or the fallback.

    Returns:
        str: The answer, stripped of surrounding whitespace. If generation
        raises, the error is printed (first 50 characters) and a fixed error
        message is returned instead.
    """
    # Step 1: Retrieve relevant documents
    relevant_docs = retrieve_relevant_documents(question, PERSIAN_KNOWLEDGE, top_k=2)

    # Step 2: Prepare context (bulleted documents, or a "nothing found" note)
    if relevant_docs:
        context = "\n".join([f"‚Ä¢ {doc}" for doc in relevant_docs])
        context_header = "ÿßÿ∑ŸÑÿßÿπÿßÿ™ ŸÖÿ±ÿ™ÿ®ÿ∑:\n\n"
    else:
        context = "ÿßÿ∑ŸÑÿßÿπÿßÿ™ ŸÖÿ±ÿ™ÿ®ÿ∑€å €åÿßŸÅÿ™ ŸÜÿ¥ÿØ."
        context_header = ""

    # Step 3: Create prompt (system turn with context, user turn, open assistant turn)
    prompt = f"""<|im_start|>system
ÿ¥ŸÖÿß €å⁄© ÿØÿ≥ÿ™€åÿßÿ± ŸÅÿßÿ±ÿ≥€å ŸáŸàÿ¥ŸÖŸÜÿØ Ÿáÿ≥ÿ™€åÿØ.

{context_header}{context}

ŸÑÿ∑ŸÅÿßŸã ÿ®ÿß ÿßÿ≥ÿ™ŸÅÿßÿØŸá ÿßÿ≤ ÿßÿ∑ŸÑÿßÿπÿßÿ™ ÿ®ÿßŸÑÿß ÿ®Ÿá ÿ≥ŸàÿßŸÑ Ÿæÿßÿ≥ÿÆ ÿØŸá€åÿØ.<|im_end|>

<|im_start|>user
{question}<|im_end|>

<|im_start|>assistant
"""

    # Step 4: Generate answer
    try:
        if use_model and model_loaded:
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                max_length=1024
            )
            prompt_length = inputs["input_ids"].shape[1]

            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=150,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=tokenizer.pad_token_id,
                    repetition_penalty=1.1
                )

            # Decode only the generated continuation. Searching the fully
            # decoded text for the "<|im_start|>assistant" marker cannot work
            # when skip_special_tokens strips that marker, so the prompt
            # tokens are sliced off by position instead.
            answer = tokenizer.decode(
                outputs[0][prompt_length:],
                skip_special_tokens=True
            )

            # Cut at an end-of-turn marker in case it survived decoding.
            answer = answer.split("<|im_end|>")[0].strip()

        else:
            # Fallback response: quote the start of the best document, if any.
            if relevant_docs:
                answer = f"ÿ®ÿ± ÿßÿ≥ÿßÿ≥ ÿßÿ∑ŸÑÿßÿπÿßÿ™: {relevant_docs[0][:80]}..."
            else:
                answer = "ŸÖÿ™ÿ£ÿ≥ŸÅÿßŸÜŸá ÿßÿ∑ŸÑÿßÿπÿßÿ™ ⁄©ÿßŸÅ€å ÿ®ÿ±ÿß€å Ÿæÿßÿ≥ÿÆ ÿ®Ÿá ÿß€åŸÜ ÿ≥ŸàÿßŸÑ ŸÜÿØÿßÿ±ŸÖ."

    except Exception as e:
        # Best effort: report the problem and return a fixed error message.
        print(f"‚ö†Ô∏è Generation error: {str(e)[:50]}")
        answer = "ÿÆÿ∑ÿß ÿØÿ± ÿ™ŸàŸÑ€åÿØ Ÿæÿßÿ≥ÿÆ. ŸÑÿ∑ŸÅÿßŸã ÿØŸàÿ®ÿßÿ±Ÿá ÿ™ŸÑÿßÿ¥ ⁄©ŸÜ€åÿØ."

    return answer.strip()

# ===========================================================
# SECTION 8: TEST THE RAG SYSTEM
# ===========================================================
"""
Test the RAG system with predefined Persian questions.
Demonstrates the system's capabilities.
"""

print("\n" + "="*60)
print("üß™ TESTING PERSIAN RAG SYSTEM")
print("="*60)

# Test questions in Persian
test_questions = [
    "ŸáŸàÿ¥ ŸÖÿµŸÜŸàÿπ€å ⁄Ü€åÿ≥ÿ™ÿü",
    "€åÿßÿØ⁄Ø€åÿ±€å ŸÖÿßÿ¥€åŸÜ ⁄ÜŸá ÿßŸÜŸàÿßÿπ€å ÿØÿßÿ±ÿØÿü",
    "Ÿæÿ±ÿØÿßÿ≤ÿ¥ ÿ≤ÿ®ÿßŸÜ ÿ∑ÿ®€åÿπ€å ⁄Ü€åÿ≥ÿ™ÿü",
    "RAG ⁄Ü⁄ØŸàŸÜŸá ⁄©ÿßÿ± ŸÖ€å‚Äå⁄©ŸÜÿØÿü",
    "ÿ™ÿ±ŸÜÿ≥ŸÅŸàÿ±ŸÖÿ± ⁄Ü€åÿ≥ÿ™ÿü"
]

# Run tests
# Run every test question through retrieval and generation, print a short
# report for each one, and keep the full outcome in `results` for the summary.
results = []
for i, question in enumerate(test_questions, start=1):
    print(f"\n{i}. ‚ùì Question: {question}")

    # Retrieval is repeated here only to report how many documents matched.
    relevant_docs = retrieve_relevant_documents(question, PERSIAN_KNOWLEDGE)
    docs_found = len(relevant_docs)
    print(f"   üìö Relevant documents found: {docs_found}")

    answer = generate_rag_answer(question, use_model=model_loaded)

    # Show at most 120 characters; the stored answer stays complete.
    if len(answer) > 120:
        display_answer = answer[:120] + "..."
    else:
        display_answer = answer
    print(f"   üí¨ Answer: {display_answer}")

    results.append(
        dict(
            question=question,
            answer=answer,
            docs_found=docs_found,
            model_used=model_loaded,
        )
    )

# ===========================================================
# SECTION 9: SYSTEM SUMMARY
# ===========================================================
"""
Display summary of the RAG system performance.
"""

print("\n" + "="*60)
print("üìä SYSTEM SUMMARY")
print("="*60)

# A test counts as successful when its answer is non-empty and does not start
# with the prefix of the fixed error message returned by generate_rag_answer.
total_questions = len(results)
successful_tests = sum(
    1
    for r in results
    if r["answer"] and not r["answer"].startswith("ÿÆÿ∑ÿß")
)
model_status = "Yes" if model_loaded else "No (fallback mode)"

print(f"‚úÖ Tests completed: {successful_tests}/{total_questions}")
print(f"ü§ñ Real model used: {model_status}")
print(f"üìö Knowledge base size: {len(PERSIAN_KNOWLEDGE)} documents")
print(f"üîß Environment: PyTorch {torch.__version__} (CPU only)")

# ===========================================================
# SECTION 10: USAGE EXAMPLE
# ===========================================================
"""
Example of how to use the RAG system with new questions.
"""

print("\n" + "="*60)
print("üí° HOW TO USE THE SYSTEM")
print("="*60)

print("""
To ask a new question in Persian:

# Method 1: Simple retrieval + generation
question = "ÿ≥ŸàÿßŸÑ ŸÅÿßÿ±ÿ≥€å ÿ¥ŸÖÿß"
answer = generate_rag_answer(question, use_model=True)
print(f"ÿ≥ŸàÿßŸÑ: {question}")
print(f"Ÿæÿßÿ≥ÿÆ: {answer}")

# Method 2: Just retrieval
relevant = retrieve_relevant_documents(question, PERSIAN_KNOWLEDGE)
print(f"ŸÖÿ≥ÿ™ŸÜÿØÿßÿ™ ŸÖÿ±ÿ™ÿ®ÿ∑: {relevant}")
""")

# ===========================================================
# SECTION 11: FINAL TEST
# ===========================================================
"""
Final test with a new question to demonstrate functionality.
"""

print("\n" + "="*60)
print("üéØ FINAL DEMONSTRATION")
print("="*60)

# Test a new question
new_question = "ÿ™ŸÅÿßŸàÿ™ ŸáŸàÿ¥ ŸÖÿµŸÜŸàÿπ€å Ÿà €åÿßÿØ⁄Ø€åÿ±€å ŸÖÿßÿ¥€åŸÜ ⁄Ü€åÿ≥ÿ™ÿü"
print(f"New question: {new_question}")

final_answer = generate_rag_answer(new_question, use_model=model_loaded)
print(f"\nAnswer: {final_answer}")

print("\n" + "="*60)
print("‚úÖ PERSIAN RAG SYSTEM READY FOR USE!")
print("="*60)