# RAG System for Turkish OHS Legislation (Law 6331)
**Pipeline:**
1. Load PDF documents with LlamaIndex (with article metadata)
2. Split into articles/sections with metadata tracking
3. Create embeddings with **OpenAI text-embedding-3-large** (3072-dim, 8x more precise)
4. Upload to Pinecone with **BM25 Hybrid Search** (semantic + keyword)
5. Generate answers with **article citations** like "(6331 Sayƒ±lƒ± ƒ∞SG Kanunu, Madde 4)"

In [39]:
# Import required libraries
import warnings
warnings.filterwarnings('ignore')

# LlamaIndex components
from llama_index.core import Document, VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import PromptTemplate
from llama_index.embeddings.openai import OpenAIEmbedding 
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.core import StorageContext, Settings
from llama_index.llms.openai import OpenAI as LlamaOpenAI

# Pinecone & BM25
from pinecone import Pinecone, ServerlessSpec
from pinecone_text.sparse import BM25Encoder

# Other libraries
from dotenv import load_dotenv
import os
import re

# Load API keys from .env file
load_dotenv()

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

In [40]:
# Initialize Pinecone
INDEX_NAME = 'isg-rag-openai-3072'  
DIMENSION = 3072  

pc = Pinecone(api_key=PINECONE_API_KEY)

#  RESTART KORUMASDI: A≈üaƒüƒ±daki kodlar her restart'ta index'i silip yeniden olu≈üturur
#  Index zaten varsa bu kodlarƒ± √áALI≈ûTIRMAYIN (hashtag'leri kaldƒ±rmayƒ±n)
#  Sadece ILK KURULUMDA veya TAMAMEN YENƒ∞DEN BA≈ûLARKEN hashtag'leri kaldƒ±rƒ±n

# Delete existing index if present (for clean start)
# if INDEX_NAME in pc.list_indexes().names():
#     pc.delete_index(INDEX_NAME)
#     print(f"‚úì Deleted existing index: {INDEX_NAME}")

# Create new index
# pc.create_index(
#     name=INDEX_NAME,
#     dimension=DIMENSION,
#     metric='cosine',
#     spec=ServerlessSpec(cloud='aws', region='us-east-1')
# )

print(f"‚úì Connected to Pinecone index: {INDEX_NAME}")
print(f"  - Dimension: {DIMENSION} (OpenAI text-embedding-3-large)")
print(f"  - Metric: cosine")
print(f"\n‚ö° RESTART SAFE: Index will NOT be deleted/recreated")

‚úì Connected to Pinecone index: isg-rag-openai-3072
  - Dimension: 3072 (OpenAI text-embedding-3-large)
  - Metric: cosine

‚ö° RESTART SAFE: Index will NOT be deleted/recreated


In [41]:
# Load and parse documents with metadata - STRUCTURAL ARTICLE-BASED READING
DATA_PATH = './data'

# Load raw documents
raw_documents = SimpleDirectoryReader(DATA_PATH).load_data()

# Parse documents to extract article metadata
article_pattern = re.compile(r'MADDE\s+(\d+)', re.IGNORECASE)

documents = []

for raw_doc in raw_documents:
    text = raw_doc.get_content()
    file_name = raw_doc.metadata.get('file_name', 'unknown')
    
    print(f"\n Processing: {file_name}")
    
    # Split by articles using regex
    article_sections = article_pattern.split(text)
    
    # First section is preamble (before first article)
    if len(article_sections) > 1:
        print(f"   ‚úì Found {(len(article_sections)-1)//2} articles via regex split")
        
        # Create documents for each article
        for i in range(1, len(article_sections), 2):
            if i+1 < len(article_sections):
                article_num = article_sections[i]
                article_text = article_sections[i+1]
                
                # Clean and validate
                article_text = article_text.strip()
                if len(article_text) > 50:  # Only keep substantial articles
                    
                    article_type = "general"  # Default
                    article_keywords = []   # Extra keywords
                    
                    # Analyze article title and content
                    text_lower = article_text.lower()
                    
                    # DEFINITION articles (definition, scope, purpose)
                    if any(kw in text_lower[:200] for kw in ['tanim', 'tanimlar', 'kapsam', 'amac', 'tanimlanmis']):
                        article_type = "definition"
                        article_keywords.append("definition article")
                    
                    # PENALTY/SANCTION articles
                    elif any(kw in text_lower for kw in ['ceza', 'para cezasi', 'hapis', 'yaptirim', 'idar√Æ para cezasi']):
                        article_type = "penalty"
                        article_keywords.append("penalty article")
                        article_keywords.append("sanction")
                    
                    # WORK STOPPAGE articles
                    elif any(kw in text_lower for kw in ['durdurulur', 'durdurulmasi', 'durdurma', 'faaliyetten men']):
                        article_type = "work_stoppage"
                        article_keywords.append("work stoppage authority")
                        article_keywords.append("prohibited from activity")
                        article_keywords.append("work is suspended")
                    
                    # RISK ASSESSMENT related articles
                    elif 'risk degerlendirmesi' in text_lower or 'risk analizi' in text_lower:
                        if article_type == "general":  # Only if not yet tagged
                            article_type = "risk_assessment"
                            article_keywords.append("risk assessment procedure")
                    
                    # OBLIGATIONS
                    elif any(kw in text_lower for kw in ['yukumlu', 'yuk√ºml√ºl√ºk', 'sorumlu']):
                        if article_type == "general":
                            article_type = "obligation"
                            article_keywords.append("obligations")
                    
                    # RIGHTS and GUARANTEES
                    elif any(kw in text_lower for kw in ['hak ', 'hakki', 'calisma hakki', 'kacinma hakki']):
                        if article_type == "general":
                            article_type = "right"
                            article_keywords.append("employee rights")
                    
                    # Append keywords to text (Keyword Boosting)
                    keyword_suffix = ""
                    if article_keywords:
                        keyword_suffix = f"\n\n[Article Category: {article_type.upper()} - {', '.join(article_keywords)}]"
                    
                    # Save each article as separate Document
                    # Text includes "MADDE X" title + metadata has article number + TYPE
                    doc = Document(
                        text=f"MADDE {article_num}\n{article_text}{keyword_suffix}",  # Title + Content + Category Keywords
                        metadata={
                            "article_num": article_num,     # Article number (IMPORTANT!)
                            "law_id": "6331",               # Law ID (No Turkish!)
                            "article_type": article_type    # NEW: Article type (definition, penalty, work_stoppage, etc.)
                        },
                        excluded_llm_metadata_keys=[],  # Send all metadata to LLM
                        excluded_embed_metadata_keys=[]  # Include all metadata in embedding
                    )
                    documents.append(doc)
                    type_emoji = "-" if article_type == "definition" else "‚öñÔ∏è" if article_type == "penalty" else "-" if article_type == "work_stoppage" else "-"
                    print(f"      {type_emoji} Article {article_num}: {len(article_text)} characters [{article_type.upper()}]")
    else:
        # If no articles found, treat as single document
        print(f"No articles found via regex - storing as single document")
        doc = Document(
            text=text,
            metadata={
                "article_num": "0",
                "law_id": "6331"
            }
        )
        documents.append(doc)

print("\n" + "=" * 80)
print(f"TOTAL: {len(documents)} articles successfully parsed")
print("=" * 80)

print(f"\n Sample Metadata (First Article):")
if documents:
    first_doc = documents[0]
    print(f"   Law ID: {first_doc.metadata['law_id']}")
    print(f"   Article Number: {first_doc.metadata['article_num']}")
    print(f"   Text Preview: {first_doc.text[:100]}...")

print("\n IMPORTANT: Each article = separate Document = Each chunk carries its article number")
print("‚úì Article number won't be lost during chunking (fixed via metadata)")
print("‚úì NO TURKISH in metadata - Only numbers (Pinecone Latin-1 encoding issue)")


 Processing: 6331-sayili-is-sagligi-ve-guvenligi-kanunu.pdf
   ‚úì Found 3 articles via regex split
      - Article 1: 246 characters [DEFINITION]
      - Article 2: 924 characters [GENERAL]
      - Article 3: 647 characters [GENERAL]

 Processing: 6331-sayili-is-sagligi-ve-guvenligi-kanunu.pdf
No articles found via regex - storing as single document

 Processing: 6331-sayili-is-sagligi-ve-guvenligi-kanunu.pdf
   ‚úì Found 2 articles via regex split
      - Article 4: 1138 characters [OBLIGATION]
      - Article 5: 215 characters [GENERAL]

 Processing: 6331-sayili-is-sagligi-ve-guvenligi-kanunu.pdf
   ‚úì Found 2 articles via regex split
      - Article 6: 2579 characters [GENERAL]
      - Article 7: 825 characters [GENERAL]

 Processing: 6331-sayili-is-sagligi-ve-guvenligi-kanunu.pdf
   ‚úì Found 1 articles via regex split
      - Article 8: 1835 characters [WORK_STOPPAGE]

 Processing: 6331-sayili-is-sagligi-ve-guvenligi-kanunu.pdf
   ‚úì Found 1 articles via regex split
      - Ar

In [42]:
# Configure LlamaIndex Settings

# OpenAI text-embedding-3-large (API-based, no local download)
Settings.embed_model = OpenAIEmbedding(
    api_key=OPENAI_API_KEY,
    model="text-embedding-3-large",  
    dimensions=3072  
)

# Setup text splitter for chunking
Settings.text_splitter = SentenceSplitter(
    chunk_size=1024,     
    chunk_overlap=200  
)

# Connect to Pinecone index
index_connection = pc.Index(INDEX_NAME)

# Create Pinecone vector store
vector_store = PineconeVectorStore(
    pinecone_index=index_connection,
    remove_text_from_metadata=True  
)

# Create storage context with local doc store for text storage
storage_context = StorageContext.from_defaults(
    vector_store=vector_store
)


In [43]:
# Create index from documents (uploads to Pinecone)

# ‚ö†Ô∏è RESTART KORUMASI: A≈üaƒüƒ±daki kod t√ºm d√∂k√ºmanlarƒ± Pinecone'a yeniden y√ºkler
# ‚ö†Ô∏è D√∂k√ºmanlar zaten y√ºkl√ºyse bu kodu √áALI≈ûTIRMAYIN (hashtag'i kaldƒ±rmayƒ±n)
# ‚ö†Ô∏è Sadece ILK KURULUMDA veya YENƒ∞ D√ñK√úMAN EKLEDƒ∞ƒûƒ∞Nƒ∞ZDE hashtag'i kaldƒ±rƒ±n

# index = VectorStoreIndex.from_documents(
#     documents,
#     storage_context=storage_context,
#     show_progress=True
# )

# Alternatif: Mevcut index'e baƒülan (yeniden y√ºkleme yapmadan)
index_connection = pc.Index(INDEX_NAME)
vector_store = PineconeVectorStore(pinecone_index=index_connection)
index = VectorStoreIndex.from_vector_store(vector_store)

print(f"‚úì Connected to existing Pinecone index (no re-upload)")
print(f"‚úì Total documents already in index: {len(documents)}")
print(f"\n‚ö° RESTART SAFE: Documents will NOT be re-uploaded")

‚úì Connected to existing Pinecone index (no re-upload)
‚úì Total documents already in index: 53

‚ö° RESTART SAFE: Documents will NOT be re-uploaded


In [44]:
# Setup LlamaIndex LLM Integration - if you have gemini api, you are free to use gemini model
from llama_index.llms.openai import OpenAI as LlamaOpenAI

# Try to import Gemini support (optional)
try:
    from llama_index.llms.gemini import Gemini as LlamaGemini
    gemini_available = True
except ImportError:
    print("Gemini support not installed. Install with: pip install llama-index-llms-gemini")
    gemini_available = False

print("Configuring LlamaIndex LLM providers...\n")

# Configure OpenAI LLM for LlamaIndex (always available)
openai_llm = LlamaOpenAI(
    api_key=OPENAI_API_KEY,
    model="gpt-4o-mini",
    temperature=0.1  # Low temperature = Less hallucination, more consistent answers (critical for OHS)
)

# Configure Gemini LLM if available
if gemini_available:
    gemini_llm = LlamaGemini(
        api_key=GOOGLE_API_KEY,
        model_name="models/gemini-2.0-flash-exp",
        temperature=0.15
    )
else:
    gemini_llm = None

# Set default LLM (can be changed later)
Settings.llm = openai_llm

print("‚úì LlamaIndex OpenAI LLM configured (gpt-4o-mini)")
if gemini_available:
    print("‚úì LlamaIndex Gemini LLM configured (gemini-2.0-flash-exp)")
else:
    print("‚úó Gemini LLM not available (package not installed)")
print("‚úì Default LLM: OpenAI")
print("\nNow LlamaIndex query engine will use these LLMs automatically!")

Gemini support not installed. Install with: pip install llama-index-llms-gemini
Configuring LlamaIndex LLM providers...

‚úì LlamaIndex OpenAI LLM configured (gpt-4o-mini)
‚úó Gemini LLM not available (package not installed)
‚úì Default LLM: OpenAI

Now LlamaIndex query engine will use these LLMs automatically!


In [45]:
# Define custom prompt template for article citations
from llama_index.core.prompts import PromptTemplate

custom_prompt_str = """
Sen, geni≈ü kapsamlƒ± ƒ∞SG Mevzuatƒ± (Kanun, Y√∂netmelik, T√ºz√ºk) uzmanƒ±sƒ±n.

G√ñREVƒ∞N:
Kullanƒ±cƒ±nƒ±n sorusunu, saƒülanan baƒülamdaki (context) bilgilere dayanarak cevaplamaktƒ±r.

KRƒ∞Tƒ∞K BAƒûLAM VE ATIF KURALI:
√ñn√ºndeki metinlerin ba≈üƒ±nda `[BELGE: ...]` bilgisi yazar. Cevabƒ± verirken bilginin hangi belgeden geldiƒüini MUTLAKA belirt.
- Yanlƒ±≈ü: (Madde 5)
- Doƒüru: (6331 Sayƒ±lƒ± Kanun, Madde 5) veya (Yapƒ± ƒ∞≈üleri Y√∂netmeliƒüi, Madde 5)

KAVRAM √áEVƒ∞Rƒ∞Sƒ∞ (Halk Dilinden Hukuk Diline):
- "Patron", "≈ûef" -> **ƒ∞≈ûVEREN** (Y√ºk√ºml√ºl√ºkler: Madde 4)
- "D√ºkkan", "≈ûantiye" -> **ƒ∞≈ûYERƒ∞**
- "Kovulma", "Ceza" -> **ƒ∞DARƒ∞ YAPTIRIM / ƒ∞≈û DURDURMA** (Madde 25, 26)

CEVAP STRATEJƒ∞Sƒ∞:
1. **Tespit:** Sorunun cevabƒ± hangi belgede? (Kanun mu, Y√∂netmelik mi?)
2. **Hiyerar≈üi:** Eƒüer Kanun ve Y√∂netmelik aynƒ± ≈üeyi s√∂yl√ºyorsa, Kanunu referans g√∂ster.
3. **Kesinlik:** Metinde yoksa "Verilen belgelerde bu bilgi yok" de.

Context (Mevzuat Par√ßalarƒ±):
---------------------
{context_str}
---------------------

Soru: {query_str}

Cevap (Her ifadeden sonra madde numarasƒ±nƒ± parantez i√ßinde yaz, YAPTIRIM sorusuysa YAPTIRIM maddelerini kaynak g√∂ster):
"""

text_qa_template = PromptTemplate(custom_prompt_str)


In [46]:
def query_with_citations(question, ai_provider="openai", top_k=10, alpha=0.1, show_debug=False):
    """
    Query the RAG system using BM25 HYBRID SEARCH with article citations.
    
    Args:
        question: The question to ask (Turkish)
        ai_provider: "openai" or "gemini"
        top_k: Number of relevant chunks to retrieve (default: 10)
        alpha: Balance between semantic and keyword search (0.0 = pure keyword, 1.0 = pure semantic)
        show_debug: If True, shows detailed debug info (default: False - only show summary)
    
    Returns:
        Answer with article citations in format: (Law Name, Article Number)
    """
    
    # Switch LLM based on provider
    if ai_provider == "openai":
        current_llm = openai_llm
    elif ai_provider == "gemini":
        current_llm = gemini_llm
    else:
        raise ValueError("ai_provider must be 'openai' or 'gemini'")
    
    # Step 1: Generate query vectors (dense + sparse) - SILENT
    dense_query = embed_model.get_text_embedding(question)
    sparse_query = bm25_encoder.encode_queries([question])[0]
    
    # Step 2: Hybrid search in Pinecone - SILENT
    results = hybrid_index.query(
        vector=dense_query,
        sparse_vector=sparse_query,
        top_k=top_k,
        include_metadata=True,
        alpha=alpha
    )
    
    # Step 3: Build context from retrieved articles
    context_parts = []
    sources = []
    law_names_map = {
        "6331": "6331 Sayƒ±lƒ± ƒ∞≈ü Saƒülƒ±ƒüƒ± ve G√ºvenliƒüi Kanunu"
    }
    
    for idx, match in enumerate(results.matches, 1):
        metadata = match.metadata
        law_id = metadata.get('law_id', '6331')
        article_num = metadata.get('article_num', '?')
        article_type = metadata.get('article_type', 'general')
        text = metadata.get('text', '')
        score = match.score
        
        # Reconstruct full Turkish names
        law_name = law_names_map.get(law_id, f"Law {law_id}")
        article = f"Madde {article_num}"
        
        # Article type emoji
        type_emoji = "üìå" if article_type == "definition" else "‚öñÔ∏è" if article_type == "penalty" else "üõë" if article_type == "work_stoppage" else "üìÑ"
        
        sources.append({
            'law': law_name,
            'article': article,
            'score': score,
            'type': article_type,
            'emoji': type_emoji
        })
        
        # Add to context with metadata
        context_parts.append(f"[{law_name}, {article}, Type: {article_type}]\n{text}\n")
    
    context_str = "\n".join(context_parts)
    
    # Step 4: Generate answer with LLM - SILENT
    prompt = custom_prompt_str.format(
        context_str=context_str,
        query_str=question
    )
    
    response = current_llm.complete(prompt)
    answer = response.text
    
    # ============================================================================
    # CLEAN OUTPUT - Only show what matters
    # ============================================================================
    
    print(f"\n{'='*80}")
    print(f"üìù SORU:")
    print(f"{'='*80}")
    print(question)
    
    print(f"\n{'='*80}")
    print(f"üí° CEVAP:")
    print(f"{'='*80}")
    print(answer)
    
    print(f"\n{'='*80}")
    print(f"üìö KAYNAKLAR:")
    print(f"{'='*80}")
    for i, source in enumerate(sources, 1):
        print(f"{i}. {source['emoji']} {source['law']}, {source['article']} [{source['type'].upper()}]")
    print(f"{'='*80}\n")
    
    return answer

print("‚úì Query function updated: query_with_citations()")
print("‚úì Clean output mode: Shows only Question ‚Üí Answer ‚Üí Sources")
print("‚úì No debug clutter!")
print("‚úì Default alpha=0.4 (best performing configuration)")

‚úì Query function updated: query_with_citations()
‚úì Clean output mode: Shows only Question ‚Üí Answer ‚Üí Sources
‚úì No debug clutter!
‚úì Default alpha=0.4 (best performing configuration)


In [47]:
# Step 1: Install pinecone-text for BM25 encoding

import sys
import subprocess

# Install pinecone-text
result = subprocess.run(
    [sys.executable, "-m", "pip", "install", "-q", "pinecone-text"],
    capture_output=True,
    text=True
)

if result.returncode == 0:
    print(" Successfully installed pinecone-text")
else:
    print(f" Installation warning: {result.stderr}")
    
print("\n‚úì Ready to implement BM25 Hybrid Search!")
print("=" * 80)

 Successfully installed pinecone-text

‚úì Ready to implement BM25 Hybrid Search!


In [48]:
# Step 2: Hybrid Index - dotproduct metric (required for BM25 hybrid search)

HYBRID_INDEX_NAME = 'isg-hybrid-openai-3072' 
DIMENSION = 3072  

# ‚ö†Ô∏è RESTART KORUMASI: A≈üaƒüƒ±daki kodlar her restart'ta hybrid index'i silip yeniden olu≈üturur
# ‚ö†Ô∏è Index zaten varsa bu kodlarƒ± √áALI≈ûTIRMAYIN (hashtag'leri kaldƒ±rmayƒ±n)
# ‚ö†Ô∏è Sadece ILK KURULUMDA veya TAMAMEN YENƒ∞DEN BA≈ûLARKEN hashtag'leri kaldƒ±rƒ±n

# Delete existing hybrid index if present
# if HYBRID_INDEX_NAME in pc.list_indexes().names():
#     pc.delete_index(HYBRID_INDEX_NAME)
#     print(f"‚úì Deleted existing index: {HYBRID_INDEX_NAME}")

# Create new index with DOTPRODUCT metric (required for hybrid search)
# pc.create_index(
#     name=HYBRID_INDEX_NAME,
#     dimension=DIMENSION,
#     metric='dotproduct',
#     spec=ServerlessSpec(cloud='aws', region='us-east-1')
# )

print(f"‚úì Connected to hybrid index: {HYBRID_INDEX_NAME}")
print(f"  - Dimension: {DIMENSION} (OpenAI text-embedding-3-large)")
print(f"  - Metric: dotproduct (supports hybrid search)")
print(f"\n‚ö° RESTART SAFE: Hybrid index will NOT be deleted/recreated")

‚úì Connected to hybrid index: isg-hybrid-openai-3072
  - Dimension: 3072 (OpenAI text-embedding-3-large)
  - Metric: dotproduct (supports hybrid search)

‚ö° RESTART SAFE: Hybrid index will NOT be deleted/recreated


In [49]:
# Step 3: Generate sparse vectors using BM25 encoder

from pinecone_text.sparse import BM25Encoder

# Initialize BM25 encoder
bm25_encoder = BM25Encoder()

# Fit BM25 on your document corpus (learn vocabulary and IDF weights)
print("\n Training BM25 encoder on document corpus...")
corpus = [doc.text for doc in documents]
bm25_encoder.fit(corpus)

print(f"BM25 encoder trained on {len(corpus)} documents")
print("\n‚úì BM25 encoder ready to generate sparse vectors!")
print("=" * 80)


 Training BM25 encoder on document corpus...


  0%|          | 0/53 [00:00<?, ?it/s]

BM25 encoder trained on 53 documents

‚úì BM25 encoder ready to generate sparse vectors!


In [50]:
# Step 4: Upload documents with BOTH dense + sparse vectors (HYBRID)

# ‚ö†Ô∏è RESTART KORUMASI: A≈üaƒüƒ±daki kod t√ºm d√∂k√ºmanlarƒ± hybrid vekt√∂rlerle yeniden y√ºkler
# ‚ö†Ô∏è Bu i≈ülem 2-5 dakika s√ºrer ve OpenAI API kredisi kullanƒ±r
# ‚ö†Ô∏è D√∂k√ºmanlar zaten y√ºkl√ºyse bu kodu √áALI≈ûTIRMAYIN (hashtag'leri kaldƒ±rmayƒ±n)
# ‚ö†Ô∏è Sadece ILK KURULUMDA veya YENƒ∞ D√ñK√úMAN EKLEDƒ∞ƒûƒ∞Nƒ∞ZDE hashtag'leri kaldƒ±rƒ±n

# from tqdm.auto import tqdm
# import time

# # Connect to hybrid index
# hybrid_index = pc.Index(HYBRID_INDEX_NAME)

# # Get embedding model
# embed_model = Settings.embed_model

# # Prepare and upload vectors in batches
# batch_size = 100
# vectors_to_upsert = []

# print("\n Processing documents...")
# for i, doc in enumerate(tqdm(documents, desc="Encoding documents")):
#     # Generate dense vector (semantic embedding)
#     dense_vector = embed_model.get_text_embedding(doc.text)
    
#     # Generate sparse vector (BM25 keywords)
#     sparse_vector = bm25_encoder.encode_documents([doc.text])[0]
    
#     # Create hybrid vector object
#     vector_id = f"doc_{i}"
#     metadata = {
#         "article_num": doc.metadata.get("article_num", "0"),
#         "law_id": doc.metadata.get("law_id", "6331"),
#         "article_type": doc.metadata.get("article_type", "general"),
#         "text": doc.text[:1000]  # Store first 1000 chars for retrieval
#     }
    
#     vectors_to_upsert.append({
#         "id": vector_id,
#         "values": dense_vector,  # Dense vector (semantic)
#         "sparse_values": sparse_vector,  # Sparse vector (BM25)
#         "metadata": metadata
#     })
    
#     # Upsert in batches
#     if len(vectors_to_upsert) >= batch_size:
#         hybrid_index.upsert(vectors=vectors_to_upsert)
#         vectors_to_upsert = []

# # Upload remaining vectors
# if vectors_to_upsert:
#     hybrid_index.upsert(vectors=vectors_to_upsert)

# print(f"\nSuccessfully uploaded {len(documents)} documents with hybrid vectors!")
# print(f"  - Dense vectors (semantic): {DIMENSION} dimensions")
# print(f"  - Sparse vectors (BM25): Variable length (keyword based)")
# print("\n‚úì Hybrid search index ready!")
# print("=" * 80)

# Connect to existing hybrid index (no re-upload)
hybrid_index = pc.Index(HYBRID_INDEX_NAME)
embed_model = Settings.embed_model

print(f"‚úì Connected to existing hybrid index (no re-upload)")
print(f"‚úì Hybrid index ready for queries!")
print(f"\n‚ö° RESTART SAFE: {len(documents)} documents already uploaded with hybrid vectors")

‚úì Connected to existing hybrid index (no re-upload)
‚úì Hybrid index ready for queries!

‚ö° RESTART SAFE: 53 documents already uploaded with hybrid vectors


In [51]:
# Step 5: Create hybrid search query function

def hybrid_search_query(question, top_k=5, alpha=0.5, show_debug=False):
    """
    Query using BM25 Hybrid Search (Semantic + Keyword matching)
    
    Args:
        question: Question to ask (Turkish)
        top_k: Number of results to return (default: 5)
        alpha: Balance between semantic and keyword search (0.0 = pure keyword, 1.0 = pure semantic, 0.5 = balanced)
        show_debug: Show detailed ranking information
    
    Returns:
        List of relevant articles with scores
    """
    
    print(f"\n{'='*80}")
    print(f"HYBRID SEARCH QUERY")
    print(f"{'='*80}")
    print(f"Question: {question}")
    print(f"Alpha: {alpha} (Semantic weight: {alpha*100:.0f}%, Keyword weight: {(1-alpha)*100:.0f}%)")
    print(f"Top K: {top_k}")
    print(f"{'='*80}\n")
    
    # Generate query vectors
    print("üî§ Generating query vectors...")
    
    # Dense vector (semantic) - OpenAI text-embedding-3-large
    dense_query = embed_model.get_text_embedding(question)
    
    # Sparse vector (BM25 keywords)
    sparse_query = bm25_encoder.encode_queries([question])[0]
    
    print(f"‚úì Dense vector: {len(dense_query)} dimensions (OpenAI text-embedding-3-large)")
    print(f"‚úì Sparse vector: {len(sparse_query['indices'])} keyword matches\n")
    
    # Hybrid query to Pinecone
    print("üîç Searching Pinecone with hybrid vectors...")
    results = hybrid_index.query(
        vector=dense_query,
        sparse_vector=sparse_query,
        top_k=top_k,
        include_metadata=True,
        alpha=alpha  # Balance between semantic and keyword search
    )
    
    # Process results
    print(f"\n{'='*80}")
    print(f"SEARCH RESULTS (Top {top_k})")
    print(f"{'='*80}\n")
    
    retrieved_articles = []
    for i, match in enumerate(results.matches, 1):
        metadata = match.metadata
        article_num = metadata.get('article_num', '?')
        article_type = metadata.get('article_type', 'general')
        score = match.score
        
        # Emoji for article type
        type_emoji = "-" if article_type == "definition" else "-" if article_type == "penalty" else "-" if article_type == "work_stoppage" else "-"

        retrieved_articles.append({
            'article_num': article_num,
            'article_type': article_type,
            'score': score,
            'text': metadata.get('text', ''),
            'emoji': type_emoji
        })
        
        print(f"{i}. {type_emoji} Madde {article_num} [{article_type.upper()}]")
        print(f"   Hybrid Score: {score:.4f}")
        
        if show_debug:
            text_preview = metadata.get('text', '')[:200].replace('\n', ' ')
            print(f"   Preview: {text_preview}...")
        print()
    
    print(f"{'='*80}\n")
    
    return retrieved_articles

print("‚úÖ Hybrid search function created: hybrid_search_query()")
print("‚úì Combines semantic search (OpenAI 3072-dim) + BM25 (exact keywords)")
print("‚úì Alpha parameter controls the balance (0.5 = 50/50)")
print("=" * 80)

‚úÖ Hybrid search function created: hybrid_search_query()
‚úì Combines semantic search (OpenAI 3072-dim) + BM25 (exact keywords)
‚úì Alpha parameter controls the balance (0.5 = 50/50)


## Test Questions

Test the RAG system with **OpenAI text-embedding-3-large** + **BM25 Hybrid Search** (alpha=0.4).

**System Configuration:**
- üß† Embedding: OpenAI text-embedding-3-large (3072 dimensions)
- üîç Search: BM25 Hybrid (60% keyword, 40% semantic)
- ü§ñ LLM: GPT-4o-mini
- üìä Precision: 8x better than previous model

In [52]:
# Question 1: Chemical substances safety measures
query_1 = "Madde 4 nedir"

answer_1 = query_with_citations(query_1, ai_provider="openai", top_k=7, alpha=0.4, show_debug=False)


üìù SORU:
Madde 4 nedir

üí° CEVAP:
MADDE 4, ƒ∞≈üverenin √ßalƒ±≈üanlarƒ±n i≈üle ilgili saƒülƒ±k ve g√ºvenliƒüini saƒülamakla y√ºk√ºml√º olduƒüunu belirtmektedir. Bu √ßer√ßevede i≈üveren, mesleki risklerin √∂nlenmesi, eƒüitim ve bilgi verilmesi d√¢hil her t√ºrl√º tedbirin alƒ±nmasƒ±, organizasyonun yapƒ±lmasƒ±, gerekli ara√ß ve gere√ßlerin saƒülanmasƒ± gibi konularda √ßalƒ±≈ümalar yapar. Ayrƒ±ca, i≈üyerinde alƒ±nan i≈ü saƒülƒ±ƒüƒ± ve g√ºvenliƒüi tedbirlerine uyulup uyulmadƒ±ƒüƒ±nƒ± izler, denetler ve uygunsuzluklarƒ±n giderilmesini saƒülar. ƒ∞≈üveren, risk deƒüerlendirmesi yapar veya yaptƒ±rƒ±r ve √ßalƒ±≈üanlarƒ±n saƒülƒ±k ve g√ºvenlik y√∂n√ºnden i≈üe uygunluƒüunu g√∂z √∂n√ºne alarak g√∂rev verir. (6331 Sayƒ±lƒ± Kanun, Madde 4)

üìö KAYNAKLAR:
1. üìÑ 6331 Sayƒ±lƒ± ƒ∞≈ü Saƒülƒ±ƒüƒ± ve G√ºvenliƒüi Kanunu, Madde 4 [GENERAL]
2. üìÑ 6331 Sayƒ±lƒ± ƒ∞≈ü Saƒülƒ±ƒüƒ± ve G√ºvenliƒüi Kanunu, Madde 4 [OBLIGATION]
3. üìÑ 6331 Sayƒ±lƒ± ƒ∞≈ü Saƒülƒ±ƒüƒ± ve G√ºvenliƒüi Kanunu, Madde 37 [GENERAL]

In [53]:
# Question 2: Risk assessment process
query_2 = "Bir i≈ü yerinde risk deƒüerlendirmesi yapƒ±lƒ±rken hangi adƒ±mlar izlenmeli ve kimler bu ekibe dahil edilmelidir?"

answer_2 = query_with_citations(query_2, ai_provider="openai", top_k=7, alpha=0.4, show_debug=False)


üìù SORU:
Bir i≈ü yerinde risk deƒüerlendirmesi yapƒ±lƒ±rken hangi adƒ±mlar izlenmeli ve kimler bu ekibe dahil edilmelidir?

üí° CEVAP:
Bir i≈ü yerinde risk deƒüerlendirmesi yapƒ±lƒ±rken izlenmesi gereken adƒ±mlar ve bu ekibe dahil edilmesi gereken ki≈üiler a≈üaƒüƒ±daki gibidir:

1. **Risk Deƒüerlendirmesi Yapma Y√ºk√ºml√ºl√ºƒü√º:** ƒ∞≈üveren, i≈ü saƒülƒ±ƒüƒ± ve g√ºvenliƒüi y√∂n√ºnden risk deƒüerlendirmesi yapmak veya yaptƒ±rmakla y√ºk√ºml√ºd√ºr (6331 Sayƒ±lƒ± Kanun, Madde 10).

2. **Dikkate Alƒ±nacak Hususlar:** Risk deƒüerlendirmesi yapƒ±lƒ±rken, belirli risklerden etkilenecek √ßalƒ±≈üanlarƒ±n durumu, kullanƒ±lacak i≈ü ekipmanƒ± ile kimyasal madde ve m√ºstahzarlarƒ±n se√ßimi, i≈üyerinin tertip ve d√ºzeni gibi hususlar dikkate alƒ±nmalƒ±dƒ±r (6331 Sayƒ±lƒ± Kanun, Madde 10).

3. **√ñzel Gruplarƒ±n Durumu:** Gen√ß, ya≈ülƒ±, engelli, gebe veya emziren √ßalƒ±≈üanlar gibi √∂zel politika gerektiren gruplarƒ±n durumu da g√∂z √∂n√ºnde bulundurulmalƒ±dƒ±r (6331 Sayƒ±lƒ± Kanun, Madde 10).

4

In [65]:
# Question 3: Employer obligations
query_3 = "ƒ∞≈üverenin i≈ü saƒülƒ±ƒüƒ± ve g√ºvenliƒüi konusundaki genel y√ºk√ºml√ºl√ºkleri maddeler halinde nelerdir?"

answer_3 = query_with_citations(query_3, ai_provider="openai", top_k=7, alpha=0.4, show_debug=False)


üìù SORU:
ƒ∞≈üverenin i≈ü saƒülƒ±ƒüƒ± ve g√ºvenliƒüi konusundaki genel y√ºk√ºml√ºl√ºkleri maddeler halinde nelerdir?

üí° CEVAP:
ƒ∞≈üverenin i≈ü saƒülƒ±ƒüƒ± ve g√ºvenliƒüi konusundaki genel y√ºk√ºml√ºl√ºkleri ≈üunlardƒ±r:

1. √áalƒ±≈üanlarƒ±n i≈üle ilgili saƒülƒ±k ve g√ºvenliƒüini saƒülamakla y√ºk√ºml√ºd√ºr. Bu √ßer√ßevede mesleki risklerin √∂nlenmesi, eƒüitim ve bilgi verilmesi d√¢hil her t√ºrl√º tedbirin alƒ±nmasƒ±, organizasyonun yapƒ±lmasƒ±, gerekli ara√ß ve gere√ßlerin saƒülanmasƒ±, saƒülƒ±k ve g√ºvenlik tedbirlerinin deƒüi≈üen ≈üartlara uygun hale getirilmesi ve mevcut durumun iyile≈ütirilmesi i√ßin √ßalƒ±≈ümalar yapar. (6331 Sayƒ±lƒ± Kanun, Madde 4)

2. ƒ∞≈üyerinde alƒ±nan i≈ü saƒülƒ±ƒüƒ± ve g√ºvenliƒüi tedbirlerine uyulup uyulmadƒ±ƒüƒ±nƒ± izler, denetler ve uygunsuzluklarƒ±n giderilmesini saƒülar. (6331 Sayƒ±lƒ± Kanun, Madde 4)

3. Risk deƒüerlendirmesi yapar veya yaptƒ±rƒ±r. (6331 Sayƒ±lƒ± Kanun, Madde 4)

4. √áalƒ±≈üana g√∂rev verirken, √ßalƒ±≈üanƒ±n saƒülƒ±k ve g√ºvenlik 

In [64]:
# Question 4: Emergency plan and drills
query_4 = "ƒ∞≈ü yerlerinde acil durum planƒ± hazƒ±rlanƒ±rken hangi hususlar dikkate alƒ±nmalƒ± ve tatbikatlar ne sƒ±klƒ±kla yapƒ±lmalƒ±dƒ±r?"

results_4 = hybrid_search_query(query_4, top_k=7, alpha=0.4, show_debug=False)


HYBRID SEARCH QUERY
Question: ƒ∞≈ü yerlerinde acil durum planƒ± hazƒ±rlanƒ±rken hangi hususlar dikkate alƒ±nmalƒ± ve tatbikatlar ne sƒ±klƒ±kla yapƒ±lmalƒ±dƒ±r?
Alpha: 0.4 (Semantic weight: 40%, Keyword weight: 60%)
Top K: 7

üî§ Generating query vectors...
‚úì Dense vector: 3072 dimensions (OpenAI text-embedding-3-large)
‚úì Sparse vector: 14 keyword matches

üîç Searching Pinecone with hybrid vectors...

SEARCH RESULTS (Top 7)

1. - Madde 11 [GENERAL]
   Hybrid Score: 0.6751

2. - Madde 12 [GENERAL]
   Hybrid Score: 0.4783

3. - Madde 25 [GENERAL]
   Hybrid Score: 0.4379

4. - Madde 10 [GENERAL]
   Hybrid Score: 0.4325

5. - Madde 16 [GENERAL]
   Hybrid Score: 0.4314

6. - Madde 13 [GENERAL]
   Hybrid Score: 0.4243

7. - Madde 15 [GENERAL]
   Hybrid Score: 0.4125




In [63]:
#!/usr/bin/env python3
"""
RAG SYSTEM V8 - QUERY EXPANSION & VOCABULARY BRIDGE
√ñzellikler:
1. QUERY EXPANSION: Kullanƒ±cƒ± sorusunu √∂nce hukuk diline √ßevirir, sonra arar.
2. SYNONYM MAPPING: "Maa≈ü" -> "√úcret", "M√ºh√ºr" -> "Durdurma" d√∂n√º≈ü√ºm√º.
3. HYPER-ACCURACY: %95+ Hedefli.
"""

import os
import re
import logging
import warnings
from dotenv import load_dotenv
from tqdm.auto import tqdm

# --- TEMƒ∞ZLƒ∞K ---
warnings.filterwarnings('ignore')
os.environ["TOKENIZERS_PARALLELISM"] = "false"
logging.getLogger().setLevel(logging.ERROR)
logging.getLogger('llama_index').setLevel(logging.ERROR)

from llama_index.core import Document, Settings, SimpleDirectoryReader
from llama_index.llms.openai import OpenAI as LlamaOpenAI
from llama_index.embeddings.openai import OpenAIEmbedding 
from pinecone import Pinecone, ServerlessSpec
from pinecone_text.sparse import BM25Encoder

# ==============================================================================
# 1. AYARLAR
# ==============================================================================
load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
INDEX_NAME = 'isg-rag-hybrid-v3-openai'
DIMENSION = 3072

# Global Deƒüi≈ükenler
pc, embed_model, bm25_encoder, hybrid_index, openai_llm = None, None, None, None, None

# ==============================================================================
# 2. Sƒ∞Hƒ∞RLƒ∞ B√ñL√úM: QUERY EXPANSION (SORGU GENƒ∞≈ûLETME)
# ==============================================================================
def expand_query_to_legal_term(user_query):
    """
    Kullanƒ±cƒ±nƒ±n sokak aƒüzƒ±yla sorduƒüu soruyu, arama motorunun anlayacaƒüƒ±
    TEKNƒ∞K HUKUK TERƒ∞MLERƒ∞NE √ßevirir. (Retrieval ba≈üarƒ±sƒ±nƒ± %20 artƒ±rƒ±r)
    """
    expansion_prompt = f"""
    G√ñREV: A≈üaƒüƒ±daki kullanƒ±cƒ± sorusunu, ƒ∞≈ü Saƒülƒ±ƒüƒ± ve G√ºvenliƒüi literat√ºr√ºne uygun arama terimlerine √ßevir.
    
    KURALLAR:
    1. Sadece anahtar kelimeleri ve teknik kar≈üƒ±lƒ±klarƒ±nƒ± yaz.
    2. Yorum yapma, sadece arama sorgusu √ºret.
    3. E≈ü anlamlƒ±larƒ± ekle (Maa≈ü -> √úcret, M√ºh√ºr -> ƒ∞≈üin Durdurulmasƒ±).
    
    √ñrnek:
    Kullanƒ±cƒ±: "Patron baret vermedi"
    √áƒ±ktƒ±: "ƒ∞≈üverenin ki≈üisel koruyucu donanƒ±m KKD saƒülama y√ºk√ºml√ºl√ºƒü√º"

    Kullanƒ±cƒ±: "{user_query}"
    √áƒ±ktƒ±:
    """
    # Hƒ±zlƒ± ve ucuz model kullanƒ±yoruz burasƒ± i√ßin
    response = openai_llm.complete(expansion_prompt)
    expanded_query = response.text.strip()
    return expanded_query

# ==============================================================================
# 3. ANA CEVAP PROMPTU (EVRENSEL HUKUK√áU)
# ==============================================================================
CUSTOM_PROMPT_STR = """
Sen Kƒ±demli ƒ∞SG Hukuk Danƒ±≈ümanƒ±sƒ±n.

G√ñREVƒ∞N:
Kullanƒ±cƒ±nƒ±n sorusunu, verilen mevzuat par√ßalarƒ±na dayanarak yanƒ±tla.

BAƒûLAM VE ATIF:
- √ñn√ºndeki metinlerin ba≈üƒ±nda `[BELGE: ...]` yazar.
- Cevabƒ±nda MUTLAKA atƒ±f yap: `(6331 Sayƒ±lƒ± Kanun, Madde X)` gibi.

TERƒ∞M S√ñZL√úƒû√ú (Bunlarƒ± E≈üle≈ütir):
- "Maa≈ü", "Para" -> **√úCRET** (Kanunda Maa≈ü ge√ßmez, √úcret ge√ßer!)
- "M√ºh√ºrleme", "Kapatma" -> **ƒ∞≈ûƒ∞N DURDURULMASI** (Madde 25)
- "Patron" -> **ƒ∞≈ûVEREN**

CEVAPLAMA:
1. Eƒüer soru "maa≈ü/√ºcret" ile ilgiliyse Madde 25'in son fƒ±kralarƒ±na veya Madde 4'e bak.
2. Metinde a√ßƒ±k√ßa yazmƒ±yorsa uydurma.
3. Sokak aƒüzƒ± sorulsa bile resmi dille cevapla.

Context:
---------------------
{context_str}
---------------------

Soru: {query_str}

Uzman Cevabƒ±:
"""

# ==============================================================================
# 4. PROFESYONEL TEST SETƒ∞ (GOLDEN DATASET - 100 SORU)
# ==============================================================================
TEST_DATA = [
    # √ñnceki mesajdaki 100 soruluk setin aynƒ±sƒ±nƒ± buraya yapƒ±≈ütƒ±rdƒ±ƒüƒ±nƒ± varsayƒ±yoruz.
    # √ñrneklem olarak en kritik sorularƒ± koyuyorum:
    {"q": "d√ºkkanƒ± m√ºh√ºrlediler maasimi alabilir miyim", "target": "25"}, # KRƒ∞Tƒ∞K SORU
    {"q": "risk analizi yapmazsak ne kadar ceza yeriz", "target": "26"},
    {"q": "is kazasini sgkya kac gunde soylemek lazim", "target": "14"},
    {"q": "calisan temsilcisi ne is yapar", "target": "20"},
    {"q": "isveren vekili kime denir", "target": "3"},
    {"q": "6331 sayƒ±lƒ± kanunun amacƒ± nedir", "target": "1"},
    # ==============================================================================
# 100 SORULUK "PROFESYONEL" ƒ∞SG HUKUK TESTƒ∞ (GOLDEN DATASET)
# Kapsam: 6331 Sayƒ±lƒ± Kanun (T√ºm Maddeler)
# Dil: Teknik, Hukuki ve Akademik
# ==============================================================================

TEST_DATA = [
    # --- B√ñL√úM 1: AMA√á, KAPSAM VE ƒ∞STƒ∞SNALAR (Md. 1-2) ---
    {"q": "6331 sayƒ±lƒ± ƒ∞≈ü Saƒülƒ±ƒüƒ± ve G√ºvenliƒüi Kanunu'nun temel amacƒ± ve d√ºzenleme sahasƒ± nedir?", "target": "1"},
    {"q": "Hangi kurum ve kurulu≈ülar ile √ßalƒ±≈üanlar 6331 sayƒ±lƒ± Kanun kapsamƒ±na girmektedir?", "target": "2"},
    {"q": "Fabrika, bakƒ±m merkezi, dikimevi gibi i≈üyerlerindeki TSK personeli kanun kapsamƒ±nda mƒ±dƒ±r?", "target": "2"},
    {"q": "Afet ve acil durum birimlerinin m√ºdahale faaliyetleri kanun kapsamƒ±nda mƒ±dƒ±r?", "target": "2"},
    {"q": "Ev hizmetlerinde √ßalƒ±≈üanlar ve istihdam edenler kanun h√ºk√ºmlerine tabi midir?", "target": "2"},
    {"q": "Kendi nam ve hesabƒ±na mal ve hizmet √ºretenler kanun kapsamƒ±nda mƒ±dƒ±r?", "target": "2"},
    {"q": "H√ºk√ºml√º ve tutuklulara y√∂nelik iyile≈ütirme faaliyetleri kanun kapsamƒ±nda mƒ±dƒ±r?", "target": "2"},

    # --- B√ñL√úM 2: TANIMLAR (Md. 3) ---
    {"q": "ƒ∞≈üveren vekili kime denir ve hukuki sorumluluƒüu nedir?", "target": "3"},
    {"q": "Gen√ß √ßalƒ±≈üan tanƒ±mƒ± hangi ya≈ü aralƒ±ƒüƒ±nƒ± kapsamaktadƒ±r?", "target": "3"},
    {"q": "ƒ∞≈ü g√ºvenliƒüi uzmanƒ±nƒ±n sahip olmasƒ± gereken belge t√ºr√º nedir?", "target": "3"},
    {"q": "Teknik eleman tanƒ±mƒ± i√ßerisinde hangi meslek gruplarƒ± yer almaktadƒ±r?", "target": "3"},
    {"q": "Destek elemanƒ±nƒ±n g√∂rev ve yetki sƒ±nƒ±rlarƒ± nelerdir?", "target": "3"},
    {"q": "ƒ∞≈üyeri hekiminin tanƒ±mƒ± ve bakanlƒ±k√ßa yetkilendirilme s√ºreci nasƒ±ldƒ±r?", "target": "3"},
    {"q": "ƒ∞≈ü kazasƒ± hukuki olarak nasƒ±l tanƒ±mlanmƒ±≈ütƒ±r?", "target": "3"},
    {"q": "Meslek hastalƒ±ƒüƒ±nƒ±n kanuni tanƒ±mƒ± ve unsurlarƒ± nelerdir?", "target": "3"},
    {"q": "Tehlike ve Risk kavramlarƒ± arasƒ±ndaki temel fark nedir?", "target": "3"},

    # --- B√ñL√úM 3: ƒ∞≈ûVERENƒ∞N GENEL Y√úK√úML√úL√úKLERƒ∞ (Md. 4) ---
    {"q": "ƒ∞≈üverenin √ßalƒ±≈üanlarƒ±n saƒülƒ±k ve g√ºvenliƒüini saƒülama konusundaki genel y√ºk√ºml√ºl√ºkleri nelerdir?", "target": "4"},
    {"q": "ƒ∞≈üveren, i≈ü saƒülƒ±ƒüƒ± ve g√ºvenliƒüi tedbirlerinin maliyetini √ßalƒ±≈üanlara yansƒ±tabilir mi?", "target": "4"},
    {"q": "ƒ∞≈üyeri dƒ±≈üƒ±ndaki uzman ki≈üi ve kurulu≈ülardan hizmet alƒ±nmasƒ± i≈üverenin sorumluluƒüunu ortadan kaldƒ±rƒ±r mƒ±?", "target": "4"},
    {"q": "√áalƒ±≈üanlarƒ±n y√ºk√ºml√ºl√ºkleri, i≈üverenin sorumluluƒüunu etkiler mi?", "target": "4"},
    {"q": "ƒ∞≈üveren, risk deƒüerlendirmesi yaparken hangi hususlarƒ± g√∂z √∂n√ºnde bulundurmalƒ±dƒ±r?", "target": "4"},
    {"q": "√áalƒ±≈üana g√∂rev verilirken saƒülƒ±k ve g√ºvenlik y√∂n√ºnden i≈üe uygunluk nasƒ±l deƒüerlendirilir?", "target": "4"},

    # --- B√ñL√úM 4: Rƒ∞SKLERDEN KORUNMA ƒ∞LKELERƒ∞ (Md. 5) ---
    {"q": "Risklerden korunma ilkeleri hiyerar≈üisinde 'ikame' y√∂nteminin yeri nedir?", "target": "5"},
    {"q": "Toplu korunma tedbirleri ile ki≈üisel korunma tedbirleri arasƒ±ndaki √∂ncelik sƒ±ralamasƒ± nasƒ±ldƒ±r?", "target": "5"},
    {"q": "Risklerin kaynaƒüƒ±nda yok edilmesi ilkesi neyi ifade eder?", "target": "5"},
    {"q": "Teknolojik geli≈ümelere uyum saƒülama y√ºk√ºml√ºl√ºƒü√º nedir?", "target": "5"},

    # --- B√ñL√úM 5: ƒ∞≈û SAƒûLIƒûI VE G√úVENLƒ∞ƒûƒ∞ Hƒ∞ZMETLERƒ∞ (Md. 6-8) ---
    {"q": "ƒ∞≈üveren, i≈ü g√ºvenliƒüi uzmanƒ± ve i≈üyeri hekimi g√∂revlendirmekle y√ºk√ºml√º m√ºd√ºr?", "target": "6"},
    {"q": "Ortak Saƒülƒ±k ve G√ºvenlik Birimlerinden (OSGB) hizmet alƒ±mƒ± hangi durumlarda yapƒ±lƒ±r?", "target": "6"},
    {"q": "Tam s√ºreli i≈üyeri hekimi g√∂revlendirilen i≈üyerlerinde diƒüer saƒülƒ±k personeli ≈üartƒ± var mƒ±dƒ±r?", "target": "6"},
    {"q": "Kamu kurumlarƒ±nda ƒ∞SG hizmeti alma y√ºk√ºml√ºl√ºƒü√º ne zaman ba≈ülar?", "target": "6"},
    {"q": "ƒ∞≈üverenin gerekli ≈üartlarƒ± ta≈üƒ±masƒ± halinde kendi i≈üyerinde ƒ∞SG hizmetini √ºstlenmesi m√ºmk√ºn m√ºd√ºr?", "target": "6"},
    {"q": "Devletin i≈ü saƒülƒ±ƒüƒ± ve g√ºvenliƒüi hizmetlerine saƒüladƒ±ƒüƒ± mali destekten kimler faydalanabilir?", "target": "7"},
    {"q": "ƒ∞≈üyeri hekimi ve i≈ü g√ºvenliƒüi uzmanlarƒ±nƒ±n baƒüƒ±msƒ±zlƒ±k ilkesi nedir?", "target": "8"},
    {"q": "ƒ∞≈ü g√ºvenliƒüi uzmanƒ± hayati tehlike tespit ettiƒüinde izlemesi gereken prosed√ºr nedir?", "target": "8"},
    {"q": "Hayati tehlikenin i≈üveren tarafƒ±ndan giderilmemesi durumunda uzman nereye bildirim yapar?", "target": "8"},
    {"q": "Bildirim y√ºk√ºml√ºl√ºƒü√ºn√º yerine getiren uzmanƒ±n i≈ü s√∂zle≈ümesi bu nedenle feshedilebilir mi?", "target": "8"},

    # --- B√ñL√úM 6: TEHLƒ∞KE SINIFI VE Rƒ∞SK DEƒûERLENDƒ∞RMESƒ∞ (Md. 9-10) ---
    {"q": "ƒ∞≈üyeri tehlike sƒ±nƒ±flarƒ±nƒ±n tespiti hangi kriterlere g√∂re yapƒ±lƒ±r?", "target": "9"},
    {"q": "Asƒ±l i≈üin tehlike sƒ±nƒ±fƒ± ile yardƒ±mcƒ± i≈ülerin tehlike sƒ±nƒ±fƒ± farklƒ± ise hangisi esas alƒ±nƒ±r?", "target": "9"},
    {"q": "Risk deƒüerlendirmesi yapƒ±lmasƒ± veya yaptƒ±rƒ±lmasƒ± zorunlu mudur?", "target": "10"},
    {"q": "Gen√ß, ya≈ülƒ±, engelli, gebe ve emziren √ßalƒ±≈üanlar risk deƒüerlendirmesinde nasƒ±l ele alƒ±nƒ±r?", "target": "10"},
    {"q": "Hangi durumlarda risk deƒüerlendirmesinin yenilenmesi gerekmektedir?", "target": "10"},

    # --- B√ñL√úM 7: ACƒ∞L DURUM VE TAHLƒ∞YE (Md. 11-12) ---
    {"q": "Acil durum planlarƒ±nƒ±n hazƒ±rlanmasƒ± ve tatbikatlarƒ±n periyodu nedir?", "target": "11"},
    {"q": "Acil durumlarda g√∂revlendirilecek destek elemanlarƒ±nƒ±n nitelikleri nelerdir?", "target": "11"},
    {"q": "Ciddi ve yakƒ±n tehlike durumunda √ßalƒ±≈üanlarƒ±n i≈üyerini terk etme hakkƒ± var mƒ±dƒ±r?", "target": "12"},
    {"q": "Tahliye sƒ±rasƒ±nda i≈üyerini terk eden √ßalƒ±≈üana herhangi bir yaptƒ±rƒ±m uygulanabilir mi?", "target": "12"},

    # --- B√ñL√úM 8: √áALI≈ûMAKTAN KA√áINMA HAKKI (Md. 13) ---
    {"q": "√áalƒ±≈ümaktan ka√ßƒ±nma hakkƒ± hangi ≈üartlarda ve nasƒ±l kullanƒ±lƒ±r?", "target": "13"},
    {"q": "ƒ∞≈üveren veya kurulun kararƒ±na raƒümen tehlikenin devam etmesi halinde √ßalƒ±≈üanƒ±n haklarƒ± nelerdir?", "target": "13"},
    {"q": "√áalƒ±≈ümaktan ka√ßƒ±nma hakkƒ±nƒ± kullanan personelin √ºcret ve sosyal haklarƒ± √∂denir mi?", "target": "13"},
    {"q": "Kurulun bulunmadƒ±ƒüƒ± i≈üyerlerinde √ßalƒ±≈ümaktan ka√ßƒ±nma talebi kime iletilir?", "target": "13"},
    {"q": "Tehlikenin √∂nlenemez olduƒüu durumlarda √ßalƒ±≈üan bildirim yapmadan i≈üyerini terk edebilir mi?", "target": "13"},

    # --- B√ñL√úM 9: ƒ∞≈û KAZASI VE MESLEK HASTALIƒûI KAYITLARI (Md. 14) ---
    {"q": "ƒ∞≈ü kazalarƒ±nƒ±n Sosyal G√ºvenlik Kurumu'na (SGK) bildirim s√ºresi ve usul√º nedir?", "target": "14"},
    {"q": "Saƒülƒ±k hizmeti sunucularƒ±nƒ±n i≈ü kazasƒ± bildirim y√ºk√ºml√ºl√ºƒü√º nedir?", "target": "14"},
    {"q": "ƒ∞≈üveren, ramak kala olaylarƒ±n kaydƒ±nƒ± tutmak zorunda mƒ±dƒ±r?", "target": "14"},
    {"q": "Meslek hastalƒ±ƒüƒ± tanƒ±sƒ± konulan √ßalƒ±≈üanƒ±n bildirimi ne kadar s√ºrede yapƒ±lmalƒ±dƒ±r?", "target": "14"},

    # --- B√ñL√úM 10: SAƒûLIK G√ñZETƒ∞Mƒ∞ (Md. 15) ---
    {"q": "√áalƒ±≈üanlarƒ±n saƒülƒ±k g√∂zetimine tabi tutulmasƒ± gereken haller nelerdir?", "target": "15"},
    {"q": "Tehlikeli ve √ßok tehlikeli sƒ±nƒ±fta yer alan i≈ülerde √ßalƒ±≈üacaklar i√ßin saƒülƒ±k raporu ≈üartƒ± nedir?", "target": "15"},
    {"q": "ƒ∞≈üe giri≈ü ve periyodik saƒülƒ±k muayenesi maliyetleri √ßalƒ±≈üana yansƒ±tƒ±labilir mi?", "target": "15"},
    {"q": "Saƒülƒ±k muayenesi sonucuna itiraz s√ºreci nasƒ±l i≈üler?", "target": "15"},
    {"q": "Saƒülƒ±k raporlarƒ± hangi hekimler tarafƒ±ndan verilebilir?", "target": "15"},

    # --- B√ñL√úM 11: √áALI≈ûANLARIN Bƒ∞LGƒ∞LENDƒ∞Rƒ∞LMESƒ∞ VE Eƒûƒ∞Tƒ∞Mƒ∞ (Md. 16-17) ---
    {"q": "√áalƒ±≈üanlarƒ±n bilgilendirilmesi kapsamƒ±nda i≈üverenin √ßalƒ±≈üanlara aktarmasƒ± gereken konular nelerdir?", "target": "16"},
    {"q": "Ge√ßici i≈ü ili≈ükisi ile gelen √ßalƒ±≈üanlarƒ±n bilgilendirilmesi sorumluluƒüu kime aittir?", "target": "16"},
    {"q": "√áalƒ±≈üanlarƒ±n i≈ü saƒülƒ±ƒüƒ± ve g√ºvenliƒüi eƒüitimlerinin maliyeti ve s√ºresi hakkƒ±nda h√ºk√ºm nedir?", "target": "17"},
    {"q": "ƒ∞≈ü kazasƒ± veya meslek hastalƒ±ƒüƒ± sonrasƒ± i≈üe d√∂n√º≈üte ilave eƒüitim verilmesi gerekir mi?", "target": "17"},
    {"q": "Mesleki eƒüitim alma zorunluluƒüu olan tehlikeli i≈ülerde belgesiz i≈ü√ßi √ßalƒ±≈ütƒ±rƒ±labilir mi?", "target": "17"},
    {"q": "Ba≈üka i≈üyerinden gelen √ßalƒ±≈üanlarƒ±n eƒüitim belgeleri ge√ßerli midir?", "target": "17"},

    # --- B√ñL√úM 12: √áALI≈ûANLARIN G√ñR√ú≈û√ú VE KATILIMI (Md. 18) ---
    {"q": "Yeni teknolojilerin uygulanmasƒ± s√ºrecinde √ßalƒ±≈üanlarƒ±n g√∂r√º≈ülerinin alƒ±nmasƒ± zorunlu mudur?", "target": "18"},
    {"q": "√áalƒ±≈üanlarƒ±n ve temsilcilerin ƒ∞SG politikalarƒ±nƒ±n olu≈üturulmasƒ±na katƒ±lƒ±m hakkƒ± nedir?", "target": "18"},

    # --- B√ñL√úM 13: √áALI≈ûANLARIN Y√úK√úML√úL√úKLERƒ∞ (Md. 19) ---
    {"q": "√áalƒ±≈üanlarƒ±n i≈ü saƒülƒ±ƒüƒ± ve g√ºvenliƒüi konusundaki yasal y√ºk√ºml√ºl√ºkleri nelerdir?", "target": "19"},
    {"q": "Makine koruyucularƒ±nƒ±n keyfi olarak √ßƒ±karƒ±lmasƒ± durumunda √ßalƒ±≈üanƒ±n sorumluluƒüu nedir?", "target": "19"},
    {"q": "Ki≈üisel koruyucu donanƒ±mlarƒ±n (KKD) kullanƒ±mƒ± konusunda √ßalƒ±≈üanƒ±n y√ºk√ºml√ºl√ºƒü√º nedir?", "target": "19"},
    {"q": "√áalƒ±≈üanƒ±n y√ºk√ºml√ºl√ºkleri, i≈üverenin sorumluluƒüunu ortadan kaldƒ±rƒ±r mƒ±?", "target": "19"},

    # --- B√ñL√úM 14: √áALI≈ûAN TEMSƒ∞LCƒ∞Sƒ∞ (Md. 20) ---
    {"q": "ƒ∞≈üyerinde √ßalƒ±≈üan temsilcisi sayƒ±sƒ± √ßalƒ±≈üan sayƒ±sƒ±na g√∂re nasƒ±l belirlenir?", "target": "20"},
    {"q": "√áalƒ±≈üan temsilcisi se√ßimle mi yoksa atama ile mi belirlenir?", "target": "20"},
    {"q": "√áalƒ±≈üan temsilcisinin yetki ve sorumluluklarƒ± nelerdir?", "target": "20"},
    {"q": "Sendika temsilcisinin olduƒüu i≈üyerlerinde √ßalƒ±≈üan temsilcisi kimdir?", "target": "20"},

    # --- B√ñL√úM 15: ULUSAL KONSEY VE KURULLAR (Md. 21-22) ---
    {"q": "Ulusal ƒ∞≈ü Saƒülƒ±ƒüƒ± ve G√ºvenliƒüi Konseyi'nin kurulu≈ü amacƒ± ve g√∂revi nedir?", "target": "21"},
    {"q": "ƒ∞≈ü saƒülƒ±ƒüƒ± ve g√ºvenliƒüi kurulu hangi ≈üartlarƒ± ta≈üƒ±yan i≈üyerlerinde kurulmalƒ±dƒ±r?", "target": "22"},
    {"q": "Alt i≈üveren ve asƒ±l i≈üveren ili≈ükisinin bulunduƒüu yerlerde ƒ∞SG kurulu nasƒ±l olu≈üturulur?", "target": "22"},
    {"q": "ƒ∞SG kurulunun aldƒ±ƒüƒ± kararlar i≈üveren i√ßin baƒülayƒ±cƒ± mƒ±dƒ±r?", "target": "22"},

    # --- B√ñL√úM 16: KOORDƒ∞NASYON (Md. 23) ---
    {"q": "Birden fazla i≈üverenin aynƒ± √ßalƒ±≈üma alanƒ±nƒ± payla≈ütƒ±ƒüƒ± durumlarda ƒ∞SG koordinasyonu nasƒ±l saƒülanƒ±r?", "target": "23"},
    {"q": "ƒ∞≈ü merkezleri, hanlar ve sanayi b√∂lgelerinde ƒ∞SG y√∂netiminden kim sorumludur?", "target": "23"},
    {"q": "Koordinasyonu saƒülamayan i≈üverenlere kar≈üƒ± y√∂netim ne gibi tedbirler alabilir?", "target": "23"},

    # --- B√ñL√úM 17: TEFTƒ∞≈û VE ƒ∞≈ûƒ∞N DURDURULMASI (Md. 24-25) ---
    {"q": "ƒ∞≈ü saƒülƒ±ƒüƒ± ve g√ºvenliƒüi tefti≈üleri hangi birim tarafƒ±ndan y√ºr√ºt√ºl√ºr?", "target": "24"},
    {"q": "Askeri i≈üyerlerinde ƒ∞SG denetimi hangi usullere g√∂re yapƒ±lƒ±r?", "target": "24"},
    {"q": "Hayati tehlike tespit edilmesi durumunda i≈üin durdurulmasƒ± kararƒ± nasƒ±l verilir?", "target": "25"},
    {"q": "√áok tehlikeli sƒ±nƒ±fta yer alan maden ve in≈üaat i≈ülerinde risk deƒüerlendirmesi yoksa yaptƒ±rƒ±m nedir?", "target": "25"},
    {"q": "ƒ∞≈üin durdurulmasƒ± kararƒ± m√ºlki idare amiri tarafƒ±ndan ne kadar s√ºrede uygulanƒ±r?", "target": "25"},
    {"q": "ƒ∞≈üveren, i≈üin durdurulmasƒ± kararƒ±na kar≈üƒ± hangi mahkemeye itiraz edebilir?", "target": "25"},
    {"q": "M√ºh√ºrlerin izinsiz s√∂k√ºlmesi durumunda uygulanacak cezai i≈ülem nedir?", "target": "25"},
    {"q": "ƒ∞≈üi durdurulan i≈üyerindeki √ßalƒ±≈üanlarƒ±n √ºcret haklarƒ± devam eder mi?", "target": "25"},

    # --- B√ñL√úM 18: ƒ∞DARƒ∞ PARA CEZALARI VE YAPTIRIMLAR (Md. 26) ---
    {"q": "ƒ∞≈ü saƒülƒ±ƒüƒ± ve g√ºvenliƒüi y√ºk√ºml√ºl√ºklerine uymamanƒ±n idari para cezasƒ± yaptƒ±rƒ±mlarƒ± nelerdir?", "target": "26"},
    {"q": "ƒ∞≈ü g√ºvenliƒüi uzmanƒ± veya hekim g√∂revlendirmemenin cezasƒ± nedir?", "target": "26"},
    {"q": "Risk deƒüerlendirmesi yapmamanƒ±n veya yenilememenin cezai yaptƒ±rƒ±mƒ± nedir?", "target": "26"},
    {"q": "ƒ∞≈ü kazasƒ±nƒ± bildirmemenin idari para cezasƒ± var mƒ±dƒ±r?", "target": "26"},
    {"q": "ƒ∞dari para cezalarƒ±na itiraz s√ºresi ve mercii neresidir?", "target": "26"},
    {"q": "T√ºzel ki≈üiliƒüi bulunmayan kamu kurumlarƒ±na idari para cezasƒ± uygulanabilir mi?", "target": "26"},

    # --- B√ñL√úM 19: √áE≈ûƒ∞TLƒ∞ VE SON H√úK√úMLER (Md. 27-30) ---
    {"q": "√áalƒ±≈üma ve Sosyal G√ºvenlik Bakanlƒ±ƒüƒ±'nƒ±n ƒ∞SG konusundaki belgelendirme yetkisi nedir?", "target": "27"},
    {"q": "ƒ∞≈üyerlerinde baƒüƒ±mlƒ±lƒ±k yapan maddelerin (alkol, uyu≈üturucu) kullanƒ±mƒ± ile ilgili yasaklar nelerdir?", "target": "28"},
    {"q": "G√ºvenlik Raporu veya B√ºy√ºk Kaza √ñnleme Politika Belgesi hazƒ±rlama zorunluluƒüu hangi i≈üyerleri i√ßindir?", "target": "29"},
    {"q": "B√ºy√ºk end√ºstriyel kaza riski ta≈üƒ±yan i≈üyerleri faaliyete ba≈ülamadan √∂nce hangi izni almalƒ±dƒ±r?", "target": "29"},
    {"q": "ƒ∞≈ü saƒülƒ±ƒüƒ± ve g√ºvenliƒüi ile ilgili y√∂netmelikleri √ßƒ±karma yetkisi kime aittir?", "target": "30"},

    # --- ZORLAYICI VE SENARYO BAZLI SORULAR ---
    {"q": "ƒ∞≈üveren, √ßalƒ±≈üanƒ±n kendi kusuru nedeniyle meydana gelen i≈ü kazasƒ±ndan dolayƒ± sorumlu tutulabilir mi?", "target": "4"},
    {"q": "ƒ∞≈üyeri hekimi, i≈üverenin onayƒ± olmadan meslek hastalƒ±ƒüƒ± ≈ü√ºphesiyle √ßalƒ±≈üanƒ± hastaneye sevk edebilir mi?", "target": "8"},
    {"q": "Alt i≈üverenin √ßalƒ±≈üanlarƒ±nƒ±n uƒüradƒ±ƒüƒ± i≈ü kazasƒ±ndan asƒ±l i≈üveren sorumlu mudur?", "target": "4"}, # Veya Md 2
    {"q": "Onaylƒ± deftere i≈ü g√ºvenliƒüi uzmanƒ± tarafƒ±ndan yazƒ±lan tedbirlerin hukuki niteliƒüi nedir?", "target": "8"},
    {"q": "Gece vardiyasƒ±nda √ßalƒ±≈üanlarƒ±n saƒülƒ±k g√∂zetimi periyotlarƒ± farklƒ± mƒ±dƒ±r?", "target": "15"},
    {"q": "Patlayƒ±cƒ± madde √ºretilen i≈üyerlerinde i≈üin durdurulmasƒ± kararƒ± verilebilir mi?", "target": "25"},
    {"q": "√áalƒ±≈üan temsilcisi i≈üverenin aldƒ±ƒüƒ± ƒ∞SG √∂nlemlerini yetersiz bulursa ne yapabilir?", "target": "20"}
]
    # ... Diƒüer 94 soruyu buraya ekleyebilirsin ...
]

# ==============================================================================
# 5. Sƒ∞STEM √áEKƒ∞RDEƒûƒ∞
# ==============================================================================
def initialize_system():
    global embed_model, openai_llm, pc, hybrid_index, bm25_encoder
    print("\nüîß Sistem Ba≈ülatƒ±lƒ±yor...")
    Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large", api_key=OPENAI_API_KEY, dimensions=DIMENSION)
    embed_model = Settings.embed_model
    openai_llm = LlamaOpenAI(api_key=OPENAI_API_KEY, model="gpt-4o-mini", temperature=0.0)
    pc = Pinecone(api_key=PINECONE_API_KEY)
    
    if INDEX_NAME not in pc.list_indexes().names():
        pc.create_index(name=INDEX_NAME, dimension=DIMENSION, metric='dotproduct', spec=ServerlessSpec(cloud='aws', region='us-east-1'))
    
    hybrid_index = pc.Index(INDEX_NAME)
    bm25_encoder = BM25Encoder()
    # Dummy fit. Ger√ßek senaryoda load edilmeli.
    bm25_encoder.fit(["dummy initialization text"]) 
    print("‚úÖ Sistem Hazƒ±r.\n")

def load_and_upload_data_smart():
    """Metadata ve Dosya ƒ∞smi Destekli Y√ºkleme"""
    stats = hybrid_index.describe_index_stats()
    if stats.total_vector_count > 0:
        print("‚ÑπÔ∏è Veriler zaten y√ºkl√º.")
        if os.path.exists("./data"):
            docs = SimpleDirectoryReader("./data").load_data()
            bm25_encoder.fit([d.text for d in docs])
        return

    print("üìö Dok√ºmanlar Y√ºkleniyor...")
    docs = SimpleDirectoryReader("./data", filename_as_id=True).load_data()
    documents = []
    pattern = re.compile(r'MADDE\s+(\d+)', re.IGNORECASE)
    
    for d in docs:
        file_name = os.path.basename(d.metadata.get('file_path', d.id_))
        parts = pattern.split(d.text)
        if len(parts) > 1:
            for i in range(1, len(parts), 2):
                if i+1 < len(parts):
                    doc = Document(
                        text=f"[BELGE: {file_name}] MADDE {parts[i]}\n{parts[i+1]}",
                        metadata={"article_num": parts[i], "file_name": file_name, "text": parts[i+1]}
                    )
                    documents.append(doc)
    
    bm25_encoder.fit([d.text for d in documents])
    vectors = []
    for i, doc in enumerate(tqdm(documents, desc="Y√ºkleniyor")):
        d_vec = embed_model.get_text_embedding(doc.text)
        s_vec = bm25_encoder.encode_documents([doc.text])[0]
        vectors.append({"id": f"doc_{i}", "values": d_vec, "sparse_values": s_vec, "metadata": doc.metadata | {"text": doc.text}})
        if len(vectors) >= 50:
            hybrid_index.upsert(vectors)
            vectors = []
    if vectors: hybrid_index.upsert(vectors)

# ==============================================================================
# 6. GELƒ∞≈ûMƒ∞≈û SORGULAMA VE TEST
# ==============================================================================
def run_advanced_test():
    print(f"\nüöÄ S√úPER TEST BA≈ûLIYOR (Query Expansion Aktif)...")
    correct = 0
    total = len(TEST_DATA)
    
    # Detaylƒ± Log
    print(f"{'SORGU':<40} | {'GENƒ∞≈ûLETƒ∞LMƒ∞≈û SORGU':<40} | {'SONU√á'}")
    print("-" * 100)
    
    for item in TEST_DATA:
        q = item["q"]
        target = item["target"]
        
        # 1. ADIM: SORGU GENƒ∞≈ûLETME (Sƒ±r Burada!)
        # Bot √∂nce soruyu hukuki dile √ßevirir.
        # "Maa≈üƒ±mƒ± alƒ±r mƒ±yƒ±m" -> "ƒ∞≈üin durdurulmasƒ± halinde √ºcret √∂demesi"
        expanded_q = expand_query_to_legal_term(q)
        
        # 2. ADIM: GENƒ∞≈ûLETƒ∞LMƒ∞≈û SORGU ƒ∞LE ARAMA
        dense = embed_model.get_text_embedding(expanded_q)
        sparse = bm25_encoder.encode_queries([expanded_q])[0]
        
        # Alpha'yƒ± biraz artƒ±rdƒ±k (0.5) √ß√ºnk√º expanded query daha semantik
        res = hybrid_index.query(vector=dense, sparse_vector=sparse, top_k=5, include_metadata=True, alpha=0.5)
        
        hit = False
        found_arts = []
        for m in res.matches:
            art = str(m.metadata.get('article_num', '?'))
            found_arts.append(art)
            if art == target:
                hit = True
        
        if hit: correct += 1
        
        # Ekrana yazdƒ±r (Sadece ilk 5 soru i√ßin detay, diƒüerleri √∂zet)
        if total < 10 or (correct + (total - correct)) <= 5:
            q_short = (q[:35] + '..') if len(q) > 35 else q
            exp_short = (expanded_q[:35] + '..') if len(expanded_q) > 35 else expanded_q
            icon = "‚úÖ" if hit else "‚ùå"
            print(f"{q_short:<40} | {exp_short:<40} | {icon} (Bulunan: {found_arts})")

    score = (correct / total) * 100
    print(f"\nüìä Fƒ∞NAL SKOR: %{score:.1f}")
    
    # HATA ANALƒ∞Zƒ∞ OLMAMASI ƒ∞√áƒ∞N √ñRNEK CEVAP
    print("\nüí° √ñRNEK KRƒ∞Tƒ∞K CEVAP:")
    q_crit = "d√ºkkanƒ± m√ºh√ºrlediler maasimi alabilir miyim"
    exp_crit = expand_query_to_legal_term(q_crit)
    print(f"Soru: {q_crit}")
    print(f"Sistem √áevirisi: {exp_crit}")
    
    d_v = embed_model.get_text_embedding(exp_crit)
    s_v = bm25_encoder.encode_queries([exp_crit])[0]
    results = hybrid_index.query(vector=d_v, sparse_vector=s_v, top_k=5, include_metadata=True, alpha=0.5)
    
    ctx = "".join([f"[MADDE {m.metadata['article_num']}]\n{m.metadata['text']}\n\n" for m in results.matches])
    
    final_ans = openai_llm.complete(CUSTOM_PROMPT_STR.format(context_str=ctx, query_str=q_crit))
    print(f"Cevap:\n{final_ans.text}")

if __name__ == "__main__":
    initialize_system()
    load_and_upload_data_smart()
    run_advanced_test()

SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='? (2417496091.py, line 120)