# üïµÔ∏è‚Äç‚ôÇÔ∏è Sheep System: Deep Verification Notebook

Questo notebook serve a validare l'intera pipeline RAG, dal parsing semantico fino al retrieval ibrido.
Include una sezione **Playground** per testare manualmente i tool dell'Agente.

In [1]:
import os
import sys
import logging
import pandas as pd
from dotenv import load_dotenv, find_dotenv

# 1. Setup Ambiente
load_dotenv(find_dotenv())

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
logger = logging.getLogger("NOTEBOOK")

# --- PATH SETUP ---
current_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(current_dir, '..'))
src_path = os.path.join(project_root, 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# Import Libreria
from code_graph_indexer import CodebaseIndexer, CodeRetriever, CodeReader, CodeNavigator
from code_graph_indexer.storage.postgres import PostgresGraphStorage
try:
    from code_graph_indexer.providers.openai_emb import OpenAIEmbeddingProvider
except ImportError:
    from code_graph_indexer.providers.embedding import OpenAIEmbeddingProvider

# CONFIGURAZIONE DB
DB_PORT = "5433" # Verifica la tua porta locale
DB_URL = f"postgresql://sheep_user:sheep_password@localhost:{DB_PORT}/sheep_index"

# CONFIGURAZIONE REPO TARGET (Modifica con il tuo path locale)
REPO_PATH = "/Users/filippodaminato/Desktop/test_repos/flask_clone"

print(f"üêò Connecting to: {DB_URL}")

try:
    storage = PostgresGraphStorage(DB_URL, vector_dim=1536)
    provider = OpenAIEmbeddingProvider(model="text-embedding-3-small")
    print("‚úÖ Storage e Provider pronti.")
except Exception as e:
    print(f"‚ùå Errore Connessione: {e}")

2025-12-08 21:45:47,645 - üêò Connecting to Postgres (Pool): localhost:5433/sheep_index | Vector Dim: 1536


üêò Connecting to: postgresql://sheep_user:sheep_password@localhost:5433/sheep_index
‚úÖ Storage e Provider pronti.


## 1. Indexing & Snapshot Creation
Eseguiamo l'indicizzazione. Se `force=True`, creiamo un nuovo snapshot pulito.

In [None]:
indexer = CodebaseIndexer(REPO_PATH, storage)

print("üöÄ Avvio Indexing...")
try:
    # Indexing restituisce l'ID dello snapshot attivo
    snapshot_id = "090fc913-0223-4e39-8dc1-0077e24137b5"
    
    # Recuperiamo anche l'ID stabile della repo per completezza
    repo_meta = indexer.parser.metadata_provider.get_repo_info()
    repo_id = "87197749-0176-4e2a-a529-4f7a4e8a5ba8"
    
    print(f"‚úÖ Indexing Completato.")
    print(f"üîë Snapshot ID: {snapshot_id}")
    print(f"üì¶ Repo ID: {repo_id}")
except Exception as e:
    print(f"‚ùå Errore Indexing: {e}")
    raise e

ValueError: Path not found: /Users/filippodaminato/Desktop/test_repos/flask_clone

## 2. Embedding Generation
Popoliamo i vettori per lo snapshot appena creato.

In [None]:
print(f"ü§ñ Generating Embeddings per Snapshot {snapshot_id[:8]}...")
# Consumiamo il generatore
for progress in indexer.embed(provider, batch_size=50, force_snapshot_id=snapshot_id):
    pass
print("‚úÖ Embeddings Generati.")

# üéÆ 3. Playground Interattivo
Usa le celle seguenti per testare i singoli componenti come farebbe l'Agente.

### üîé Search Playground (Retriever)
Testa la ricerca semantica e ibrida.

In [2]:
snapshot_id = "090fc913-0223-4e39-8dc1-0077e24137b5"
repo_id = "87197749-0176-4e2a-a529-4f7a4e8a5ba8"

In [10]:
# --- CONFIGURA LA TUA RICERCA QUI ---
QUERY = "application entry point"
FILTERS = {
   "exclude_category": ["test"],
   # "path_prefix": ["src/"],
    #"exclude_language": ["python"]
   # "cat": []
}
# ------------------------------------

retriever = CodeRetriever(storage, provider)

print(f"üîé Searching: '{QUERY}' (Snap: {snapshot_id[:8]})")
results = retriever.retrieve(
    QUERY, 
    repo_id=repo_id, 
    snapshot_id=snapshot_id, # Fondamentale per la consistenza
    limit=5, 
    filters=FILTERS
)

if not results:
    print("‚ùå Nessun risultato trovato.")
else:
    df_data = []
    for r in results:
        df_data.append({
            "Score": f"{r.score:.4f}",
            "Type": ", ".join(r.semantic_labels),
            "File": f"{r.file_path}:{r.start_line}",
            "Preview": r.content.split('\n')[0][:80]
        })
    display(pd.DataFrame(df_data))

2025-12-08 21:49:25,905 - üîé Retrieving [PINNED]: 'application entry point' su Snap 090fc913... | Filters: {'exclude_category': ['test']}


üîé Searching: 'application entry point' (Snap: 090fc913)


2025-12-08 21:49:26,909 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Unnamed: 0,Score,Type,File,Preview
0,0.0328,Application Entry Point,src/flask/cli.py:1123,
1,0.0161,Code Block,src/flask/__main__.py:1,from .cli import main
2,0.0159,Function Definition,src/flask/cli.py:1119,
3,0.0156,Function Definition,src/flask/sansio/app.py:277,
4,0.0154,Code Block,src/flask/app.py:322,super().__init__(


### üìñ Reader Playground (Virtual Filesystem)
Testa l'esplorazione delle cartelle e la lettura dei file (senza toccare il disco).

In [15]:
reader = CodeReader(storage)

# 1. LIST DIRECTORY
TARGET_DIR = "src" # Root
print(f"\nüìÇ ls '{TARGET_DIR}':")
try:
    items = reader.list_directory(snapshot_id, TARGET_DIR)
    for item in items:
        icon = "üìÅ" if item['type'] == 'dir' else "üìÑ"
        print(f"   {icon} {item['name']}")
except Exception as e:
    print(f"   ‚ùå Error: {e}")

# 2. FIND DIRECTORY
SEARCH_DIR = "jso"
print(f"\nüîç find '{SEARCH_DIR}':")
found = reader.find_directories(snapshot_id, SEARCH_DIR)
print(f"   {found}")

# 3. READ FILE (Usa il primo risultato della ricerca se disponibile)
if 'results' in locals() and results:
    target_file = results[0].file_path
    print(f"\nüìñ Reading '{target_file}' (Lines 1-10):")
    try:
        data = reader.read_file(snapshot_id, target_file, start_line=1, end_line=20)
        print("---")
        print(data['content'])
        print("---")
    except Exception as e:
        print(f"‚ùå Read Error: {e}")


üìÇ ls 'src':
   üìÅ flask

üîç find 'jso':
   ['src/flask/json']

üìñ Reading 'src/flask/cli.py' (Lines 1-10):
---
from __future__ import annotations

import ast
import collections.abc as cabc
import importlib.metadata
import inspect
import os
import platform
import re
import sys
import traceback
import typing as t
from functools import update_wrapper
from operator import itemgetter
from types import ModuleType

import click
from click.core import ParameterSource
from werkzeug import run_simple
from werkzeug.serving import is_running_from_reloader

---


### üß≠ Navigator Playground (Graph Traversal)
Esplora le relazioni del nodo trovato (Genitori, Chiamate, ecc.).

In [16]:
navigator = CodeNavigator(storage)

if 'results' in locals() and results:
    # Prendiamo il primo nodo trovato dalla ricerca
    target_node = results[0]
    node_id = target_node.node_id
    
    print(f"üìç Analysing Node: {node_id}")
    print(f"   File: {target_node.file_path}")

    # 1. PARENT
    parent = navigator.read_parent_chunk(node_id)
    p_info = f"{parent.get('type')} ({parent.get('id')})" if parent else "None (Top Level)"
    print(f"   ‚¨ÜÔ∏è Parent: {p_info}")

    # 2. NEXT SIBLING
    nxt = navigator.read_neighbor_chunk(node_id, "next")
    n_info = f"{nxt.get('type')} ({nxt.get('id')})" if nxt else "None"
    print(f"   ‚û°Ô∏è Next: {n_info}")

    # 3. IMPACT (Chi mi chiama?)
    impact = navigator.analyze_impact(node_id)
    print(f"   ‚¨ÖÔ∏è Incoming Calls ({len(impact)}):")
    for i in impact[:3]:
        print(f"      - {i['file']} L{i['line']} ({i['relation']})")

    # 4. PIPELINE (Chi chiamo io?)
    pipe = navigator.visualize_pipeline(node_id, max_depth=1)
    calls = pipe.get('call_graph', {})
    print(f"   ‚§µÔ∏è Outgoing Calls ({len(calls)}):")
    # Visualizzazione semplice del dizionario
    for child_id, meta in list(calls.items())[:3]:
        print(f"      - Calls {meta['symbol']} in {meta['file']}")
else:
    print("‚ö†Ô∏è Esegui prima la cella di ricerca per selezionare un nodo target.")

2025-12-08 21:51:59,843 - üï∏Ô∏è Analyzing impact for: fcf5a825-7c78-44b8-b1c3-222e6c4d8b7c
2025-12-08 21:51:59,845 - üï∏Ô∏è Traversing pipeline for: fcf5a825-7c78-44b8-b1c3-222e6c4d8b7c


üìç Analysing Node: fcf5a825-7c78-44b8-b1c3-222e6c4d8b7c
   File: src/flask/cli.py
   ‚¨ÜÔ∏è Parent: None (Top Level)
   ‚û°Ô∏è Next: None
   ‚¨ÖÔ∏è Incoming Calls (0):
   ‚§µÔ∏è Outgoing Calls (1):
      - Calls main in src/flask/cli.py


In [None]:
# storage.close()