In [None]:
import sys
from pathlib import Path

# Absolute location of the project checkout. Putting it at the front of
# sys.path makes the project's `src.*` packages importable from the notebook.
project_root = Path('/home/khaldoun/prjt_vap')
root_entry = str(project_root)
sys.path.insert(0, root_entry)
print(f"Project root: {project_root}")

## Etape 1: Verification Neo4j

On vérifie que le serveur Neo4j est accessible avant de poursuivre.

In [None]:
from neo4j import GraphDatabase

# Connectivity smoke test: open a short-lived driver, ask it to verify the
# connection, and print start-up instructions if anything goes wrong.
try:
    # The context manager closes the driver on every path, including when
    # verify_connectivity() raises (a plain close() call after the check
    # would be skipped in that case and leak the driver).
    with GraphDatabase.driver(
        "bolt://localhost:7687",
        auth=("neo4j", "password"),
    ) as driver:
        driver.verify_connectivity()
    print("Neo4j est accessible")
except Exception as e:  # notebook-level boundary: report and keep going
    print(f"Erreur Neo4j: {e}")
    print("\nPour demarrer Neo4j:")
    print("docker run -d -p 7474:7474 -p 7687:7687 -e NEO4J_AUTH=neo4j/password neo4j:latest")

## Etape 2: Construction du Knowledge Graph

On construit le graphe à partir des 1 050 questions et de leurs relations.

In [None]:
from src.rag_graph.graph_builder import ISOGraphBuilder

# Connection settings for the local Neo4j instance, passed by keyword to the
# graph builder.
builder_kwargs = {
    "uri": "bolt://localhost:7687",
    "user": "neo4j",
    "password": "password",
}
builder = ISOGraphBuilder(**builder_kwargs)

print("Graph builder initialise")

In [None]:
# Count every node to find out whether the graph has already been built.
with builder.driver.session() as session:
    record = session.run("MATCH (n) RETURN count(n) as count").single()
    count = record['count']
    print(f"Noeuds dans le graphe: {count}")

# Only an empty database triggers the build.
if count != 0:
    print("Graphe deja construit")
else:
    print("\nConstruction du graphe (cela prend 2-3 minutes)...")
    builder.build_graph(method=1)
    print("Graphe construit")

## Etape 3: Statistiques du graphe

In [None]:
# Fetch the builder's summary counters and print them as a short report.
# NOTE(review): this calls an underscore-prefixed method of the builder;
# check whether a public accessor exists.
stats = builder._get_statistics()

print("STATISTIQUES DU KNOWLEDGE GRAPH")
print("="*50)
# (displayed title, key in the statistics mapping), in display order.
for title, key in (
    ("Questions", "questions"),
    ("Labels", "labels"),
    ("Standards", "standards"),
    ("Clauses", "clauses"),
    ("Relations", "relationships"),
):
    print(f"{title}: {stats[key]}")

## Etape 4: Requetes Cypher directes

On teste quelques requêtes Cypher pour explorer le graphe.

In [None]:
# Example 1: up to five questions attached to the 'backup' label.
cypher_query = """
MATCH (q:Question)-[:HAS_LABEL]->(l:Label {name: 'backup'})
RETURN q.text as question, q.id as id
LIMIT 5
"""

with builder.driver.session() as session:
    # Materialise the records while the session is still open.
    questions = list(session.run(cypher_query))

print("Questions avec label 'backup':\n")
for rank, record in enumerate(questions, start=1):
    print(f"[{rank}] {record['question']}")
    print(f"    ID: {record['id']}\n")

In [None]:
# Example 2: up to five questions that belong to the 'iso_27001' standard
# and also carry the 'policy' label.
cypher_query = """
MATCH (q:Question)-[:BELONGS_TO_STANDARD]->(s:Standard {name: 'iso_27001'})
MATCH (q)-[:HAS_LABEL]->(l:Label {name: 'policy'})
RETURN q.text as question
LIMIT 5
"""

with builder.driver.session() as session:
    # Materialise the records while the session is still open.
    questions = list(session.run(cypher_query))

print("Questions ISO 27001 sur 'policy':\n")
for rank, record in enumerate(questions, start=1):
    print(f"[{rank}] {record['question']}\n")

In [None]:
# Example 3: the ten labels attached to the largest number of questions.
cypher_query = """
MATCH (l:Label)<-[:HAS_LABEL]-(q:Question)
RETURN l.name as label, count(q) as usage
ORDER BY usage DESC
LIMIT 10
"""

with builder.driver.session() as session:
    # Materialise the records while the session is still open.
    labels = list(session.run(cypher_query))

print("Top 10 labels les plus utilises:\n")
for rank, row in enumerate(labels, start=1):
    # Rank right-aligned on 2 columns, label name padded to 30 columns.
    print(f"{rank:2d}. {row['label']:30s} ({row['usage']} questions)")

## Etape 5: Detection automatique de labels

In [None]:
from src.rag_graph.label_detector import LabelDetector

# Instantiate the label detector with its default configuration and report
# how many entries its keyword mapping holds.
detector = LabelDetector()

# Plain literal: this message has no placeholders, so no f-prefix is needed.
print("Label detector initialise")
print(f"Mots-cles mappes: {len(detector.keyword_mapping)}")

In [None]:
# Test 1: run label and standard detection on a single-topic query.
query1 = "Questions sur les sauvegardes de donnees"
labels, standard = (
    detector.detect_labels(query1),
    detector.detect_standard(query1),
)

# Fall back to a placeholder when no standard was detected (falsy result).
standard_display = standard or 'Non specifie'

print(f"Requete: '{query1}'")
print(f"Labels detectes: {labels}")
print(f"Standard: {standard_display}")

In [None]:
# Test 2: run label and standard detection on a query that mixes several
# topics and names a standard explicitly.
query2 = "Politique de securite et formation des employes ISO 27001"
labels, standard = (
    detector.detect_labels(query2),
    detector.detect_standard(query2),
)

print(f"Requete: '{query2}'")
print(f"Labels detectes: {labels}")
print(f"Standard: {standard}")

## Etape 6: Generation de requetes Cypher

In [None]:
from src.rag_graph.cypher_generator import CypherQueryGenerator

# Instantiate the Cypher query generator with no constructor arguments; the
# resulting `cypher_gen` object is reused by the query-generation cell.
cypher_gen = CypherQueryGenerator()

print("Cypher generator initialise")

In [None]:
# Turn a natural-language request into a Cypher query: detect its labels,
# then ask the generator for a label-based query capped at 5 results.
query = "Questions sur les backups"
labels = detector.detect_labels(query)

cypher_query = cypher_gen.generate_label_query(labels=labels, limit=5)

# Print the request, the detected labels and the generated query in order.
for line in (
    f"Requete: {query}",
    f"Labels: {labels}\n",
    "Requete Cypher generee:",
    cypher_query,
):
    print(line)

## Etape 7: Graph Retriever

On utilise le retriever pour effectuer des recherches dans le graphe.

In [None]:
from src.rag_graph.graph_retriever import GraphRetriever

# Connection settings for the local Neo4j instance, passed by keyword to the
# graph retriever.
retriever_kwargs = {
    "uri": "bolt://localhost:7687",
    "user": "neo4j",
    "password": "password",
}
retriever = GraphRetriever(**retriever_kwargs)

print("Graph retriever initialise")

In [None]:
# Test 1: simple retrieval, showing each hit's full content plus its
# standard and labels.
query = "Backup and disaster recovery"
results = retriever.retrieve(query, top_k=5)

print(f"Requete: '{query}'")
print(f"Resultats: {len(results)}\n")

for rank, hit in enumerate(results, start=1):
    print(f"[{rank}] {hit['content']}")
    meta = hit['metadata']
    print(f"    Standard: {meta['iso_standard']}")
    # Only the first 50 items/characters of the labels value are shown.
    print(f"    Labels: {meta['labels'][:50]}...\n")

In [None]:
# Test 2: retrieval for a multi-topic query, showing a 100-character preview
# of each hit and the 'title' stored in its metadata.
query = "Security policy and employee training ISO 27001"
results = retriever.retrieve(query, top_k=5)

print(f"Requete: '{query}'")
print(f"Resultats: {len(results)}\n")

for rank, hit in enumerate(results, start=1):
    snippet = hit['content'][:100]
    print(f"[{rank}] {snippet}...")
    clause_title = hit['metadata']['title']
    print(f"    Clause: {clause_title}\n")

## Etape 8: Generation avec LLM

In [None]:
from src.llm.llm_interface import OllamaLLM

# Options passed by keyword to the Ollama-backed LLM wrapper.
llm_options = {"model": "mistral", "temperature": 0.3}
llm = OllamaLLM(**llm_options)

print("LLM Ollama initialise avec Mistral")

In [None]:
# Generate a questionnaire: retrieve context from the graph, then hand it to
# the LLM together with the instruction prompt.
query = "Backup procedures and disaster recovery"

# Retrieve supporting documents through the graph retriever.
relevant_docs = retriever.retrieve(query, top_k=8)
print(f"Documents recuperes: {len(relevant_docs)}")

# Generate the answer from the prompt and the retrieved context.
prompt = f"Create an audit questionnaire about: {query}"
response = llm.generate_with_context(query=prompt, context_docs=relevant_docs)

banner = "=" * 80
print("\n" + banner)
print("QUESTIONNAIRE GENERE")
print(banner)
print(f"\nRequete: {query}\n")
print(response)
print("\n" + banner)

In [None]:
# Test 2: same retrieve-then-generate pipeline on a different topic.
query2 = "Security policy and access control ISO 27001"

relevant_docs = retriever.retrieve(query2, top_k=8)

prompt = f"Create a detailed audit questionnaire about: {query2}"
response = llm.generate_with_context(query=prompt, context_docs=relevant_docs)

banner = "=" * 80
print("\n" + banner)
print("TEST 2")
print(banner)
print(f"\nRequete: {query2}\n")
print(response)
print("\n" + banner)

## Etape 9: Questions liees (Multi-hop)

On explore les relations entre les questions.

In [None]:
# Look up questions related to a given question id and preview each one.
question_id = "doc_001"

related = retriever.get_related_questions(question_id=question_id, top_k=5)

print(f"Questions liees a '{question_id}':\n")
for rank, hit in enumerate(related, start=1):
    preview = hit['content'][:80]
    print(f"[{rank}] {preview}...")
    iso_standard = hit['metadata']['iso_standard']
    print(f"    Standard: {iso_standard}\n")

## Statistiques finales

In [None]:
# Final graph statistics: count nodes per type and all relationships in a
# single query, print the report, then release both Neo4j connections.
with builder.driver.session() as session:
    # Count per type.
    # OPTIONAL MATCH is used for every step that aggregates alongside
    # grouping keys: with a plain MATCH, a missing node type (or a graph
    # without relationships) would produce zero rows, result.single() would
    # return None and the prints below would fail. OPTIONAL MATCH yields a
    # null binding instead, and count(null) evaluates to 0.
    result = session.run("""
        MATCH (q:Question) WITH count(q) as questions
        OPTIONAL MATCH (l:Label) WITH questions, count(l) as labels
        OPTIONAL MATCH (s:Standard) WITH questions, labels, count(s) as standards
        OPTIONAL MATCH (c:Clause) WITH questions, labels, standards, count(c) as clauses
        OPTIONAL MATCH ()-[r]->() WITH questions, labels, standards, clauses, count(r) as rels
        RETURN questions, labels, standards, clauses, rels
    """)
    # Read the single summary row while the session is still open.
    stats = result.single()

print("STATISTIQUES FINALES")
print("="*50)
print(f"Questions: {stats['questions']}")
print(f"Labels: {stats['labels']}")
print(f"Standards: {stats['standards']}")
print(f"Clauses: {stats['clauses']}")
print(f"Relations: {stats['rels']}")

# Close the connections held by the retriever and the builder.
retriever.close()
builder.close()
print("\nConnexions fermees")

## Conclusion

La méthode fondée sur le knowledge graph offre :
- Une recherche relationnelle puissante
- Une navigation multi-hop dans le graphe
- Des requêtes complexes combinant plusieurs critères
- L'exploration des relations entre concepts

**Temps d'exécution :** ~30-40 secondes après la construction du graphe

**Avantages par rapport à la méthode vectorielle :**
- Mieux adaptée aux requêtes complexes
- Prend en compte les relations entre concepts
- Permet la navigation contextuelle