# Legal Knowledge Graph - End-to-End Demo Pipeline

This notebook demonstrates the complete pipeline:
1. Document ingestion
2. Knowledge graph construction
3. Graph embedding
4. Query retrieval
5. Response generation


In [None]:
# Import required modules
from ingestion.loader import DocumentLoader
from ingestion.parser import DocumentParser
from kg_construction.extractor import EntityRelationExtractor
from kg_construction.graph_builder import GraphBuilder
from kg_embedding.maypl_wrapper import MAYPLWrapper
from retrieval.retriever import SubgraphRetriever
from retrieval.integration import RetrievalIntegration
from prompting.base_prompt import PromptBuilder


## Step 1: Document Ingestion


In [None]:
# TODO: Replace with actual document path
document_path = "path/to/document.pdf"

# Both helpers are constructed up front and are handed the same path below.
loader, parser = DocumentLoader(), DocumentParser()

# `text` is what the extraction cell (Step 2) consumes.
text = loader.load(document_path)
# `parsed` is kept for inspection; no other cell shown in this notebook reads it.
parsed = parser.parse(document_path)

print(f"Loaded document with {len(text)} characters")


## Step 2: Knowledge Graph Construction


In [None]:
# --- Extraction -------------------------------------------------------------
# Entities are pulled from the document text first; relation extraction is then
# run over the same text and is given those entities.
extractor = EntityRelationExtractor()
entities = extractor.extract_entities(text)
relations = extractor.extract_relations(
    text,
    entities=entities,
)

print(f"Extracted {len(entities)} entities and {len(relations)} relations")

# --- Graph assembly ---------------------------------------------------------
# `graph` is the object handed to the retriever in Step 4.
graph_builder = GraphBuilder()
graph = graph_builder.build_from_entities_relations(
    entities,
    relations,
)

print(f"Knowledge graph built: {graph.get_statistics()}")


## Step 3: Graph Embedding


In [None]:
# Payload for the embedder: each extracted object is represented by its
# attribute dictionary, grouped under the 'entities' and 'relations' keys.
# NOTE(review): `__dict__` is the object's live attribute dict, not a copy —
# confirm that fit() does not mutate these records.
graph_data = dict(
    entities=[entity.__dict__ for entity in entities],
    relations=[relation.__dict__ for relation in relations],
)

# Build the embedder (embedding_dim=128) and fit it on the payload above.
embedder = MAYPLWrapper(embedding_dim=128)
embedder.fit(graph_data)

print("Graph embeddings created")


## Step 4: Query Retrieval


In [None]:
# Set up retrieval over the knowledge graph built in Step 2.
# NOTE(review): the embedder fitted in Step 3 is not passed to the retriever or
# the integration object — confirm that strategy="embedding" obtains its
# embeddings some other way.
retriever = SubgraphRetriever(graph=graph, strategy="embedding")
integration = RetrievalIntegration(retriever=retriever)

# Query the knowledge graph
user_query = "What are the key legal concepts in this document?"
result = integration.retrieve_context(user_query, top_k=5)

# Print a short preview of the retrieved context. The ellipsis is appended only
# when the context is longer than the preview, so a short context is not shown
# as if it had been truncated.
preview_limit = 200
context_text = result['context']
truncation_marker = "..." if len(context_text) > preview_limit else ""
print(f"Retrieved context: {context_text[:preview_limit]}{truncation_marker}")


## Step 5: Response Generation


In [None]:
# Assemble a QA prompt from the Step 4 query and the context retrieved for it.
prompt_builder = PromptBuilder()
prompt = prompt_builder.build_qa_prompt(question=user_query, context=result['context'])

# Header line first, then the prompt itself on the next line.
print("Generated prompt:", prompt, sep="\n")

# TODO: Call LLM to generate response
# response = llm_client.generate(prompt)
# print(f"\nResponse: {response}")


## Summary

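This demo shows the complete pipeline from document ingestion to response generation. The final LLM call in Step 5 is still a TODO, so the notebook currently ends at the generated prompt.
Each step can be customized and extended based on specific requirements.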
