# Hybrid RAG Temporal KG

## Overview

This notebook demonstrates an advanced hybrid search workflow: build a temporal knowledge graph (KG), generate vector embeddings, combine vector search with temporal KG queries (Vector + Temporal KG), and enable time-aware retrieval.

## Workflow: Build Temporal KG → Vector Embeddings → Hybrid Search → Time-Aware Retrieval


In [None]:
from semantica.ingest import FileIngestor, WebIngestor, FeedIngestor
from semantica.parse import DocumentParser, WebParser, StructuredDataParser
from semantica.semantic_extract import NERExtractor, RelationExtractor
from semantica.kg import GraphBuilder, TemporalGraphQuery
from semantica.embeddings import EmbeddingGenerator
from semantica.vector_store import VectorStore, HybridSearch
from datetime import datetime, timedelta
import numpy as np
import os
import tempfile
import json


## Step 1: Build Temporal Knowledge Graph


In [None]:
# Instantiate the semantica components. Only the file ingestor, the structured
# parser and the graph builder are actually called in this cell.
file_ingestor = FileIngestor()
web_ingestor = WebIngestor()
feed_ingestor = FeedIngestor()
document_parser = DocumentParser()
structured_parser = StructuredDataParser()
ner_extractor = NERExtractor()
relation_extractor = RelationExtractor()
builder = GraphBuilder()

# NOTE(review): the directory created by mkdtemp() is never removed by the
# visible cells; delete it once the notebook no longer needs events.json.
temp_dir = tempfile.mkdtemp()

# Sample events, written to disk so they can go through the ingest/parse path.
events_file = os.path.join(temp_dir, "events.json")
events_data = [
    {"event": "Product Launch", "date": "2023-10-15T10:00:00", "category": "product"},
    {"event": "Q4 Sales Meeting", "date": "2023-11-20T14:00:00", "category": "business"},
    {"event": "Year End Review", "date": "2023-12-31T09:00:00", "category": "business"},
    {"event": "New Year Planning", "date": "2024-01-05T10:00:00", "category": "planning"}
]

# Explicit encoding keeps the file contents independent of the platform default.
with open(events_file, "w", encoding="utf-8") as f:
    json.dump(events_data, f)

file_objects = file_ingestor.ingest_file(events_file, read_content=True)
parsed_events = structured_parser.parse_json(events_file)

# Resolve the event records once (falling back to the in-memory sample when the
# parser result has no "data" key) so the graph and the summary below are
# always computed from the same list.
event_records = parsed_events.get("data", events_data)

entities = []
relationships = []

for i, event_data in enumerate(event_records, 1):
    event_id = f"event{i}"
    event_name = event_data.get("event", f"Event {i}")
    timestamp = event_data.get("date", "")
    category = event_data.get("category", "general")

    # One "Event" node per record; the timestamp lives in the node properties.
    entities.append({
        "id": event_id,
        "type": "Event",
        "name": event_name,
        "properties": {"timestamp": timestamp, "category": category}
    })

    # Chain consecutive events; the edge carries the timestamp of the later
    # (target) event.
    if i > 1:
        prev_event_id = f"event{i-1}"
        relationships.append({
            "source": prev_event_id,
            "target": event_id,
            "type": "followed_by",
            "properties": {"timestamp": timestamp}
        })

temporal_kg = builder.build(entities, relationships)

print(f"Ingested {len(file_objects)} event files")
print(f"Parsed {len(event_records)} events from structured data")
print(f"Built temporal knowledge graph with {len(entities)} entities and {len(relationships)} relationships")


## Step 2: Vector Embeddings


In [None]:
# Pull out the three fields each sentence needs; only the first ten characters
# of the date string are kept.
event_fields = (
    (record.get("event", ""), record.get("date", "")[:10], record.get("category", ""))
    for record in parsed_events.get("data", events_data)
)

# One short descriptive sentence per event; these sentences are what gets embedded.
documents = [
    f"{title}: Event occurred on {day} in category {kind}."
    for title, day, kind in event_fields
]

generator = EmbeddingGenerator()
embeddings = generator.generate(documents)

print(f"Generated embeddings for {len(documents)} documents from parsed events")


## Step 3: Hybrid Search Setup


In [None]:
vector_store = VectorStore()

# Derive one metadata record per graph entity instead of hard-coding them.
# `entities` and `documents` are built from the same event records in the same
# order, so the metadata stays aligned with the embeddings.
metadata = [
    {
        "event_id": entity["id"],
        # Keep only the first ten characters (the date part) of the timestamp.
        "timestamp": entity["properties"]["timestamp"][:10],
        "category": entity["properties"]["category"],
    }
    for entity in entities
]

vector_ids = vector_store.store_vectors(embeddings, metadata)

hybrid_search = HybridSearch()
temporal_query = TemporalGraphQuery()

print(f"Stored {len(vector_ids)} vectors in vector store")
print("Hybrid search and temporal query initialized")


## Step 4: Time-Aware Retrieval


In [None]:
query = "What happened in Q4 2023?"
query_embedding = generator.generate([query])[0]

# Vector side: nearest neighbours of the embedded query.
vector_results = vector_store.search_vectors(query_embedding, k=10)

# Temporal side: restrict the graph to the fourth quarter of 2023.
temporal_query_result = temporal_query.query_time_range(
    graph=temporal_kg,
    query=query,
    start_time="2023-10-01",
    end_time="2023-12-31",
    temporal_aggregation="union"
)

temporal_results = []
entities_list = temporal_kg.get("entities", [])
entity_map = {e.get("id"): e for e in entities_list}

# Collect BOTH endpoints of every returned relationship, each entity at most
# once. Looking only at the source would drop an entity that appears solely as
# a target (e.g. the last event of a chain) and would repeat an entity that is
# the source of several relationships.
seen_entity_ids = set()
for rel in temporal_query_result.get("relationships", []):
    for endpoint_id in (rel.get("source"), rel.get("target")):
        if endpoint_id in seen_entity_ids or endpoint_id not in entity_map:
            continue
        seen_entity_ids.add(endpoint_id)
        entity = entity_map[endpoint_id]
        temporal_results.append({
            "entity_id": endpoint_id,
            "name": entity.get("name"),
            "timestamp": entity.get("properties", {}).get("timestamp", ""),
            "type": entity.get("type")
        })

def combine_results(vector_results, temporal_results):
    """Merge vector-search hits and temporal-graph hits into one ranked list.

    Args:
        vector_results: Re-iterable collection (e.g. a list) of dicts; the
            ``id`` and ``score`` keys are read (defaults ``""`` and ``0``).
        temporal_results: Iterable of dicts; the ``entity_id``, ``name`` and
            ``timestamp`` keys are read.

    Returns:
        A list with one dict per distinct id, sorted by ``vector_score`` in
        descending order. Each dict's ``type`` is ``"hybrid"`` (found by both
        searches), ``"temporal_only"`` (``vector_score`` is 0) or
        ``"vector_only"`` (no ``name``/``timestamp`` keys).
    """
    vector_by_id = {r.get("id", ""): r for r in vector_results}

    combined = []
    # Ids already emitted; gives O(1) duplicate checks instead of rescanning
    # `combined` for every vector hit.
    seen_ids = set()

    for temp_result in temporal_results:
        entity_id = temp_result.get("entity_id", "")
        if entity_id in seen_ids:
            # The same entity can be reported more than once by the temporal
            # query; keep only its first occurrence.
            continue
        seen_ids.add(entity_id)

        vector_hit = vector_by_id.get(entity_id)
        matched = vector_hit is not None
        combined.append({
            "id": entity_id,
            "name": temp_result.get("name"),
            "vector_score": vector_hit.get("score", 0) if matched else 0,
            "timestamp": temp_result.get("timestamp"),
            "type": "hybrid" if matched else "temporal_only"
        })

    for vec_result in vector_results:
        vec_id = vec_result.get("id", "")
        if vec_id in seen_ids:
            continue
        seen_ids.add(vec_id)
        combined.append({
            "id": vec_id,
            "vector_score": vec_result.get("score", 0),
            "type": "vector_only"
        })

    # list.sort is stable, so entries with equal scores keep insertion order.
    combined.sort(key=lambda x: x.get("vector_score", 0), reverse=True)
    return combined

# Merge both result sets, then report how many entries each source contributed.
hybrid_results = combine_results(vector_results, temporal_results)

print(f"Retrieved {len(hybrid_results)} time-aware results")
print(f"  Vector results: {len(vector_results)}")
print(f"  Temporal results: {len(temporal_results)}")

# Entries tagged "hybrid" were found by both the vector and the temporal search.
hybrid_match_count = sum(
    1 for result in hybrid_results if result.get("type") == "hybrid"
)
print(f"  Hybrid results: {hybrid_match_count}")


## Summary

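This notebook walked through an advanced hybrid search workflow:
- A temporal knowledge graph was built from the event data
- Vector embeddings were generated for the event descriptions
- Hybrid search (vector store + temporal graph query) was configured
- Time-aware retrieval was implemented by merging vector and temporal results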
