[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Hawksight-AI/semantica/blob/main/cookbook/use_cases/trading/01_Risk_Assessment.ipynb)

# Risk Assessment - Graph Analytics & Portfolio Risk Modeling

## Overview

This notebook demonstrates **portfolio risk assessment** with Semantica, with a focus on **graph-based analytics**, **portfolio risk modeling**, **market simulation**, and **dependency analysis**. The pipeline builds a financial knowledge graph from portfolio data, analyzes it for dependencies and risk patterns, and exports the results for market simulation.

### Key Features

- **Graph-Based Analytics**: Uses graph analytics for portfolio risk analysis
- **Portfolio Risk Modeling**: Models portfolio relationships and dependencies
- **Market Simulation**: Simulates market scenarios using graph data
- **Dependency Analysis**: Analyzes dependencies between portfolio components
- **Risk Modeling**: Combines graph analytics with rule-based reasoning to infer portfolio risk levels

### Pipeline Architecture

1. **Phase 0**: Setup & Configuration
2. **Phase 1**: Portfolio Data Ingestion
3. **Phase 2**: Entity Extraction (Price, Signal, Pattern, Indicator, Strategy)
4. **Phase 3**: Financial Knowledge Graph Construction
5. **Phase 4**: Graph Analytics (Dependencies, Relationships)
6. **Phase 5**: Portfolio Risk Modeling
7. **Phase 6**: Market Simulation
8. **Phase 7**: Visualization & Risk Reporting

---

## Installation


In [None]:
# Install the notebook's dependencies quietly (-q), upgrading any versions that are already installed (-U).
%pip install -qU semantica networkx matplotlib plotly pandas groq


---

## Phase 0: Setup & Configuration


In [None]:
import os
from semantica.core import Semantica, ConfigManager
from semantica.kg import GraphAnalytics
from semantica.reasoning import GraphReasoner
from semantica.graph_store import GraphStore

# Keep a GROQ_API_KEY that is already exported; otherwise install the
# "your-key" placeholder so the variable is always present in the environment.
os.environ.setdefault("GROQ_API_KEY", "your-key")

# Project settings: Groq as the extraction provider and NetworkX as the
# knowledge-graph backend.
extraction_settings = {"provider": "groq", "model": "llama-3.1-8b-instant"}
graph_settings = {"backend": "networkx"}
config_dict = dict(
    project_name="Risk_Assessment",
    extraction=extraction_settings,
    knowledge_graph=graph_settings,
)

# Load the settings and create the Semantica entry point used by later cells.
config = ConfigManager().load_from_dict(config_dict)
core = Semantica(config=config)
print("Configured for risk assessment with graph analytics focus")


---

## Phase 1: Portfolio Data Ingestion

Ingest portfolio data from databases, structured documents, and version-controlled configurations.


In [None]:
from semantica.ingest import DBIngestor, RepoIngestor, FileIngestor
from semantica.parse import DocumentParser
from semantica.normalize import TextNormalizer
from semantica.split import TextSplitter
import os

# All sample files generated by this notebook live under ./data.
os.makedirs("data", exist_ok=True)

# Raw documents collected from every ingestion option below.
documents = []

# Option 1: Ingest from database (structured portfolio data)
db_connection = "sqlite:///data/portfolio.db"
try:
    import sqlite3
    from contextlib import closing

    # Create sample portfolio database structure.
    # closing() releases the connection even when a statement raises; sqlite3's
    # own connection context manager only scopes the transaction and would
    # leave the handle open.
    with closing(sqlite3.connect("data/portfolio.db")) as conn:
        cursor = conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS holdings (
                id INTEGER PRIMARY KEY,
                symbol TEXT,
                allocation REAL,
                sector TEXT,
                risk_level TEXT
            )
        """)
        # INSERT OR REPLACE keeps the cell idempotent when it is re-run.
        cursor.execute("""
            INSERT OR REPLACE INTO holdings VALUES
            (1, 'AAPL', 0.30, 'Technology', 'Medium'),
            (2, 'MSFT', 0.25, 'Technology', 'Low'),
            (3, 'GOOGL', 0.20, 'Technology', 'Medium'),
            (4, 'BND', 0.25, 'Bonds', 'Low')
        """)
        conn.commit()

    # Instantiate the ingestor only once the database it reads from exists.
    db_ingestor = DBIngestor()
    db_data = db_ingestor.ingest_database(db_connection, method="sqlite")
    if db_data and "data" in db_data:
        # Each table payload is stored as its string form, one document per table.
        for table_data in db_data["data"]:
            documents.append(str(table_data))
    print("Ingested portfolio data from database")
except Exception as e:
    # Best-effort demo step: report the failure and fall through to the other options.
    print(f"Database ingestion failed: {e}")

# Option 2: Parse structured portfolio documents (CSV/PDF)
try:
    # Header row followed by one row per holding; no trailing newline.
    csv_rows = [
        "symbol,allocation,sector,risk_level",
        "AAPL,0.30,Technology,Medium",
        "MSFT,0.25,Technology,Low",
        "GOOGL,0.20,Technology,Medium",
        "BND,0.25,Bonds,Low",
    ]
    portfolio_csv = "\n".join(csv_rows)
    with open("data/portfolio.csv", "w") as csv_file:
        csv_file.write(portfolio_csv)

    # Parse the file just written and keep its string form as one document.
    doc_parser = DocumentParser()
    parsed_docs = doc_parser.parse("data/portfolio.csv", file_type="csv")
    if parsed_docs:
        documents.append(str(parsed_docs))
    print("Parsed portfolio document")
except Exception as e:
    print(f"Document parsing failed: {e}")

# Option 3: Ingest from version-controlled portfolio configs
try:
    # Plain-text allocation summary. The leading blank line, the per-line
    # indent and the trailing whitespace are all part of the file content.
    portfolio_config = "\n".join([
        "",
        "    Portfolio Configuration:",
        "    - AAPL: 30% allocation, Tech sector",
        "    - MSFT: 25% allocation, Tech sector  ",
        "    - GOOGL: 20% allocation, Tech sector",
        "    - BND: 25% allocation, Bonds",
        "    Risk: High tech concentration (75%)",
        "    ",
    ])
    os.makedirs("data/portfolio_config", exist_ok=True)
    with open("data/portfolio_config/config.txt", "w") as config_file:
        config_file.write(portfolio_config)

    # Ingest the whole directory and append whatever documents it yields.
    repo_docs = FileIngestor().ingest("data/portfolio_config")
    documents.extend(repo_docs)
    print("Ingested portfolio configuration from repository structure")
except Exception as e:
    print(f"Repository ingestion failed: {e}")

# Fallback: Sample data
# Only used when none of the options above produced a document.
if not documents:
    # Free-text portfolio description; the leading blank line and the per-line
    # indent are part of the file content.
    portfolio_data = "\n".join([
        "",
        "    Portfolio contains: Stock AAPL (30%), Stock MSFT (25%), Stock GOOGL (20%), Bond BND (25%).",
        "    AAPL price correlates with tech sector performance.",
        "    MSFT depends on cloud services market growth.",
        "    Portfolio risk: High concentration in tech sector (75%).",
        "    Dependency: Tech sector downturn impacts 75% of portfolio.",
        "    ",
    ])
    with open("data/portfolio.txt", "w") as sample_file:
        sample_file.write(portfolio_data)
    documents = FileIngestor().ingest("data/portfolio.txt")
    print(f"Ingested {len(documents)} documents from sample data")

# Normalize financial data
normalizer = TextNormalizer()


def _document_text(doc):
    """Return ``doc.content`` when the attribute exists, otherwise ``str(doc)``."""
    if hasattr(doc, 'content'):
        return doc.content
    return str(doc)


# One normalized text per ingested document, in ingestion order.
normalized_documents = [
    normalizer.normalize(
        _document_text(doc),
        clean_html=True,
        normalize_entities=True,
        normalize_numbers=True,
        remove_extra_whitespace=True,
    )
    for doc in documents
]

print(f"Normalized {len(normalized_documents)} documents")

# Token-based chunking for the structured portfolio data
splitter = TextSplitter(method="token", chunk_size=1000, chunk_overlap=200)

# Flatten every document's chunks into a single list of plain strings; chunk
# objects exposing ``content`` contribute that attribute, anything else its str().
chunked_docs = [
    piece.content if hasattr(piece, 'content') else str(piece)
    for text in normalized_documents
    for piece in splitter.split(text)
]

print(f"Created {len(chunked_docs)} token-based chunks")
print("Phase 1 complete: Portfolio data ingested from multiple structured sources")


In [None]:
from semantica.semantic_extract import NERExtractor, RelationExtractor, EventDetector

# Phase 2: Entity Extraction (Price, Signal, Pattern, Indicator, Strategy)

# Extract entities using LLM-based NER
ner_extractor = NERExtractor(method="llm", provider="groq", llm_model="llama-3.1-8b-instant")
entities = []
for doc_text in chunked_docs:
    entities.extend(ner_extractor.extract_entities(doc_text))

# Entity types of interest, plus keywords that mark an entity as financial
# even when its type tag is something else.
FINANCIAL_ENTITY_TYPES = ("Price", "Signal", "Pattern", "Indicator", "Strategy")
FINANCIAL_KEYWORDS = ("stock", "bond", "portfolio", "risk", "sector")


def _entity_type(entity):
    """Return the entity's type tag, or None when it carries none.

    The deduplication cell of this notebook reads and constructs entities with
    a ``label`` attribute, so ``label`` is tried first and ``type`` is kept as a
    fallback for entity objects that expose that name instead.
    """
    label = getattr(entity, "label", None)
    if label is not None:
        return label
    return getattr(entity, "type", None)


# Filter for financial entity types
financial_entities = [
    e for e in entities
    if _entity_type(e) in FINANCIAL_ENTITY_TYPES
    or any(keyword in e.text.lower() for keyword in FINANCIAL_KEYWORDS)
]

# Extract relationships
relation_extractor = RelationExtractor(method="llm", provider="groq", llm_model="llama-3.1-8b-instant")

# Run the extractor over every chunk, restricted to the filtered financial
# entities, and flatten the per-chunk results into one list.
relationships = [
    relation
    for doc_text in chunked_docs
    for relation in relation_extractor.extract_relations(doc_text, entities=financial_entities)
]

# Extract market events
event_detector = EventDetector()
events = [
    event
    for doc_text in chunked_docs
    for event in event_detector.detect_events(doc_text)
]

# Phase summary: one line per extracted collection.
extraction_summary = (
    ("financial entities", financial_entities),
    ("relationships", relationships),
    ("market events", events),
)
for description, items in extraction_summary:
    print(f"Extracted {len(items)} {description}")
print("Phase 2 complete: Entity extraction with NER, relations, and events")


---

## Phase 3: Financial Knowledge Graph Construction

Build the financial knowledge graph with the core pipeline (NetworkX backend), generate a domain ontology, and show how a GraphStore (Neo4j) would be configured for persistent storage.


In [None]:
from semantica.ontology import OntologyGenerator
from semantica.context import AgentContext
from semantica.vector_store import VectorStore

# Convert extracted entities and relationships into plain dictionaries
# (consumed by the ontology generator further down in this cell).


def _first_attr(obj, *names):
    """Return the value of the first attribute in *names* that *obj* defines.

    Raises:
        AttributeError: If *obj* defines none of the given attributes.
    """
    for name in names:
        if hasattr(obj, name):
            return getattr(obj, name)
    raise AttributeError(
        f"{type(obj).__name__!r} object has none of the attributes {names}"
    )


def _node_text(node):
    """Return ``node.text`` for entity-like endpoints, otherwise ``str(node)``."""
    return node.text if hasattr(node, 'text') else str(node)


# The deduplication cell reads and constructs entities with
# ``label``/``start_char``/``end_char``; those names are tried first, with the
# ``type``/``start``/``end`` spellings kept as fallbacks.
entity_dicts = [
    {
        "text": e.text,
        "type": _first_attr(e, "label", "type"),
        "start": _first_attr(e, "start_char", "start"),
        "end": _first_attr(e, "end_char", "end"),
        "confidence": getattr(e, 'confidence', 0.9),
    }
    for e in financial_entities
]

# The text fallback is decided per endpoint: an endpoint with ``text``
# contributes it, any other endpoint contributes its string form.
relationship_dicts = [
    {
        "subject": _node_text(rel.subject),
        "predicate": rel.predicate if hasattr(rel, 'predicate') else "related_to",
        "object": _node_text(rel.object),
    }
    for rel in relationships
]

# Run the core pipeline over the chunks, requesting both a graph and embeddings.
custom_types = ["Price", "Signal", "Pattern", "Indicator", "Strategy"]
result = core.build_knowledge_base(
    sources=chunked_docs,
    custom_entity_types=custom_types,
    graph=True,
    embeddings=True,
)

# The graph is kept under the "knowledge_graph" key of the pipeline result.
kg = result["knowledge_graph"]
kg_entity_count = len(kg.get('entities', []))
print(f"Built portfolio KG with {kg_entity_count} entities")

# Generate financial domain ontology
ontology_generator = OntologyGenerator(base_uri="https://semantica.dev/ontology/finance/")
ontology_input = {"entities": entity_dicts, "relationships": relationship_dicts}
ontology = ontology_generator.generate_ontology(ontology_input)

# Report how many classes and properties the generated ontology contains.
class_count = len(ontology.get('classes', []))
print(f"Generated ontology with {class_count} classes")
property_count = len(ontology.get('properties', []))
print(f"Generated {property_count} properties")

# Setup GraphStore for persistent storage (Neo4j backend)
# Note: For demo, we'll use networkx but show GraphStore usage
try:
    # Local default Neo4j endpoint and credentials used for the demonstration.
    neo4j_settings = {
        "uri": "bolt://localhost:7687",
        "user": "neo4j",
        "password": "password",
    }
    graph_store = GraphStore(backend="neo4j", **neo4j_settings)
    # In production, you would store the graph here
    # graph_store.create_node(labels=["Portfolio"], properties={"name": "Risk_Assessment"})
    print("GraphStore configured for Neo4j (connection skipped in demo)")
except Exception as e:
    print(f"GraphStore connection skipped (Neo4j not available): {e}")

# Setup GraphRAG for portfolio analysis
vector_store = VectorStore(backend="faiss", dimension=384)

# Store the pipeline's embeddings only when the result actually contains some.
embedding_payload = result.get("embeddings")
if embedding_payload:
    vector_store.store_vectors(
        vectors=embedding_payload["vectors"],
        metadata=embedding_payload["metadata"],
    )

# Combine the vector store and the knowledge graph into one agent context.
context = AgentContext(vector_store=vector_store, knowledge_graph=kg)

print("Phase 3 complete: Financial KG constructed with ontology and GraphStore")


---

## Phase 4: Graph Analytics

Perform comprehensive graph analytics: centrality analysis, community detection, and path analysis.


In [None]:
# Perform comprehensive graph analytics
analytics = GraphAnalytics(kg)

# Multiple centrality methods, computed in a fixed order.
centrality_scores = {
    method: analytics.calculate_centrality(method=method)
    for method in ("betweenness", "degree", "closeness", "eigenvector")
}

# Keep the individually named results available to the rest of the notebook.
betweenness_centrality = centrality_scores["betweenness"]
degree_centrality = centrality_scores["degree"]
closeness_centrality = centrality_scores["closeness"]
eigenvector_centrality = centrality_scores["eigenvector"]

for method, scores in centrality_scores.items():
    print(f"{method.capitalize()} centrality: {len(scores)} nodes analyzed")

# Community detection for portfolio clusters
# Start from an empty result so `communities` is always bound, even when the
# detection call below fails and only the error message is printed.
communities = []
try:
    communities = analytics.detect_communities(method="louvain")
    print(f"Detected {len(communities)} communities/clusters")
except Exception as e:
    print(f"Community detection: {e}")

# Path analysis for dependency chains
# Same approach: `paths` stays an empty list when path analysis fails.
paths = []
try:
    paths = analytics.find_paths(source="AAPL", target="MSFT", max_length=3)
    print(f"Found {len(paths)} paths between portfolio components")
except Exception as e:
    print(f"Path analysis: {e}")

# Use reasoning for dependency analysis
reasoner = GraphReasoner(kg)

# Query the reasoner once per pattern type, dependency patterns first.
pattern_matches = {
    kind: reasoner.find_patterns(pattern_type=kind)
    for kind in ("dependency", "risk")
}
dependencies = pattern_matches["dependency"]
risk_patterns = pattern_matches["risk"]

print(f"Dependency analysis: {len(dependencies)} portfolio dependencies identified")
print(f"Risk modeling: {len(risk_patterns)} risk patterns detected")
print("Phase 4 complete: Comprehensive graph analytics performed")


---

## Deduplication

Resolve duplicate entities using graph-based clustering for risk assessment.


In [None]:
from semantica.deduplication import ClusterBuilder, EntityMerger
from semantica.semantic_extract import Entity

# Convert Entity objects to dictionaries
print(f"Converting {len(financial_entities)} entities to dictionaries...")
entity_dicts = [
    {
        "name": e.text,
        "type": e.label,
        "start_char": e.start_char,
        "end_char": e.end_char,
        "confidence": e.confidence,
    }
    for e in financial_entities
]

# One threshold shared by clustering and merging so the two steps agree on
# what counts as a duplicate.
SIMILARITY_THRESHOLD = 0.85

# Use graph_based clustering for risk assessment (identifies interconnected risks)
cluster_builder = ClusterBuilder(method="graph_based", similarity_threshold=SIMILARITY_THRESHOLD)

print(f"Building clusters for {len(entity_dicts)} entities using graph-based method...")
clusters = cluster_builder.build_clusters(entity_dicts)

print(f"Detected {len(clusters)} clusters")
print("Merging entities within clusters using merge_all strategy...")
merger = EntityMerger()
merged_entities_dicts = []
for cluster in clusters:
    # Single-member (or empty) clusters have nothing to merge.
    if len(cluster) <= 1:
        merged_entities_dicts.extend(cluster)
        continue

    # Merge entities within the cluster using the merge_all strategy.
    merge_operations = merger.merge_duplicates(
        cluster, strategy="merge_all", threshold=SIMILARITY_THRESHOLD
    )
    if merge_operations:
        merged_entities_dicts.extend(op.merged_entity for op in merge_operations)
    else:
        # No merge was produced: keep the cluster's members unchanged instead
        # of silently dropping them from the result.
        merged_entities_dicts.extend(cluster)

# Convert back to Entity objects
print(f"Converting {len(merged_entities_dicts)} merged entities back to Entity objects...")
merged_entities = [
    Entity(
        text=e["name"],
        label=e["type"],
        start_char=e.get("start_char", 0),
        end_char=e.get("end_char", 0),
        confidence=e.get("confidence", 1.0),
    )
    for e in merged_entities_dicts
]

# Later phases work on the deduplicated list.
financial_entities = merged_entities
print(f"Deduplicated to {len(merged_entities)} unique entities")


---

## Phase 5: Portfolio Risk Modeling

Detect conflicts, apply risk inference rules, and perform temporal graph analysis.


In [None]:
from semantica.conflicts import ConflictDetector, ConflictResolver

# Use logical conflict detection for portfolio risk rules
# highest_confidence strategy prioritizes the most confident risk assessment
conflict_detector = ConflictDetector()
conflict_resolver = ConflictResolver()

print(f"Detecting logical conflicts in {len(financial_entities)} entities and relationships...")
# `relationships` is the list produced by the Phase 2 relation extractor; no
# other relationship collection is defined in this notebook.
conflicts = conflict_detector.detect_conflicts(
    entities=financial_entities,
    relationships=relationships,
    method="logical"  # Detect logical conflicts (e.g., conflicting risk indicators)
)

print(f"Detected {len(conflicts)} logical conflicts")

if conflicts:
    print("Resolving conflicts using highest_confidence strategy...")
    resolved = conflict_resolver.resolve_conflicts(
        conflicts,
        strategy="highest_confidence"  # Prioritize most confident risk assessment
    )
    print(f"Resolved {len(resolved)} conflicts")
else:
    print("No conflicts detected")

# Use reasoning for risk inference rules
# Each rule is an "IF <condition> THEN <risk level>" string handed to the reasoner.
risk_rules = [
    "IF sector_concentration > 0.7 THEN high_risk",
    "IF tech_sector_allocation > 0.5 AND bond_allocation < 0.3 THEN medium_risk",
    "IF portfolio_diversity < 0.3 THEN high_risk",
]

# Register the rules in list order.
for rule_text in risk_rules:
    reasoner.add_rule(rule_text)

# Infer risk levels
inferred_risks = reasoner.infer_facts(kg)
inferred_count = len(inferred_risks)
print(f"Inferred {inferred_count} risk facts from rules")

# Temporal graph analysis (if temporal data available)
try:
    # Same chunks as the main build, with a reduced type list and the
    # temporal flag switched on.
    temporal_build_options = {
        "sources": chunked_docs,
        "custom_entity_types": ["Price", "Signal", "Pattern"],
        "graph": True,
        "temporal": True,
    }
    temporal_kg = core.build_knowledge_base(**temporal_build_options)
    print("Temporal knowledge graph constructed for time-series analysis")
except Exception as e:
    print(f"Temporal analysis: {e}")

print("Phase 5 complete: Risk modeling with conflict detection and inference")


---

## Phase 6: Market Simulation

Export the knowledge graph to multiple formats, generate a risk report, and summarize the graph statistics that feed external market-simulation analysis.


In [None]:
from semantica.export import ExportManager, ReportGenerator

# Export to multiple formats
export_manager = ExportManager()

# (output path, export format, label used in the log line, extra keyword options)
export_jobs = [
    ("data/portfolio_risk.json", "json", "JSON", {}),
    ("data/portfolio_risk.csv", "csv", "CSV", {}),
    # GraphML for external analysis
    ("data/portfolio_risk.graphml", "graphml", "GraphML", {}),
    # RDF serialized as Turtle
    ("data/portfolio_risk.ttl", "rdf", "RDF (Turtle)", {"rdf_format": "turtle"}),
]
for output_path, export_format, label, extra_options in export_jobs:
    export_manager.export_knowledge_graph(kg, output_path, format=export_format, **extra_options)
    print(f"Exported portfolio KG to {label}")

# Generate risk report
report_generator = ReportGenerator()

# HTML risk-assessment report written under ./data.
report_options = {
    "report_type": "risk_assessment",
    "output_path": "data/risk_report.html",
    "format": "html",
}
risk_report = report_generator.generate_report(kg, **report_options)
print("Generated HTML risk assessment report")

# Market simulation data preparation
# Size statistics gathered from the graph and the earlier analysis phases.
simulation_data = {
    "entities": len(kg.get("entities", [])),
    "relationships": len(kg.get("relationships", [])),
    "risk_patterns": len(risk_patterns),
    "conflicts": len(conflicts),
}

# `communities` exists only if community detection succeeded earlier, so probe
# for the name before using it.
if 'communities' in locals():
    simulation_data["communities"] = len(communities)
else:
    simulation_data["communities"] = 0

print("\nMarket Simulation Data Summary:")
for metric, count in simulation_data.items():
    print(f"  {metric}: {count}")

print("Phase 6 complete: Market simulation data exported and reports generated")


---

## Phase 7: Visualization & Risk Reporting

Visualize the knowledge graph and list the outputs generated by the pipeline.


In [None]:
from semantica.visualization import KGVisualizer

# Visualize knowledge graph
visualizer = KGVisualizer()
visualizer.visualize(kg, output_path="portfolio_risk_kg.html")

print("Risk assessment analysis complete")
print("Emphasizes: Graph analytics, portfolio risk modeling, market simulation, dependency analysis")

# Files written by this notebook, paired with a short description of each.
generated_outputs = [
    ("portfolio_risk_kg.html", "visualization"),
    ("data/portfolio_risk.json", "JSON export"),
    ("data/portfolio_risk.csv", "CSV export"),
    ("data/portfolio_risk.graphml", "GraphML export"),
    ("data/portfolio_risk.ttl", "RDF export"),
    ("data/risk_report.html", "risk report"),
]
print("\nGenerated outputs:")
for output_file, description in generated_outputs:
    print(f"  - {output_file} ({description})")
