[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Hawksight-AI/semantica/blob/main/cookbook/use_cases/blockchain/01_DeFi_Protocol_Intelligence.ipynb)

# DeFi Protocol Intelligence Pipeline

## Overview

This notebook demonstrates a complete DeFi protocol intelligence pipeline: ingest DeFi data from multiple sources (APIs, feeds, databases), extract protocol entities, build a DeFi knowledge graph, analyze relationships, assess risks, optimize yields, and generate reports.


**Documentation**: [API Reference](https://semantica.readthedocs.io/use-cases/)

## Installation

Install Semantica from PyPI:

```bash
pip install semantica
# Or with all optional dependencies:
pip install semantica[all]
```

### Modules Used (20+)

- **Ingestion**: FileIngestor, WebIngestor, FeedIngestor, StreamIngestor, DBIngestor, EmailIngestor, RepoIngestor, MCPIngestor
- **Parsing**: JSONParser, HTMLParser, StructuredDataParser
- **Extraction**: NERExtractor, RelationExtractor, SemanticAnalyzer, EventDetector
- **KG**: GraphBuilder, GraphAnalyzer, CentralityCalculator, CommunityDetector
- **Analytics**: ConnectivityAnalyzer, TemporalGraphQuery, TemporalPatternDetector
- **Reasoning**: InferenceEngine, RuleManager, ExplanationGenerator
- **Ontology**: OntologyGenerator, ClassInferrer, PropertyGenerator, OntologyValidator
- **Export**: JSONExporter, RDFExporter, OWLExporter, ReportGenerator
- **Visualization**: KGVisualizer, OntologyVisualizer, AnalyticsVisualizer

### Pipeline

**DeFi Data Sources → Parse → Extract Entities (protocols, pools, tokens, strategies) → Build DeFi KG → Analyze Relationships → Risk Assessment → Yield Optimization → Generate Reports → Visualize**

---

## Step 1: Ingest DeFi Data from Multiple Sources

Ingest DeFi protocol data from APIs, feeds, and databases.


In [None]:
from semantica.ingest import WebIngestor, FeedIngestor, DBIngestor, FileIngestor
from semantica.parse import JSONParser, HTMLParser, StructuredDataParser
from semantica.semantic_extract import NERExtractor, RelationExtractor, SemanticAnalyzer, EventDetector
from semantica.kg import GraphBuilder, GraphAnalyzer, CentralityCalculator, CommunityDetector
from semantica.kg import ConnectivityAnalyzer, TemporalGraphQuery, TemporalPatternDetector
from semantica.reasoning import InferenceEngine, RuleManager, ExplanationGenerator
from semantica.ontology import OntologyGenerator, ClassInferrer, PropertyGenerator, OntologyValidator
from semantica.export import JSONExporter, RDFExporter, OWLExporter, ReportGenerator
from semantica.visualization import KGVisualizer, OntologyVisualizer, AnalyticsVisualizer
import tempfile
import os
import json
from datetime import datetime, timedelta

# One ingestor per kind of source used in this step.
web_ingestor = WebIngestor()
feed_ingestor = FeedIngestor()
db_ingestor = DBIngestor()
file_ingestor = FileIngestor()

# Parsers for the ingested payloads.
json_parser = JSONParser()
html_parser = HTMLParser()
structured_parser = StructuredDataParser()

# Real DeFi APIs
defi_apis = [
    "https://api.llama.fi/protocols",  # DeFiLlama API
    "https://api.thegraph.com/subgraphs/name/uniswap/uniswap-v2",  # The Graph - Uniswap
    "https://api.github.com/repos/Uniswap/interface"  # Uniswap GitHub
]

# Real DeFi protocol feeds
defi_feeds = [
    "https://defipulse.com/blog/feed",  # DeFi Pulse
    "https://feeds.feedburner.com/TheDefiant"  # The Defiant
]

# Database connection for protocol metrics.
# The literal below is only a placeholder (dummy user/password on localhost).
# Set the DEFI_DB_URL environment variable to point at a real database without
# writing credentials into the notebook; when it is unset the placeholder is
# used, exactly as before.
db_connection_string = os.environ.get(
    "DEFI_DB_URL",
    "postgresql://user:password@localhost:5432/defi_db",
)

# Reference query for the protocol metrics table (last 7 days, largest TVL first).
db_query = (
    "SELECT protocol_name, tvl, apy, token_address, pool_address, timestamp "
    "FROM defi_protocols "
    "WHERE timestamp > NOW() - INTERVAL '7 days' "
    "ORDER BY tvl DESC LIMIT 1000"
)

# Scratch directory for this run; the sample data file is written into it.
temp_dir = tempfile.mkdtemp()

# Sample DeFi protocol data for local ingestion
defi_data_file = os.path.join(temp_dir, "defi_protocols.json")

# Column order of the rows below; it is also the key order of every record.
_protocol_fields = (
    "protocol_name",
    "protocol_type",
    "tvl",
    "apy",
    "token_address",
    "pool_address",
    "token_symbol",
    "chain",
)

# NOTE(review): the Compound V3 pool address ends in a repeating "b5" pattern
# and looks like a placeholder - confirm before relying on it.
_protocol_rows = [
    (
        "Uniswap V3", "DEX", 2500000000, 12.5,
        "0x1f9840a85d5aF5bf1D1762F925BDADdC4201F984",
        "0x8ad599c3A0ff1De082011EFDDc58f1908eb6e6D8",
        "UNI", "Ethereum",
    ),
    (
        "Aave V3", "Lending", 1800000000, 8.3,
        "0x7Fc66500c84A76Ad7e9c93437bFc5Ac33E2DDaE9",
        "0x87870Bca3F3fD6335C3F4ce8392D69350B4fA4E2",
        "AAVE", "Ethereum",
    ),
    (
        "Compound V3", "Lending", 1200000000, 7.8,
        "0xc00e94Cb662C3520282E6f5717214004A7f26888",
        "0xc3d688B667034EAD2F183C05b6e4B5e5B5b5b5b5",
        "COMP", "Ethereum",
    ),
    (
        "Curve Finance", "DEX", 1500000000, 15.2,
        "0xD533a949740bb3306d119CC777fa900bA034cd52",
        "0xbEbc44782C7dB0a1A60Cb6fe97d0b483032FF1C7",
        "CRV", "Ethereum",
    ),
    (
        "MakerDAO", "Lending", 8000000000, 3.5,
        "0x9f8F72aA9304c8B593d555F12eF6589cC3A579A2",
        "0x35D1b3F3D7966A1DFe207aa4514C12a2594E9c99",
        "MKR", "Ethereum",
    ),
]

# The k-th record (counting from 1) is stamped k hours before the moment it is
# built, as a naive local-time ISO string.
defi_data = [
    {
        **dict(zip(_protocol_fields, row)),
        "timestamp": (datetime.now() - timedelta(hours=hours_ago)).isoformat(),
    }
    for hours_ago, row in enumerate(_protocol_rows, start=1)
]

# Persist the sample so the file ingestor has something on disk to read.
with open(defi_data_file, 'w') as f:
    f.write(json.dumps(defi_data, indent=2))

# Ingest from local file
file_data = file_ingestor.ingest_file(defi_data_file)
# The structured parse is fed the in-memory sample (re-serialised to JSON),
# not the content returned by the file ingestor.
parsed_defi = structured_parser.parse_json(json.dumps(defi_data))

# Ingest from DeFi APIs (example with public API).
# Only one of the configured endpoints is actually fetched here.
web_api_url = defi_apis[2]  # GitHub API
web_content = web_ingestor.ingest_url(web_api_url)
if web_content:
    print(f"  Ingested DeFi API: {web_api_url}")

# Ingest from DeFi feeds; feeds that return nothing are skipped.
feed_data_list = []
for feed_url in defi_feeds:
    feed_data = feed_ingestor.ingest_feed(feed_url)
    if feed_data:
        feed_data_list.append(feed_data)
        print(f"  Ingested feed: {feed_url}")

# Database ingestion pattern.
# The table export is what actually runs; db_query is only displayed as the
# equivalent query pattern.
db_data = db_ingestor.export_table(
    connection_string=db_connection_string,
    table_name="defi_protocols",
    limit=1000
)
print(f"  Query pattern: {db_query}")

# Count what was really fetched, so the summary line agrees with the other
# lines (which all report results, not configuration).
web_apis_ingested = 1 if web_content else 0

print("\n📊 Ingestion Summary:")
print(f"  Local protocols: {len(defi_data)}")
print(f"  Database records: {len(db_data.get('data', [])) if db_data else 0}")
print(f"  Feeds ingested: {len(feed_data_list)}")
print(f"  Web APIs ingested: {web_apis_ingested} of {len(defi_apis)} configured")


## Step 2: Extract DeFi Entities

Extract protocols, pools, tokens, and strategies from the ingested data.


In [None]:
ner_extractor = NERExtractor()
relation_extractor = RelationExtractor()
semantic_analyzer = SemanticAnalyzer()
event_detector = EventDetector()

all_defi_texts = []
all_protocols = []

# Process parsed DeFi data.
# NOTE(review): the parser's return type is not visible from this notebook.
# A dict is read through its "data" key (as before); a bare list is used as
# is; any other object is probed for a `.data` attribute. Whenever that does
# not produce a list of records, fall back to the in-memory sample instead of
# silently extracting nothing.
if isinstance(parsed_defi, dict):
    protocols = parsed_defi.get("data", defi_data)
elif isinstance(parsed_defi, (list, tuple)):
    protocols = parsed_defi
else:
    protocols = getattr(parsed_defi, "data", None)
if not isinstance(protocols, (list, tuple)):
    protocols = defi_data

for protocol in protocols:
    all_protocols.append(protocol)
    # One flat sentence per protocol, used as input for the text extractors.
    protocol_text = (
        f"Protocol {protocol.get('protocol_name', '')} "
        f"type {protocol.get('protocol_type', '')} "
        f"TVL {protocol.get('tvl', 0)} "
        f"APY {protocol.get('apy', 0)} "
        f"token {protocol.get('token_symbol', '')}"
    )
    all_defi_texts.append(protocol_text)

# Extract entities, relationships and events from each protocol sentence
all_entities = []
all_relationships = []
all_events = []

for text in all_defi_texts:
    entities = ner_extractor.extract(text)
    all_entities.extend(entities)

    relationships = relation_extractor.extract(text, entities)
    all_relationships.extend(relationships)

    events = event_detector.detect_events(text)
    all_events.extend(events)

# Build structured entity lists straight from the records.
# The three lists are filled in lockstep: index i of each one describes the
# same source record.
protocol_entities = []
pool_entities = []
token_entities = []

for protocol in all_protocols:
    protocol_name = protocol.get("protocol_name", "")
    pool_address = protocol.get("pool_address", "")
    token_address = protocol.get("token_address", "")
    tvl = protocol.get("tvl", 0)
    apy = protocol.get("apy", 0)

    # Protocol ids are the display name with spaces replaced by underscores.
    protocol_entity = {
        "id": protocol_name.replace(" ", "_"),
        "type": "Protocol",
        "properties": {
            "name": protocol_name,
            "type": protocol.get("protocol_type", ""),
            "tvl": tvl,
            "apy": apy,
            "chain": protocol.get("chain", ""),
            "timestamp": protocol.get("timestamp", "")
        }
    }
    protocol_entities.append(protocol_entity)

    # Add pool entity (identified by its address)
    pool_entity = {
        "id": pool_address,
        "type": "Pool",
        "properties": {
            "address": pool_address,
            "protocol": protocol_name,
            "tvl": tvl,
            "apy": apy
        }
    }
    pool_entities.append(pool_entity)

    # Add token entity (identified by its address)
    token_entity = {
        "id": token_address,
        "type": "Token",
        "properties": {
            "address": token_address,
            "symbol": protocol.get("token_symbol", ""),
            "protocol": protocol_name
        }
    }
    token_entities.append(token_entity)

print(f"Extracted {len(protocol_entities)} protocols")
print(f"Extracted {len(pool_entities)} pools")
print(f"Extracted {len(token_entities)} tokens")
print(f"Extracted {len(all_relationships)} relationships")
print(f"Detected {len(all_events)} events")


## Step 3: Build DeFi Knowledge Graph

Build a knowledge graph from extracted DeFi entities and relationships.


In [None]:
builder = GraphBuilder()

# Add all entities: protocols first, then pools, then tokens.
for entity in protocol_entities + pool_entities + token_entities:
    builder.add_entity(
        entity_id=entity["id"],
        entity_type=entity["type"],
        properties=entity.get("properties", {})
    )

# Describe every edge once, then feed the graph from that list, so the
# `relationships` list always matches what the graph contains (including
# the pool -> token "contains" edges).
relationships = []
for protocol, pool, token in zip(protocol_entities, pool_entities, token_entities):
    protocol_id = protocol["id"]
    pool_id = pool["id"]
    token_id = token["id"]

    relationships.extend([
        # Protocol-Pool relationship
        {"source": protocol_id, "target": pool_id, "type": "has_pool"},
        # Protocol-Token relationship
        {"source": protocol_id, "target": token_id, "type": "has_token"},
        # Pool-Token relationship
        {"source": pool_id, "target": token_id, "type": "contains"},
    ])

# Add relationships
for relationship in relationships:
    builder.add_relationship(
        source_id=relationship["source"],
        target_id=relationship["target"],
        relationship_type=relationship["type"],
        properties={}
    )

knowledge_graph = builder.build()

print(f"Built knowledge graph with {len(knowledge_graph.nodes)} nodes")
print(f"Built knowledge graph with {len(knowledge_graph.edges)} edges")
print(f"Added {len(relationships)} DeFi relationships")


## Step 4: Analyze DeFi Relationships and Assess Risks

Analyze protocol relationships, detect communities, and assess risks.


In [None]:
graph_analyzer = GraphAnalyzer()
centrality_calculator = CentralityCalculator()
community_detector = CommunityDetector()
connectivity_analyzer = ConnectivityAnalyzer()
temporal_query = TemporalGraphQuery(knowledge_graph)
pattern_detector = TemporalPatternDetector()

# Whole-graph metrics
graph_metrics = graph_analyzer.compute_metrics(knowledge_graph)

# Betweenness centrality; keep the ten highest-scoring (id, score) pairs
centrality_result = centrality_calculator.calculate_betweenness_centrality(knowledge_graph)
centrality_scores = centrality_result.get('centrality', {})
top_central_protocols = sorted(
    centrality_scores.items(),
    key=lambda pair: pair[1],
    reverse=True,
)[:10]

# Community detection (Louvain)
communities_result = community_detector.detect_communities(knowledge_graph, algorithm="louvain")
communities = communities_result.get('communities', [])
community_count = len(communities or [])

# Connectivity analysis
connectivity_results = connectivity_analyzer.analyze_connectivity(knowledge_graph)

# Temporal patterns over the protocol -> pool / protocol -> token edges
temporal_patterns = pattern_detector.detect_temporal_patterns(
    knowledge_graph,
    relationship_types=["has_pool", "has_token"],
    time_window_hours=24
)

# Risk Assessment using Inference Engine
inference_engine = InferenceEngine()
rule_manager = RuleManager()

# Declarative risk rules registered with the rule manager.
# NOTE(review): the scores below are computed directly in Python and do not
# read these rules back; the inference engine is not invoked in this cell.
risk_rules = [
    {"name": rule_name, "condition": rule_condition, "action": rule_action}
    for rule_name, rule_condition, rule_action in (
        ("high_tvl_risk", "tvl > 5000000000 AND apy < 5", "flag_as_low_yield_high_tvl"),
        ("high_apy_risk", "apy > 20", "flag_as_high_risk_high_yield"),
        ("optimal_protocol", "tvl > 1000000000 AND apy BETWEEN 8 AND 15", "flag_as_optimal"),
    )
]

for rule in risk_rules:
    rule_manager.add_rule(rule["name"], rule["condition"], rule["action"])

# Score every protocol from its TVL and APY
protocol_risks = []
for protocol in protocol_entities:
    props = protocol["properties"]
    tvl = props.get("tvl", 0)
    apy = props.get("apy", 0)

    # (applies, points, label) - listed in the order factors are reported
    penalties = (
        (tvl > 5000000000 and apy < 5, 2, "low_yield_high_tvl"),
        (apy > 20, 3, "high_apy_risk"),
        (tvl < 500000000, 1, "low_tvl"),
    )
    risk_factors = [label for applies, _, label in penalties if applies]
    risk_score = sum(points for applies, points, _ in penalties if applies)

    # Mid-sized TVL with a moderate APY earns one point back (never below 0)
    if 1000000000 <= tvl <= 5000000000 and 8 <= apy <= 15:
        risk_score = max(0, risk_score - 1)
        risk_factors.append("optimal_range")

    protocol_risks.append({
        "protocol": protocol["id"],
        "name": props.get("name", ""),
        "risk_score": min(risk_score, 10),
        "risk_factors": risk_factors,
        "tvl": tvl,
        "apy": apy
    })

print(f"Analyzed {len(protocol_entities)} protocols")
print(f"Found {community_count} protocol communities")
print(f"Detected {len(temporal_patterns)} temporal patterns")
print("\nTop 5 Central Protocols:")
for i, (protocol_id, centrality) in enumerate(top_central_protocols[:5], 1):
    # Show the display name when the id belongs to a protocol entity,
    # otherwise fall back to the raw id.
    matching = [p for p in protocol_entities if p["id"] == protocol_id]
    if matching:
        protocol_name = matching[0]["properties"].get("name", protocol_id)
    else:
        protocol_name = protocol_id
    print(f"  {i}. {protocol_name} (centrality: {centrality:.3f})")
print("\nProtocol Risk Assessment:")
riskiest_first = sorted(protocol_risks, key=lambda entry: entry["risk_score"], reverse=True)
for risk in riskiest_first[:5]:
    factors_text = ', '.join(risk['risk_factors'])
    print(f"  - {risk['name']}: Risk Score {risk['risk_score']}/10, Factors: {factors_text}")


## Step 5: Generate DeFi Ontology and Optimize Yields

Generate DeFi protocol ontology and optimize yield strategies.


In [None]:
ontology_generator = OntologyGenerator()
class_inferrer = ClassInferrer()
property_generator = PropertyGenerator()
ontology_validator = OntologyValidator()

# Every entity, in the order protocols, pools, tokens; built once and reused
# for generation and inference below.
ontology_entities = protocol_entities + pool_entities + token_entities

# Generate DeFi ontology
defi_ontology = ontology_generator.generate_ontology(
    {"entities": ontology_entities, "relationships": relationships},
    name="DeFiOntology",
    entities=ontology_entities,
    relationships=relationships
)

# Infer classes/properties (for inspection)
classes = class_inferrer.infer_classes(ontology_entities)
properties = property_generator.infer_properties(ontology_entities, relationships, classes)

# Ensure ontology dict has classes/properties: fill in the inferred ones only
# where the generator left the entry missing or empty.
if not defi_ontology.get("classes"):
    defi_ontology["classes"] = classes
if not defi_ontology.get("properties"):
    defi_ontology["properties"] = properties

# Validate ontology
validation_result = ontology_validator.validate_ontology(defi_ontology)

# Yield optimization
yield_optimization = []
for protocol in protocol_entities:
    tvl = protocol["properties"].get("tvl", 0)
    apy = protocol["properties"].get("apy", 0)
    protocol_type = protocol["properties"].get("type", "")

    # Yield score: 60% APY plus 40% TVL in billions, with the TVL term capped
    # at 10 so very large protocols do not dominate.
    yield_score = (apy * 0.6) + (min(tvl / 1000000000, 10) * 0.4)

    optimization_suggestions = []
    if apy < 8 and tvl > 1000000000:
        optimization_suggestions.append("Consider higher APY protocols for better yield")
    if tvl < 500000000:
        optimization_suggestions.append("Low TVL may indicate higher risk")
    if apy > 15:
        optimization_suggestions.append("High APY may indicate higher risk, diversify")

    yield_optimization.append({
        "protocol": protocol["properties"].get("name", ""),
        "yield_score": yield_score,
        "apy": apy,
        "tvl": tvl,
        "suggestions": optimization_suggestions
    })

print(f"Generated DeFi ontology with {len(defi_ontology.get('classes', []))} classes")
print(f"Ontology validation: {'Valid' if validation_result.valid else 'Invalid'}")
print(f"  Errors: {len(validation_result.errors)}")
print(f"  Warnings: {len(validation_result.warnings)}")
print("\nYield Optimization Recommendations:")
for opt in sorted(yield_optimization, key=lambda x: x["yield_score"], reverse=True)[:5]:
    print(f"  - {opt['protocol']}: Yield Score {opt['yield_score']:.2f}, APY {opt['apy']:.1f}%")
    for suggestion in opt['suggestions']:
        print(f"    → {suggestion}")


## Step 6: Generate Reports and Visualize

Generate comprehensive DeFi intelligence reports and visualizations.


In [None]:
json_exporter = JSONExporter()
rdf_exporter = RDFExporter()
owl_exporter = OWLExporter()
report_generator = ReportGenerator()

# Export knowledge graph
kg_json = json_exporter.export(knowledge_graph, output_path=os.path.join(temp_dir, "defi_kg.json"))
kg_rdf = rdf_exporter.export(knowledge_graph, output_path=os.path.join(temp_dir, "defi_kg.rdf"))

# Export ontology
ontology_owl = owl_exporter.export(defi_ontology, output_path=os.path.join(temp_dir, "defi_ontology.owl"))

# NOTE(review): the Step 4 scoring adds at most 3 + 1 points to a protocol, so
# with a cutoff of 7 this count stays at 0 - confirm the intended threshold.
high_risk_count = sum(1 for r in protocol_risks if r['risk_score'] >= 7)

# Generate report: collect the markdown fragments and join them once at the end.
report_parts = [f"""
# DeFi Protocol Intelligence Report

## Executive Summary
- Total Protocols Analyzed: {len(protocol_entities)}
- Total Pools: {len(pool_entities)}
- Total Tokens: {len(token_entities)}
- Protocol Communities: {community_count}
- High-Risk Protocols: {high_risk_count}

## Top Protocols by Centrality
"""]
for i, (protocol_id, centrality) in enumerate(top_central_protocols[:10], 1):
    # Use the protocol's display name when the id belongs to a protocol entity.
    protocol_name = next((p["properties"].get("name", protocol_id) for p in protocol_entities if p["id"] == protocol_id), protocol_id)
    report_parts.append(f"\n{i}. {protocol_name} (Centrality: {centrality:.3f})")

# The list items above carry no trailing newline, so add a blank line to keep
# the next heading separate from the last item.
report_parts.append("\n\n## Risk Assessment\n")
for risk in sorted(protocol_risks, key=lambda x: x["risk_score"], reverse=True):
    # Show an explicit "None" rather than an empty value.
    risk_factors_text = ', '.join(risk['risk_factors']) or "None"
    report_parts.append(f"""
### {risk['name']}
- Risk Score: {risk['risk_score']}/10
- TVL: ${risk['tvl']:,.0f}
- APY: {risk['apy']:.1f}%
- Risk Factors: {risk_factors_text}
""")

report_parts.append("\n## Yield Optimization\n")
for opt in sorted(yield_optimization, key=lambda x: x["yield_score"], reverse=True)[:10]:
    report_parts.append(f"""
### {opt['protocol']}
- Yield Score: {opt['yield_score']:.2f}
- APY: {opt['apy']:.1f}%
- TVL: ${opt['tvl']:,.0f}
- Suggestions:
""")
    # Avoid a dangling "Suggestions:" header when there is nothing to suggest.
    suggestions = opt['suggestions'] or ["None"]
    report_parts.extend(f"  - {suggestion}\n" for suggestion in suggestions)

report_content = "".join(report_parts)

report_path = os.path.join(temp_dir, "defi_intelligence_report.md")
# Explicit encoding so the file does not depend on the platform default.
with open(report_path, 'w', encoding="utf-8") as f:
    f.write(report_content)

print("Exported knowledge graph to JSON and RDF")
print("Exported ontology to OWL")
print(f"Generated intelligence report: {report_path}")


## Step 7: Visualize DeFi Network

Visualize the DeFi protocol network, ontology, and analytics.


In [None]:
kg_visualizer = KGVisualizer()
ontology_visualizer = OntologyVisualizer()
analytics_visualizer = AnalyticsVisualizer()

# Knowledge graph view: protocol nodes are highlighted and nodes are sized by
# the "tvl" property.
protocol_node_ids = [entity["id"] for entity in protocol_entities]
kg_viz = kg_visualizer.visualize(
    knowledge_graph,
    layout="force_directed",
    highlight_nodes=protocol_node_ids,
    node_size_by="tvl",
)

# Ontology view, laid out hierarchically
ontology_viz = ontology_visualizer.visualize(defi_ontology, layout="hierarchical")

# Analytics view: bundle the results computed in the analysis step
analytics_metrics = {
    "centrality": dict(top_central_protocols[:10]),
    "communities": communities,
    "connectivity": connectivity_results,
    "risk_scores": dict((entry["protocol"], entry["risk_score"]) for entry in protocol_risks),
}
analytics_viz = analytics_visualizer.visualize(knowledge_graph, metrics=analytics_metrics)
