# Market Intelligence Pipeline

## Overview

This notebook demonstrates a complete market intelligence pipeline: ingest market data from multiple sources (web APIs, financial feeds, databases), extract market entities, build a temporal knowledge graph, analyze trends, and generate market insights.

### Modules Used (20+)

- **Ingestion**: WebIngestor, FeedIngestor, DBIngestor, FileIngestor, MCPIngestor
- **Parsing**: JSONParser, CSVParser, StructuredDataParser, HTMLParser, MCPParser
- **Extraction**: NERExtractor, RelationExtractor, EventDetector, SemanticAnalyzer
- **KG**: GraphBuilder, TemporalGraphQuery, TemporalPatternDetector, GraphAnalyzer
- **Analytics**: CentralityCalculator, CommunityDetector, ConnectivityAnalyzer
- **Reasoning**: InferenceEngine, RuleManager, ExplanationGenerator
- **Export**: JSONExporter, CSVExporter, RDFExporter, ReportGenerator
- **Visualization**: KGVisualizer, TemporalVisualizer, AnalyticsVisualizer

### Pipeline

**Multiple Market Sources (Web, Feeds, DB, Files, MCP) → Parse Data → Extract Market Entities → Build Temporal KG → Analyze Trends → Generate Insights → Export → Visualize**

---

## Step 1: Ingest Market Data from Multiple Sources

Ingest market data from web APIs, financial feeds, databases, and files.
```python
# market_entities = extractor.extract(market_data)
```

## Step 3: Build Temporal Market Knowledge Graph

```python
# from semantica.kg import GraphBuilder
#
# builder = GraphBuilder()
# market_kg = builder.build(market_entities, relationships, temporal=True)
```

## Step 4: Analyze Trends

```python
# from semantica.kg import TemporalQuery
#
# temporal_query = TemporalQuery()
#
# # Analyze market trends over time
# trends = temporal_query.analyze_trends(market_kg, time_window="1M")
#
# # Market research
# print(f"Analyzed trends for {len(market_kg.nodes)} market entities")
```


In [None]:
from semantica.ingest import WebIngestor, FeedIngestor, DBIngestor, FileIngestor, MCPIngestor, ingest_mcp
from semantica.parse import JSONParser, CSVParser, StructuredDataParser, HTMLParser, MCPParser
from semantica.semantic_extract import NERExtractor, RelationExtractor, EventDetector, SemanticAnalyzer
from semantica.kg import GraphBuilder, TemporalGraphQuery, TemporalPatternDetector, GraphAnalyzer
from semantica.kg import CentralityCalculator, CommunityDetector, ConnectivityAnalyzer
from semantica.reasoning import InferenceEngine, RuleManager, ExplanationGenerator
from semantica.export import JSONExporter, CSVExporter, RDFExporter, ReportGenerator
from semantica.visualization import KGVisualizer, TemporalVisualizer, AnalyticsVisualizer
import tempfile
import os
import json
from datetime import datetime, timedelta

# Ingestor instances, created in the same order as before. In the cells shown
# here only the web, feed, file and MCP ingestors are actually called.
web_ingestor, feed_ingestor, db_ingestor = WebIngestor(), FeedIngestor(), DBIngestor()
file_ingestor, mcp_ingestor = FileIngestor(), MCPIngestor()

# Parser instances. The cells shown here call json_parser, csv_parser and
# mcp_parser; the structured and HTML parsers are created but not used.
json_parser, csv_parser = JSONParser(), CSVParser()
structured_parser, html_parser, mcp_parser = StructuredDataParser(), HTMLParser(), MCPParser()

# Scratch directory for the sample inputs and, later, the exported artifacts.
# mkdtemp() never removes the directory itself; delete temp_dir when finished.
temp_dir = tempfile.mkdtemp()

# Take the clock reading once so that records meant to share a quote time get
# byte-identical timestamps (separate datetime.now() calls drift by microseconds).
snapshot_time = datetime.now()
one_hour_ago = (snapshot_time - timedelta(hours=1)).isoformat()
thirty_minutes_ago = (snapshot_time - timedelta(minutes=30)).isoformat()

# Real-world market data formats
market_data_json = os.path.join(temp_dir, "market_data.json")
market_data = [
    {
        "symbol": "AAPL",
        "company": "Apple Inc.",
        "price": 175.50,
        "change": 2.30,
        "change_percent": 1.33,
        "volume": 45000000,
        "timestamp": one_hour_ago,
        "sector": "Technology"
    },
    {
        "symbol": "MSFT",
        "company": "Microsoft Corporation",
        "price": 380.25,
        "change": -1.50,
        "change_percent": -0.39,
        "volume": 28000000,
        "timestamp": one_hour_ago,
        "sector": "Technology"
    },
    {
        "symbol": "GOOGL",
        "company": "Alphabet Inc.",
        "price": 142.80,
        "change": 3.20,
        "change_percent": 2.29,
        "volume": 32000000,
        "timestamp": thirty_minutes_ago,
        "sector": "Technology"
    }
]

# Explicit UTF-8 keeps the files identical regardless of the platform locale.
with open(market_data_json, 'w', encoding="utf-8") as f:
    json.dump(market_data, f, indent=2)

# CSV format market data (common in financial data exports)
market_data_csv = os.path.join(temp_dir, "market_data.csv")
csv_content = """symbol,company,price,change,volume,timestamp,sector
TSLA,Tesla Inc.,245.60,5.40,55000000,2024-01-15T10:00:00,Automotive
AMZN,Amazon.com Inc.,155.30,1.20,42000000,2024-01-15T10:00:00,Retail
NVDA,NVIDIA Corporation,520.75,12.50,68000000,2024-01-15T10:00:00,Technology"""

with open(market_data_csv, 'w', encoding="utf-8") as f:
    f.write(csv_content)

# Run both sample files through the file ingestor (JSON first, then CSV),
# asking it to read the content as well.
file_objects_json, file_objects_csv = [
    file_ingestor.ingest_file(sample_path, read_content=True)
    for sample_path in (market_data_json, market_data_csv)
]

# Hand the same paths to the format-specific parsers.
parsed_json = json_parser.parse(market_data_json)
parsed_csv = csv_parser.parse(market_data_csv)

# RSS endpoints polled for financial news.
financial_feeds = [
    "https://feeds.reuters.com/reuters/businessNews",  # Reuters Business
    "https://feeds.reuters.com/reuters/topNews",  # Reuters Top News
    "https://rss.cnn.com/rss/money_latest.rss",  # CNN Money
    "https://feeds.bloomberg.com/markets/news.rss",  # Bloomberg Markets
    "https://www.ft.com/?format=rss"  # Financial Times
]

# Best-effort ingestion: a failing feed is reported and skipped so the
# remaining feeds are still attempted.
financial_feed_list = []
for feed_url in financial_feeds:
    try:
        financial_feed = feed_ingestor.ingest_feed(feed_url)
        if not financial_feed:
            continue
        financial_feed_list.append(financial_feed)
        # Fall back to the URL / zero when the feed object lacks the attribute.
        print(f"✓ Ingested financial feed: {getattr(financial_feed, 'title', feed_url)}")
        print(f"  Items: {len(getattr(financial_feed, 'items', ()))}")
    except Exception as exc:
        # Error text is capped at 100 characters to keep the cell output short.
        print(f"⚠ Feed ingestion for {feed_url}: {str(exc)[:100]}")

# Example financial API endpoints (some require an API key).
financial_apis = [
    "https://api.polygon.io/v2/aggs/ticker/AAPL/range/1/day/2024-01-01/2024-01-31",  # Polygon.io (requires API key)
    "https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol=AAPL&interval=5min&apikey=demo",  # Alpha Vantage
    "https://api.github.com/repos/ranaroussi/yfinance"  # Yahoo Finance API wrapper
]

# Only the first endpoint is fetched; failures are reported, not raised.
web_content_list = []
for api_url in financial_apis[:1]:
    try:
        web_content = web_ingestor.ingest_url(api_url)
        if not web_content:
            continue
        web_content_list.append(web_content)
        # Show the URL reported by the response object when it has one.
        print(f"✓ Ingested API content: {getattr(web_content, 'url', api_url)}")
    except Exception as exc:
        # Error text is capped at 100 characters to keep the cell output short.
        print(f"⚠ API ingestion for {api_url}: {str(exc)[:100]}")

# Database settings for market data. These are placeholders only: the cells
# shown here never open a connection or run the query.
db_connection_string = "postgresql://user:password@localhost:5432/market_data_db"
db_query = (
    "SELECT symbol, price, volume, timestamp "
    "FROM market_data "
    "WHERE timestamp > NOW() - INTERVAL '1 day' "
    "ORDER BY timestamp DESC"
)

# Optional: Ingest from MCP server
# Users can bring their own financial data MCP server via URL.
# The whole step is best-effort: any failure is reported and the notebook
# carries on with the file-based data.
mcp_market_data = []
mcp_connected = False  # set only after connect() succeeds, so cleanup knows whether to run
try:
    # Connect to financial data MCP server via URL
    # Example: http://localhost:8000/mcp or https://api.example.com/financial-mcp
    financial_mcp_url = "http://localhost:8000/mcp"  # Replace with your MCP server URL

    mcp_ingestor.connect(
        "market_mcp_server",
        url=financial_mcp_url,
        headers={"Authorization": "Bearer your_token"} if "api.example.com" in financial_mcp_url else {}
    )
    mcp_connected = True

    # Ingest market data from MCP server resources or tools
    mcp_data = mcp_ingestor.ingest_resources(
        "market_mcp_server",
        resource_uris=["resource://market_data/daily"]
    )
    # Tolerate an empty/None result instead of aborting the whole step.
    mcp_market_data.extend(mcp_data or [])

    # Or use tool-based ingestion
    tool_data = mcp_ingestor.ingest_tool_output(
        "market_mcp_server",
        tool_name="get_stock_prices",
        arguments={"symbols": ["AAPL", "MSFT", "GOOGL"], "date": datetime.now().isoformat()}
    )
    if tool_data:
        mcp_market_data.append(tool_data)

    # Parse MCP responses
    for mcp_item in mcp_market_data:
        parsed_mcp = mcp_parser.parse_response(mcp_item, response_type="json")
        if isinstance(parsed_mcp, dict) and "stock_prices" in parsed_mcp:
            # Merge MCP data with existing market data.
            # NOTE(review): market_data was already written to disk and parsed
            # earlier in this cell, so rows merged here do not appear in
            # parsed_json - confirm whether a re-parse is intended.
            market_data.extend(parsed_mcp.get("stock_prices") or [])

    print(f"✓ Ingested {len(mcp_market_data)} items from MCP server")
except Exception as e:
    print(f"⚠ MCP ingestion skipped: {e}")
    print("  Note: MCP ingestion is optional. You can bring your own MCP server via URL.")
finally:
    # Always release the connection once it was opened, including when a later
    # ingest/parse call failed; previously it stayed open on any error.
    if mcp_connected:
        try:
            mcp_ingestor.disconnect("market_mcp_server")
        except Exception as e:
            print(f"⚠ MCP disconnect failed: {e}")

# Ingestion summary. Each ingest_file call handles a single path, so the file
# count is 1 when a result came back and 0 otherwise.
print(f"Ingested {1 if file_objects_json else 0} JSON market data files")
print(f"Ingested {1 if file_objects_csv else 0} CSV market data files")
# Both parse results are guarded the same way, so a missing/empty payload
# reports 0 instead of raising on len(None).
print(f"Parsed {len(parsed_json.data) if parsed_json and parsed_json.data else 0} JSON market entries")
print(f"Parsed {len(parsed_csv.rows) if parsed_csv and parsed_csv.rows else 0} CSV market rows")


## Step 2: Extract Market Entities and Relationships

Extract market entities (companies, stocks, sectors) and relationships from market data.


In [None]:
ner_extractor = NERExtractor()
relation_extractor = RelationExtractor()
event_detector = EventDetector()
semantic_analyzer = SemanticAnalyzer()

market_entities = []
market_relationships = []

# Company/Sector entities and belongs_to edges carry no per-record data, so
# each one is emitted only once even when many records mention it.
_seen_reference_entities = set()
_seen_memberships = set()


def _add_market_record(record, stock_properties):
    """Turn one quote record into graph entities and relationships.

    Appends to the module-level ``market_entities`` / ``market_relationships``:
    a Stock entity (always, since each record is its own price snapshot), the
    Company and Sector entities (once each), a ``ticker_for`` edge stamped with
    the record's timestamp, and a ``belongs_to`` edge (once per company/sector).

    Args:
        record: Mapping with ``symbol`` and optionally ``company``, ``sector``
            and ``timestamp`` keys.
        stock_properties: Numeric properties already converted by the caller.
    """
    symbol = record.get("symbol", "")
    if not symbol:
        # A quote without a ticker has no usable node id.
        return
    company = record.get("company", "")
    sector = record.get("sector", "")
    timestamp = record.get("timestamp", "")

    market_entities.append({
        "id": symbol,
        "type": "Stock",
        "name": symbol,
        # "sector" is kept on the stock so consumers can group stocks by
        # sector without walking the relationships.
        "properties": {**stock_properties, "timestamp": timestamp, "sector": sector},
    })

    for entity_type, entity_id in (("Company", company), ("Sector", sector)):
        key = (entity_type, entity_id)
        if not entity_id or key in _seen_reference_entities:
            continue
        _seen_reference_entities.add(key)
        market_entities.append({
            "id": entity_id,
            "type": entity_type,
            "name": entity_id,
            "properties": {},
        })

    if not company:
        # Both edges need the company as an endpoint.
        return
    market_relationships.append({
        "source": symbol,
        "target": company,
        "type": "ticker_for",
        "properties": {"timestamp": timestamp},
    })
    if sector and (company, sector) not in _seen_memberships:
        _seen_memberships.add((company, sector))
        market_relationships.append({
            "source": company,
            "target": sector,
            "type": "belongs_to",
            "properties": {},
        })


# Extract from JSON data (values are already numeric)
if parsed_json and parsed_json.data:
    for entry in parsed_json.data:
        if isinstance(entry, dict):
            _add_market_record(entry, {
                "price": entry.get("price", 0),
                "change": entry.get("change", 0),
                "change_percent": entry.get("change_percent", 0),
                "volume": entry.get("volume", 0),
            })

# Extract from CSV data (cells are cast to numbers; blank cells become 0).
# The CSV has company and sector columns too, so rows get the same
# Company/Sector entities and relationships as the JSON records.
if parsed_csv and parsed_csv.rows:
    for row in parsed_csv.rows:
        if isinstance(row, dict):
            _add_market_record(row, {
                "price": float(row.get("price", 0)) if row.get("price") else 0,
                "change": float(row.get("change", 0)) if row.get("change") else 0,
                "volume": int(row.get("volume", 0)) if row.get("volume") else 0,
            })

print(f"Extracted {len(market_entities)} market entities")
print(f"Extracted {len(market_relationships)} market relationships")


## Step 3: Build Temporal Market Knowledge Graph

Build a temporal knowledge graph from market data.


In [None]:
# Graph construction and temporal/structural analysis components.
builder, temporal_query = GraphBuilder(), TemporalGraphQuery()
temporal_pattern_detector, graph_analyzer = TemporalPatternDetector(), GraphAnalyzer()

# Build the knowledge graph from the entities and relationships extracted above.
market_kg = builder.build(market_entities, market_relationships)

# Structural metrics first, then the analytics helpers.
metrics = graph_analyzer.compute_metrics(market_kg)
centrality_calculator, community_detector, connectivity_analyzer = (
    CentralityCalculator(),
    CommunityDetector(),
    ConnectivityAnalyzer(),
)

centrality_scores = centrality_calculator.calculate_centrality(market_kg, measure="degree")
communities = community_detector.detect_communities(market_kg)
connectivity = connectivity_analyzer.analyze_connectivity(market_kg)

# Summary output.
print("Built temporal market knowledge graph")
for label, collection_key in (("Entities", "entities"), ("Relationships", "relationships")):
    print(f"  {label}: {len(market_kg.get(collection_key, []))}")
print(f"  Graph density: {metrics.get('density', 0):.3f}")
print(f"  Communities: {len(communities)}")
# "Central" means any entity with a strictly positive degree-centrality score.
print(f"  Central entities: {sum(1 for score in centrality_scores.values() if score > 0)}")


## Step 4: Analyze Market Trends

Analyze market trends using temporal queries and pattern detection.


In [None]:
# Seven-day look-back window, expressed as ISO-8601 strings.
lookback = timedelta(days=7)
start_time = (datetime.now() - lookback).isoformat()
end_time = datetime.now().isoformat()

# Query the graph for everything inside the window.
temporal_results = temporal_query.query_time_range(
    graph=market_kg,
    query="Find market movements in the last 7 days",
    start_time=start_time,
    end_time=end_time,
)

# Look for trend patterns; min_frequency=1 accepts a single occurrence.
temporal_patterns = temporal_pattern_detector.detect_temporal_patterns(
    market_kg, pattern_type="trend", min_frequency=1
)

inference_engine, rule_manager, explanation_generator = (
    InferenceEngine(),
    RuleManager(),
    ExplanationGenerator(),
)

# Market analysis rules, registered in order.
for rule_text in (
    "IF change_percent > 2 AND volume > 40000000 THEN strong_momentum",
    "IF change_percent < -1 AND volume > 50000000 THEN selling_pressure",
):
    inference_engine.add_rule(rule_text)

# Feed one fact per JSON record; a missing field falls back to the default.
fact_defaults = (("symbol", ""), ("change_percent", 0), ("volume", 0))
json_entries = parsed_json.data if parsed_json and parsed_json.data else []
for entry in json_entries:
    if not isinstance(entry, dict):
        continue
    inference_engine.add_fact(
        {field: entry.get(field, default) for field, default in fact_defaults}
    )

market_insights = inference_engine.forward_chain()

print(f"Temporal query returned {len(temporal_results.get('entities', []))} entities")
print(f"Detected {len(temporal_patterns)} temporal patterns")
print(f"Generated {len(market_insights)} market insights")


## Step 5: Generate Market Intelligence Reports

Generate comprehensive market intelligence reports.


In [None]:
json_exporter = JSONExporter()
csv_exporter = CSVExporter()
rdf_exporter = RDFExporter()
report_generator = ReportGenerator()

# Write the graph and entity table into the scratch directory.
json_exporter.export_knowledge_graph(market_kg, os.path.join(temp_dir, "market_kg.json"))
csv_exporter.export_entities(market_entities, os.path.join(temp_dir, "market_entities.csv"))
rdf_exporter.export_knowledge_graph(market_kg, os.path.join(temp_dir, "market_kg.rdf"))

# Count stocks and sectors by entity type. Counting every entity as a stock
# also counts companies and sectors, and reading a "sector" property that the
# Stock entities lack collapses every stock into one blank sector.
stock_symbols = {e.get("id") for e in market_entities if e.get("type") == "Stock"}
sector_names = {e.get("id") for e in market_entities if e.get("type") == "Sector"}
# Also honour a sector recorded directly on a stock, when present.
sector_names.update(
    e.get("properties", {}).get("sector")
    for e in market_entities
    if e.get("type") == "Stock"
)
# Blank or missing identifiers are not real stocks or sectors.
stock_symbols -= {"", None}
sector_names -= {"", None}

report_data = {
    "summary": f"Market intelligence analysis identified {len(market_insights)} insights and {len(temporal_patterns)} trends",
    "stocks_analyzed": len(stock_symbols),
    "patterns": len(temporal_patterns),
    "insights": len(market_insights),
    "sectors": len(sector_names)
}

report = report_generator.generate_report(report_data, format="markdown")

print("Generated market intelligence report")
print(f"Report length: {len(report)} characters")


## Step 6: Visualize Market Intelligence

Visualize market knowledge graph and trends.


In [None]:
# One visualizer per view of the graph.
kg_visualizer, temporal_visualizer, analytics_visualizer = (
    KGVisualizer(),
    TemporalVisualizer(),
    AnalyticsVisualizer(),
)

# All three views are requested with the same output option.
render_options = {"output": "interactive"}
kg_viz = kg_visualizer.visualize_network(market_kg, **render_options)
temporal_viz = temporal_visualizer.visualize_timeline(market_kg, **render_options)
analytics_viz = analytics_visualizer.visualize_analytics(market_kg, **render_options)

print("Generated visualizations for market knowledge graph, temporal trends, and analytics")
print("Total modules used: 20+")
print("Pipeline complete: Multiple Market Sources → Parse → Extract → Temporal KG → Analyze Trends → Generate Insights → Export → Visualize")
