# Market Data Analysis Pipeline

## Overview

This notebook demonstrates a complete market data analysis pipeline: stream market data from multiple sources (trading APIs, financial feeds, databases), build a temporal market knowledge graph, analyze patterns, and predict trends.

### Modules Used (20+)

- **Ingestion**: StreamIngestor, FileIngestor, WebIngestor, DBIngestor, FeedIngestor
- **Parsing**: JSONParser, CSVParser, StructuredDataParser
- **Extraction**: NERExtractor, RelationExtractor, EventDetector, SemanticAnalyzer
- **KG**: GraphBuilder, TemporalGraphQuery, TemporalPatternDetector, GraphAnalyzer
- **Analytics**: CentralityCalculator, CommunityDetector, ConnectivityAnalyzer
- **Reasoning**: InferenceEngine, RuleManager, ExplanationGenerator
- **Quality**: KGQualityAssessor
- **Export**: JSONExporter, CSVExporter, RDFExporter, ReportGenerator
- **Visualization**: KGVisualizer, TemporalVisualizer, AnalyticsVisualizer

### Pipeline

**Stream Market Data → Parse → Extract Entities → Build Temporal Market KG → Analyze Patterns → Predict Trends → Generate Reports → Visualize**

---

## Step 1: Stream Market Data from Multiple Sources

Stream market data from trading APIs, financial feeds, and databases.


In [None]:
from semantica.ingest import StreamIngestor, FileIngestor, WebIngestor, DBIngestor, FeedIngestor
from semantica.parse import JSONParser, CSVParser, StructuredDataParser
from semantica.semantic_extract import NERExtractor, RelationExtractor, EventDetector, SemanticAnalyzer
from semantica.kg import GraphBuilder, TemporalGraphQuery, TemporalPatternDetector, GraphAnalyzer
from semantica.kg import CentralityCalculator, CommunityDetector, ConnectivityAnalyzer
from semantica.reasoning import InferenceEngine, RuleManager, ExplanationGenerator
from semantica.kg_qa import KGQualityAssessor
from semantica.export import JSONExporter, CSVExporter, RDFExporter, ReportGenerator
from semantica.visualization import KGVisualizer, TemporalVisualizer, AnalyticsVisualizer
import tempfile
import os
import json
from datetime import datetime, timedelta

# Parser instances. Only structured_parser is called in the visible cells.
structured_parser = StructuredDataParser()
csv_parser = CSVParser()
json_parser = JSONParser()

# Ingestor instances. Only file_ingestor and feed_ingestor are called in the
# visible cells; the others are created alongside them.
file_ingestor = FileIngestor()
feed_ingestor = FeedIngestor()
web_ingestor = WebIngestor()
db_ingestor = DBIngestor()
stream_ingestor = StreamIngestor()

# Streaming sources for market data.
# Connection URLs that embed credentials can be overridden through environment
# variables so real secrets never have to be written into the notebook; the
# defaults are the local-development placeholders.
stream_sources = [
    {
        "type": "kafka",
        "topic": "market_data",
        "bootstrap_servers": ["localhost:9092"],
        "consumer_config": {"group_id": "market_analysis"},
    },
    {
        "type": "rabbitmq",
        "queue": "trading_events",
        # Override with MARKET_RABBITMQ_URL to supply real credentials.
        "connection_url": os.environ.get(
            "MARKET_RABBITMQ_URL", "amqp://user:password@localhost:5672/"
        ),
    },
]

# Market data endpoints.
market_apis = [
    "https://api.polygon.io/v2/aggs/ticker/AAPL/range/1/minute/2024-01-15/2024-01-15",  # Polygon.io
    "https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol=AAPL&interval=1min&apikey=demo",  # Alpha Vantage
    # NOTE: this URL is the GitHub REST endpoint for the yfinance repository
    # (repository metadata), not a Yahoo Finance quote endpoint.
    "https://api.github.com/repos/ranaroussi/yfinance",
]

# News/RSS feed URLs; the feed-ingestion loop only reads the first two.
financial_feeds = [
    "https://feeds.reuters.com/reuters/businessNews",
    "https://rss.cnn.com/rss/money_latest.rss",
    "https://feeds.bloomberg.com/markets/news.rss",
]

# Database connection for market data.
# Override with MARKET_DB_URL to supply real credentials.
db_connection_string = os.environ.get(
    "MARKET_DB_URL", "postgresql://user:password@localhost:5432/market_data_db"
)
# Most recent 1000 rows from the last hour, newest first.
db_query = "SELECT symbol, price, volume, timestamp FROM market_data WHERE timestamp > NOW() - INTERVAL '1 hour' ORDER BY timestamp DESC LIMIT 1000"

# Scratch directory for the sample input file; the export cell (Step 5) also
# writes into it. mkdtemp() does not clean up after itself, so remove the
# directory manually once the notebook is finished.
temp_dir = tempfile.mkdtemp()

# Sample streaming market data
market_data_file = os.path.join(temp_dir, "market_data_stream.json")

# Take a single reference time so the sample ticks are exactly one minute
# apart instead of drifting by the time between successive now() calls.
sample_time = datetime.now()
market_stream = [
    {
        "symbol": "AAPL",
        "price": 175.50,
        "volume": 1000000,
        "timestamp": (sample_time - timedelta(minutes=5)).isoformat(),
    },
    {
        "symbol": "MSFT",
        "price": 380.25,
        "volume": 800000,
        "timestamp": (sample_time - timedelta(minutes=4)).isoformat(),
    },
    {
        "symbol": "GOOGL",
        "price": 142.80,
        "volume": 1200000,
        "timestamp": (sample_time - timedelta(minutes=3)).isoformat(),
    },
]

# Explicit encoding keeps the file independent of the platform's locale default.
with open(market_data_file, 'w', encoding="utf-8") as f:
    json.dump(market_stream, f, indent=2)

# Ingest the file and parse the same file into structured records.
file_objects = file_ingestor.ingest_file(market_data_file, read_content=True)
parsed_data = structured_parser.parse_json(market_data_file)

# Ingest from financial feeds.
# Only the first two configured feeds are attempted. Ingestion is best-effort:
# a failing feed is reported (message truncated to 100 chars) and skipped so
# it does not abort the rest of the notebook.
financial_feed_list = []
for feed_url in financial_feeds[:2]:
    try:
        feed_data = feed_ingestor.ingest_feed(feed_url)
    except Exception as e:
        print(f"⚠ Feed ingestion for {feed_url}: {str(e)[:100]}")
    else:
        if feed_data:
            financial_feed_list.append(feed_data)
            # Fall back to the URL when the feed object exposes no title.
            print(f"✓ Ingested financial feed: {getattr(feed_data, 'title', feed_url)}")

# Count ingested files without assuming the shape of the ingest result:
# a list/tuple is counted by length, any other truthy result counts as one.
# NOTE(review): confirm what ingest_file actually returns.
if not file_objects:
    market_file_count = 0
elif isinstance(file_objects, (list, tuple)):
    market_file_count = len(file_objects)
else:
    market_file_count = 1

print("\n📊 Ingestion Summary:")
print(f"  Market data files: {market_file_count}")
print(f"  Financial feeds: {len(financial_feed_list)}")
print(f"  Streaming sources: {len(stream_sources)}")
print("  Database sources: 1")


## Step 2: Extract Market Entities and Build Temporal Knowledge Graph

Extract market entities and build a temporal knowledge graph.


In [None]:
# Extractor instances. None of them is called in this cell: the entities and
# relationships below are assembled directly from the parsed market records.
ner_extractor = NERExtractor()
relation_extractor = RelationExtractor()
event_detector = EventDetector()
semantic_analyzer = SemanticAnalyzer()

market_entities = []
market_relationships = []

# Extract from market data
if parsed_data and parsed_data.data:
    # Accept either a list of records or a single record.
    market_records = parsed_data.data if isinstance(parsed_data.data, list) else [parsed_data.data]
    for market_entry in market_records:
        if not isinstance(market_entry, dict):
            continue

        symbol = market_entry.get("symbol", "")
        if not symbol:
            # A record without a symbol would yield an entity with an empty id.
            continue

        price = market_entry.get("price", 0)
        volume = market_entry.get("volume", 0)
        timestamp = market_entry.get("timestamp", "")

        market_entities.append({
            "id": symbol,
            "type": "Stock",
            "name": symbol,
            "properties": {
                "price": price,
                "volume": volume,
                "timestamp": timestamp
            }
        })

        # Price events: only for a positive numeric price (a None or
        # non-numeric price is skipped instead of raising on the comparison).
        # NOTE(review): the target id is not added to market_entities —
        # confirm the graph builder accepts relationship targets that are
        # not declared as entities.
        if isinstance(price, (int, float)) and price > 0:
            market_relationships.append({
                "source": symbol,
                "target": f"{symbol}_price_{timestamp}",
                "type": "has_price",
                "properties": {
                    "price": price,
                    "timestamp": timestamp
                }
            })

# Graph tooling. The builder and analyzer are used right below; the temporal
# query and pattern detector are used by the analysis cell (Step 3).
graph_analyzer = GraphAnalyzer()
temporal_pattern_detector = TemporalPatternDetector()
temporal_query = TemporalGraphQuery()
builder = GraphBuilder()

# Build the graph from the extracted entities/relationships, then compute
# its metrics (stored in `metrics`; not printed here).
market_kg = builder.build(market_entities, market_relationships)
metrics = graph_analyzer.compute_metrics(market_kg)

# Three-line summary of what was extracted and built.
print(
    "\n".join(
        [
            f"Extracted {len(market_entities)} market entities",
            f"Extracted {len(market_relationships)} relationships",
            f"Built temporal market knowledge graph with {len(market_kg.get('entities', []))} entities",
        ]
    )
)


## Step 3: Analyze Market Patterns

Analyze market patterns using temporal analysis and graph analytics.


In [None]:
# Analytics helpers used by this cell.
connectivity_analyzer = ConnectivityAnalyzer()
community_detector = CommunityDetector()
centrality_calculator = CentralityCalculator()

# Query window: from one hour ago until now, as ISO-8601 strings.
start_time = (datetime.now() - timedelta(hours=1)).isoformat()
end_time = datetime.now().isoformat()

# Temporal range query over the market graph.
temporal_results = temporal_query.query_time_range(
    graph=market_kg,
    query="Find market movements in the last hour",
    start_time=start_time,
    end_time=end_time,
)

# Trend patterns, requested with a minimum frequency of 1.
temporal_patterns = temporal_pattern_detector.detect_temporal_patterns(
    market_kg, pattern_type="trend", min_frequency=1
)

# Structural analytics on the same graph.
centrality_scores = centrality_calculator.calculate_centrality(
    market_kg, measure="degree"
)
communities = community_detector.detect_communities(market_kg)
connectivity = connectivity_analyzer.analyze_connectivity(market_kg)

# Summary; "central" stocks are those with a strictly positive score.
print(f"Temporal query returned {len(temporal_results.get('entities', []))} entities")
print(f"Detected {len(temporal_patterns)} temporal patterns")
print(f"Central stocks: {sum(1 for score in centrality_scores.values() if score > 0)}")
print(f"Communities: {len(communities)}")


## Step 4: Predict Market Trends

Predict market trends using the inference engine.


In [None]:
# Reasoning components; only inference_engine is called in this cell.
explanation_generator = ExplanationGenerator()
rule_manager = RuleManager()
inference_engine = InferenceEngine()

# Market trend prediction rules, registered in this order.
# NOTE(review): the first two rules reference `price_change`, but the facts
# added below only carry symbol, volume and price — confirm how the engine
# treats a condition on a missing attribute.
for rule_text in (
    "IF volume > 1000000 AND price_change > 0 THEN bullish_signal",
    "IF volume > 1000000 AND price_change < 0 THEN bearish_signal",
    "IF multiple stocks show same pattern THEN market_trend",
):
    inference_engine.add_rule(rule_text)

# Add one fact per parsed market record (a single record is treated as a
# one-element list; non-dict entries are ignored).
if parsed_data and parsed_data.data:
    fact_records = parsed_data.data if isinstance(parsed_data.data, list) else [parsed_data.data]
    for market_entry in fact_records:
        if not isinstance(market_entry, dict):
            continue
        fact = {
            "symbol": market_entry.get("symbol", ""),
            "volume": market_entry.get("volume", 0),
            "price": market_entry.get("price", 0),
        }
        inference_engine.add_fact(fact)

# Run forward chaining over the registered rules and facts.
trend_predictions = inference_engine.forward_chain()

print(f"Generated {len(trend_predictions)} market trend predictions")


## Step 5: Generate Reports and Visualize

Generate market analysis reports and visualize results.


In [None]:
# Assess the quality of the market graph.
quality_assessor = KGQualityAssessor()
quality_score = quality_assessor.assess_overall_quality(market_kg)

# Export the graph as JSON, the entities as CSV, and the graph as RDF,
# all into the notebook's temp directory.
json_exporter = JSONExporter()
json_kg_path = os.path.join(temp_dir, "market_kg.json")
json_exporter.export_knowledge_graph(market_kg, json_kg_path)

csv_exporter = CSVExporter()
csv_entities_path = os.path.join(temp_dir, "market_entities.csv")
csv_exporter.export_entities(market_entities, csv_entities_path)

rdf_exporter = RDFExporter()
rdf_kg_path = os.path.join(temp_dir, "market_kg.rdf")
rdf_exporter.export_knowledge_graph(market_kg, rdf_kg_path)

# Headline numbers for the report; quality falls back to 0 when the
# assessment has no 'overall_score' entry.
report_data = dict(
    summary=f"Market data analysis identified {len(trend_predictions)} trend predictions from {len(market_entities)} entities",
    stocks_analyzed=len(market_entities),
    patterns=len(temporal_patterns),
    predictions=len(trend_predictions),
    quality_score=quality_score.get('overall_score', 0),
)

# Render the report as markdown (kept in `report`; not written to disk here).
report_generator = ReportGenerator()
report = report_generator.generate_report(report_data, format="markdown")

# Visualizers for the network view, the timeline view and the analytics view.
kg_visualizer = KGVisualizer()
temporal_visualizer = TemporalVisualizer()
analytics_visualizer = AnalyticsVisualizer()

# Each visualization is requested in "interactive" output mode for the market graph.
kg_viz = kg_visualizer.visualize_network(market_kg, output="interactive")
temporal_viz = temporal_visualizer.visualize_timeline(market_kg, output="interactive")
analytics_viz = analytics_visualizer.visualize_analytics(market_kg, output="interactive")

# Final status lines (plain strings: nothing is interpolated).
print("Generated market analysis report and visualizations")
print("Total modules used: 20+")
print("Pipeline complete: Stream Market Data → Parse → Extract → Build Temporal KG → Analyze Patterns → Predict Trends → Reports → Visualize")
