<a href="https://colab.research.google.com/github/shehan6000/Chatting-with-the-SEC-Knowledge-Graph/blob/main/Untitled8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# SEC Knowledge Graph Chat System - Free Interview Demo
# 100% Free Tools: NetworkX, SQLite, Ollama (Local LLM)

"""
SETUP INSTRUCTIONS:
1. Install required packages:
   !pip install networkx pandas sentence-transformers scikit-learn requests
   (sqlite3 ships with Python's standard library and must not be pip-installed.)

2. Install Ollama locally (optional for LLM features):
   - Download from: https://ollama.ai
   - Run: ollama pull llama2

3. Run all cells in order
4. Use the chat interface at the bottom
"""

# ============================================================================
# SECTION 1: IMPORTS AND CONFIGURATION
# ============================================================================

import json
import sqlite3
import re
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
import logging
from datetime import datetime
import networkx as nx
import pandas as pd
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

# For embeddings and similarity (free alternatives)
# These packages are optional: when any of them is missing the flag below is
# set to False and GraphManager skips loading an embedding model.
try:
    from sentence_transformers import SentenceTransformer
    from sklearn.metrics.pairwise import cosine_similarity
    import numpy as np
    EMBEDDINGS_AVAILABLE = True
except ImportError:
    EMBEDDINGS_AVAILABLE = False
    print("⚠️ Sentence transformers not available. Some features will be limited.")

# Setup logging
# Configures the root logger at INFO level; `logger` is the module-level
# logger used by every class in this file.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

print("✅ Imports loaded successfully")

# ============================================================================
# SECTION 2: CONFIGURATION MANAGER
# ============================================================================

@dataclass
class AppConfig:
    """Configuration for the application.

    Every field has a default, so ``AppConfig()`` yields a usable
    configuration without arguments.
    """
    # Path of the SQLite file opened by GraphManager._init_database.
    db_path: str = "sec_knowledge_graph.db"
    # NOTE(review): llm_endpoint, llm_model and log_level are not read by any
    # code visible in this file.
    llm_endpoint: str = "http://localhost:11434/api/generate"  # Ollama default
    llm_model: str = "llama2"
    # Copied into CypherGenerator.use_llm at construction time.
    use_local_llm: bool = True
    # Model name handed to SentenceTransformer when embeddings are available.
    embedding_model: str = "all-MiniLM-L6-v2"
    log_level: str = "INFO"
    # Row limit applied to the "full_text_search" query type.
    max_results: int = 50

class ConfigManager:
    """Owns the application's AppConfig and applies runtime overrides to it."""

    def __init__(self):
        """Create a configuration populated with the AppConfig defaults."""
        self.config = AppConfig()
        logger.info("Configuration initialized")

    def get_app_config(self) -> AppConfig:
        """Return the live AppConfig instance (the same object, not a copy)."""
        return self.config

    def update_config(self, **kwargs):
        """Overwrite existing configuration fields.

        Args:
            **kwargs: Field name / new value pairs. Only names that already
                exist on the config are applied.

        Unknown names are skipped, but a warning is logged so that a typo in
        a setting name does not go unnoticed.
        """
        for key, value in kwargs.items():
            if not hasattr(self.config, key):
                logger.warning(f"Ignoring unknown config key: {key}")
                continue
            setattr(self.config, key, value)
            logger.info(f"Updated config: {key} = {value}")

print("✅ Configuration Manager ready")

# ============================================================================
# SECTION 3: GRAPH MANAGER (NetworkX + SQLite)
# ============================================================================

class GraphManager:
    """Manages the knowledge graph using NetworkX and SQLite"""

    def __init__(self, config: AppConfig):
        """Set up the in-memory graph, the SQLite store and, if possible, embeddings.

        Args:
            config: Application settings; ``db_path`` and ``embedding_model``
                are read here.
        """
        self.config = config
        self.graph = nx.MultiDiGraph()
        self.db_conn = None
        self.embedding_model = None

        # Opens the connection and creates the schema when it is missing.
        self._init_database()

        # Embeddings are optional: nothing to do when the packages are absent.
        if not EMBEDDINGS_AVAILABLE:
            return

        try:
            self.embedding_model = SentenceTransformer(config.embedding_model)
            logger.info("Embedding model loaded")
        except Exception as e:
            # Loading is best-effort; the manager keeps working without a model.
            logger.warning(f"Could not load embedding model: {e}")

    def _init_database(self):
        """Open the SQLite database and create tables and indexes if missing.

        Connects to ``self.config.db_path``, stores the connection on
        ``self.db_conn``, runs the ``CREATE ... IF NOT EXISTS`` statements
        and commits.
        """
        connection = sqlite3.connect(self.config.db_path)
        self.db_conn = connection
        cur = connection.cursor()

        # Table definitions: companies, managers, and the ownership link table.
        table_statements = (
            """
            CREATE TABLE IF NOT EXISTS companies (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                city TEXT,
                state TEXT,
                cusip TEXT,
                description TEXT,
                latitude REAL,
                longitude REAL
            )
        """,
            """
            CREATE TABLE IF NOT EXISTS managers (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                city TEXT,
                state TEXT,
                cik TEXT,
                latitude REAL,
                longitude REAL
            )
        """,
            """
            CREATE TABLE IF NOT EXISTS ownership (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                manager_id INTEGER,
                company_id INTEGER,
                shares REAL,
                value REAL,
                FOREIGN KEY(manager_id) REFERENCES managers(id),
                FOREIGN KEY(company_id) REFERENCES companies(id)
            )
        """,
        )
        for statement in table_statements:
            cur.execute(statement)

        # Lookup indexes on the name and city columns of both entity tables.
        index_statements = (
            "CREATE INDEX IF NOT EXISTS idx_company_name ON companies(name)",
            "CREATE INDEX IF NOT EXISTS idx_company_city ON companies(city)",
            "CREATE INDEX IF NOT EXISTS idx_manager_name ON managers(name)",
            "CREATE INDEX IF NOT EXISTS idx_manager_city ON managers(city)",
        )
        for statement in index_statements:
            cur.execute(statement)

        connection.commit()
        logger.info("Database initialized")

    def add_sample_data(self):
        """Insert the demo companies, managers and holdings, then rebuild the graph.

        The method is safe to call repeatedly against the same database file.
        Companies and managers are deduplicated by their UNIQUE ``name``
        column via ``INSERT OR IGNORE``. The ``ownership`` table has no
        uniqueness constraint, so existing (manager, company) pairs are
        looked up first and skipped; otherwise every run would append another
        copy of each holding.
        """
        cursor = self.db_conn.cursor()

        # Sample companies
        companies = [
            ("APPLE INC", "Cupertino", "CA", "037833100", "Technology company specializing in consumer electronics", 37.3229, -122.0322),
            ("MICROSOFT CORP", "Redmond", "WA", "594918104", "Software and cloud computing services", 47.6740, -122.1215),
            ("NVIDIA CORP", "Santa Clara", "CA", "67066G104", "Graphics processing units and AI chips", 37.3688, -121.9644),
            ("TESLA INC", "Austin", "TX", "88160R101", "Electric vehicles and clean energy", 30.2672, -97.7431),
            ("ALPHABET INC", "Mountain View", "CA", "02079K107", "Internet services and technology", 37.4220, -122.0841),
            ("AMAZON COM INC", "Seattle", "WA", "023135106", "E-commerce and cloud computing", 47.6062, -122.3321),
            ("META PLATFORMS INC", "Menlo Park", "CA", "30303M102", "Social media and technology", 37.4849, -122.1477),
            ("INTEL CORP", "Santa Clara", "CA", "458140100", "Semiconductor chip manufacturer", 37.3874, -121.9638),
        ]

        for comp in companies:
            try:
                cursor.execute("""
                    INSERT OR IGNORE INTO companies (name, city, state, cusip, description, latitude, longitude)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                """, comp)
            except sqlite3.Error as e:
                logger.error(f"Error inserting company: {e}")

        # Sample investment managers
        managers = [
            ("VANGUARD GROUP INC", "Malvern", "PA", "0000102909", 40.0359, -75.5138),
            ("BLACKROCK INC", "New York", "NY", "0001364742", 40.7128, -74.0060),
            ("STATE STREET CORP", "Boston", "MA", "0000093751", 42.3601, -71.0589),
            ("FIDELITY MANAGEMENT & RESEARCH CO", "Boston", "MA", "0000315066", 42.3601, -71.0589),
            ("GEODE CAPITAL MANAGEMENT LLC", "Boston", "MA", "0001422183", 42.3601, -71.0589),
            ("CAPITAL RESEARCH GLOBAL INVESTORS", "Los Angeles", "CA", "0001067983", 34.0522, -118.2437),
            ("T ROWE PRICE ASSOCIATES INC", "Baltimore", "MD", "0001113169", 39.2904, -76.6122),
        ]

        for mgr in managers:
            try:
                cursor.execute("""
                    INSERT OR IGNORE INTO managers (name, city, state, cik, latitude, longitude)
                    VALUES (?, ?, ?, ?, ?, ?)
                """, mgr)
            except sqlite3.Error as e:
                logger.error(f"Error inserting manager: {e}")

        # Persist the entities before resolving their ids for the holdings.
        self.db_conn.commit()

        # Name -> id lookups used to translate the holdings below.
        cursor.execute("SELECT id, name FROM companies")
        company_map = {name: row_id for row_id, name in cursor.fetchall()}

        cursor.execute("SELECT id, name FROM managers")
        manager_map = {name: row_id for row_id, name in cursor.fetchall()}

        # Holdings already stored; used to keep repeated runs from duplicating rows.
        cursor.execute("SELECT manager_id, company_id FROM ownership")
        existing_pairs = set(cursor.fetchall())

        # (manager name, company name, shares, value)
        ownerships = [
            ("VANGUARD GROUP INC", "APPLE INC", 1200000000, 175000000000),
            ("VANGUARD GROUP INC", "MICROSOFT CORP", 900000000, 350000000000),
            ("BLACKROCK INC", "APPLE INC", 1050000000, 153000000000),
            ("BLACKROCK INC", "NVIDIA CORP", 300000000, 150000000000),
            ("STATE STREET CORP", "TESLA INC", 180000000, 45000000000),
            ("FIDELITY MANAGEMENT & RESEARCH CO", "AMAZON COM INC", 400000000, 70000000000),
            ("CAPITAL RESEARCH GLOBAL INVESTORS", "ALPHABET INC", 350000000, 50000000000),
            ("T ROWE PRICE ASSOCIATES INC", "META PLATFORMS INC", 250000000, 90000000000),
        ]

        for mgr_name, comp_name, shares, value in ownerships:
            manager_id = manager_map.get(mgr_name)
            company_id = company_map.get(comp_name)
            if manager_id is None or company_id is None:
                continue

            pair = (manager_id, company_id)
            if pair in existing_pairs:
                continue

            try:
                cursor.execute("""
                    INSERT INTO ownership (manager_id, company_id, shares, value)
                    VALUES (?, ?, ?, ?)
                """, (manager_id, company_id, shares, value))
                existing_pairs.add(pair)
            except sqlite3.Error as e:
                logger.error(f"Error inserting ownership: {e}")

        self.db_conn.commit()
        self._build_graph()
        logger.info("Sample data added successfully")

    def _build_graph(self):
        """Rebuild the NetworkX graph from the current database contents.

        Nodes are keyed ``company_<id>`` / ``manager_<id>`` and each ownership
        row becomes one ``OWNS_STOCK_IN`` edge from manager to company.
        """
        cursor = self.db_conn.cursor()

        # The graph is a MultiDiGraph, which keeps parallel edges. Without
        # clearing first, a second build would add every ownership edge again.
        self.graph.clear()

        # Add company nodes
        cursor.execute("SELECT id, name, city, state FROM companies")
        for company_id, name, city, state in cursor.fetchall():
            self.graph.add_node(f"company_{company_id}",
                              type="Company",
                              name=name,
                              city=city,
                              state=state)

        # Add manager nodes
        cursor.execute("SELECT id, name, city, state FROM managers")
        for manager_id, name, city, state in cursor.fetchall():
            self.graph.add_node(f"manager_{manager_id}",
                              type="Manager",
                              name=name,
                              city=city,
                              state=state)

        # Add ownership edges (the joins drop rows whose ids do not resolve)
        cursor.execute("""
            SELECT m.id, c.id, o.shares, o.value
            FROM ownership o
            JOIN managers m ON o.manager_id = m.id
            JOIN companies c ON o.company_id = c.id
        """)
        for manager_id, company_id, shares, value in cursor.fetchall():
            self.graph.add_edge(f"manager_{manager_id}",
                              f"company_{company_id}",
                              relationship="OWNS_STOCK_IN",
                              shares=shares,
                              value=value)

        logger.info(f"Graph built: {self.graph.number_of_nodes()} nodes, {self.graph.number_of_edges()} edges")

    def execute_query(self, query_type: str, params: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Execute different types of queries"""
        cursor = self.db_conn.cursor()

        if query_type == "companies_in_city":
            city = params.get("city", "")
            cursor.execute("""
                SELECT name, city, state FROM companies
                WHERE LOWER(city) = LOWER(?)
                ORDER BY name
            """, (city,))
            return [{"companyName": row[0], "city": row[1], "state": row[2]}
                   for row in cursor.fetchall()]

        elif query_type == "managers_in_city":
            city = params.get("city", "")
            cursor.execute("""
                SELECT name, city, state FROM managers
                WHERE LOWER(city) = LOWER(?)
                ORDER BY name
            """, (city,))
            return [{"managerName": row[0], "city": row[1], "state": row[2]}
                   for row in cursor.fetchall()]

        elif query_type == "company_description":
            company = params.get("company", "")
            cursor.execute("""
                SELECT name, description, city, state FROM companies
                WHERE LOWER(name) LIKE LOWER(?)
                LIMIT 1
            """, (f"%{company}%",))
            row = cursor.fetchone()
            if row:
                return [{
                    "companyName": row[0],
                    "description": row[1],
                    "city": row[2],
                    "state": row[3]
                }]
            return []

        elif query_type == "spatial_search":
            city = params.get("city", "")
            distance = params.get("distance", 50000)  # meters
            entity_type = params.get("entity_type", "Manager")

            # Get reference location
            if entity_type == "Manager":
                cursor.execute("SELECT latitude, longitude FROM managers WHERE LOWER(city) = LOWER(?) LIMIT 1", (city,))
            else:
                cursor.execute("SELECT latitude, longitude FROM companies WHERE LOWER(city) = LOWER(?) LIMIT 1", (city,))

            ref_loc = cursor.fetchone()
            if not ref_loc:
                return []

            # Simple distance calculation (Haversine approximation)
            lat1, lon1 = ref_loc

            if entity_type == "Manager":
                cursor.execute("SELECT name, city, state, latitude, longitude FROM managers")
            else:
                cursor.execute("SELECT name, city, state, latitude, longitude FROM companies")

            results = []
            for row in cursor.fetchall():
                lat2, lon2 = row[3], row[4]
                dist = self._calculate_distance(lat1, lon1, lat2, lon2)
                if dist <= distance:
                    key = "managerName" if entity_type == "Manager" else "companyName"
                    results.append({
                        key: row[0],
                        "city": row[1],
                        "state": row[2],
                        "distance_km": round(dist / 1000, 2)
                    })

            return sorted(results, key=lambda x: x["distance_km"])

        elif query_type == "full_text_search":
            search_term = params.get("search_term", "")
            entity_type = params.get("entity_type", "Company")

            if entity_type == "Company":
                cursor.execute("""
                    SELECT name, city, state, description FROM companies
                    WHERE LOWER(name) LIKE LOWER(?)
                    ORDER BY name
                    LIMIT ?
                """, (f"%{search_term}%", self.config.max_results))
                return [{"companyName": row[0], "city": row[1], "state": row[2], "description": row[3]}
                       for row in cursor.fetchall()]
            else:
                cursor.execute("""
                    SELECT name, city, state FROM managers
                    WHERE LOWER(name) LIKE LOWER(?)
                    ORDER BY name
                    LIMIT ?
                """, (f"%{search_term}%", self.config.max_results))
                return [{"managerName": row[0], "city": row[1], "state": row[2]}
                       for row in cursor.fetchall()]

        elif query_type == "state_analysis":
            entity_type = params.get("entity_type", "Manager")

            if entity_type == "Manager":
                cursor.execute("""
                    SELECT state, COUNT(*) as count FROM managers
                    GROUP BY state
                    ORDER BY count DESC
                """)
                return [{"state": row[0], "count": row[1]} for row in cursor.fetchall()]
            else:
                cursor.execute("""
                    SELECT state, COUNT(*) as count FROM companies
                    GROUP BY state
                    ORDER BY count DESC
                """)
                return [{"state": row[0], "count": row[1]} for row in cursor.fetchall()]

        return []

    def _calculate_distance(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
        """Calculate distance between two points (Haversine formula)"""
        from math import radians, sin, cos, sqrt, atan2

        R = 6371000  # Earth radius in meters

        lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
        dlat = lat2 - lat1
        dlon = lon2 - lon1

        a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
        c = 2 * atan2(sqrt(a), sqrt(1-a))

        return R * c

    def health_check(self) -> bool:
        """Check if graph database is healthy"""
        try:
            cursor = self.db_conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM companies")
            count = cursor.fetchone()[0]
            return count > 0
        except Exception as e:
            logger.error(f"Health check failed: {e}")
            return False

    def get_stats(self) -> Dict[str, Any]:
        """Get graph statistics"""
        cursor = self.db_conn.cursor()

        cursor.execute("SELECT COUNT(*) FROM companies")
        company_count = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM managers")
        manager_count = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM ownership")
        relationship_count = cursor.fetchone()[0]

        return {
            "companies": company_count,
            "managers": manager_count,
            "relationships": relationship_count,
            "graph_nodes": self.graph.number_of_nodes(),
            "graph_edges": self.graph.number_of_edges()
        }

print("✅ Graph Manager ready")

# ============================================================================
# SECTION 4: CYPHER GENERATOR (Free LLM Alternative)
# ============================================================================

class CypherGenerator:
    """Generates queries using local LLM or rule-based approach"""

    def __init__(self, config: AppConfig):
        """Store the configuration and cache its ``use_local_llm`` flag."""
        self.config = config
        # NOTE(review): use_llm is stored here but not consulted by any method
        # visible in this class; query planning below is purely rule-based.
        self.use_llm = config.use_local_llm

    def generate_query_plan(self, question: str) -> Dict[str, Any]:
        """Generate a query plan from natural language question"""
        question_lower = question.lower()

        # Rule-based query understanding
        plan = {
            "query_type": None,
            "params": {},
            "confidence": 0.0
        }

        # Pattern: Companies in city
        if "companies" in question_lower and "in" in question_lower:
            city = self._extract_city(question)
            if city:
                plan["query_type"] = "companies_in_city"
                plan["params"]["city"] = city
                plan["confidence"] = 0.9

        # Pattern: Managers/Investment firms in city
        elif any(term in question_lower for term in ["managers", "investment firms", "firms"]):
            if "in" in question_lower:
                city = self._extract_city(question)
                if city:
                    plan["query_type"] = "managers_in_city"
                    plan["params"]["city"] = city
                    plan["confidence"] = 0.9

        # Pattern: What does X do / Describe X
        elif any(term in question_lower for term in ["what does", "describe", "tell me about"]):
            company = self._extract_company_name(question)
            if company:
                plan["query_type"] = "company_description"
                plan["params"]["company"] = company
                plan["confidence"] = 0.85

        # Pattern: Near/spatial queries
        elif any(term in question_lower for term in ["near", "close to", "around"]):
            city = self._extract_city(question)
            if city:
                entity_type = "Manager" if any(t in question_lower for t in ["managers", "firms"]) else "Company"
                plan["query_type"] = "spatial_search"
                plan["params"]["city"] = city
                plan["params"]["entity_type"] = entity_type
                plan["params"]["distance"] = 50000  # 50km default
                plan["confidence"] = 0.8

        # Pattern: State analysis
        elif "state" in question_lower and any(term in question_lower for term in ["most", "top"]):
            entity_type = "Manager" if any(t in question_lower for t in ["managers", "firms"]) else "Company"
            plan["query_type"] = "state_analysis"
            plan["params"]["entity_type"] = entity_type
            plan["confidence"] = 0.85

        # Pattern: Search/find
        elif any(term in question_lower for term in ["find", "search"]):
            search_term = self._extract_search_term(question)
            if search_term:
                entity_type = "Manager" if any(t in question_lower for t in ["managers", "firms"]) else "Company"
                plan["query_type"] = "full_text_search"
                plan["params"]["search_term"] = search_term
                plan["params"]["entity_type"] = entity_type
                plan["confidence"] = 0.75

        return plan

    def _extract_city(self, question: str) -> Optional[str]:
        """Extract city name from question"""
        # Common patterns
        patterns = [
            r'in ([A-Z][a-z]+(?: [A-Z][a-z]+)*)',
            r'near ([A-Z][a-z]+(?: [A-Z][a-z]+)*)',
            r'around ([A-Z][a-z]+(?: [A-Z][a-z]+)*)',
        ]

        for pattern in patterns:
            match = re.search(pattern, question)
            if match:
                return match.group(1)

        # Common city names
        cities = ["Boston", "New York", "San Francisco", "Seattle", "Austin",
                 "Chicago", "Los Angeles", "Palo Alto", "Santa Clara",
                 "Cupertino", "Redmond", "Mountain View", "Menlo Park"]

        for city in cities:
            if city.lower() in question.lower():
                return city

        return None

    def _extract_company_name(self, question: str) -> Optional[str]:
        """Extract company name from question"""
        # Look for capitalized words
        words = question.split()
        for i, word in enumerate(words):
            if word[0].isupper() and i > 0:
                # Take 1-3 capitalized words
                company_parts = [word]
                for j in range(i+1, min(i+3, len(words))):
                    if words[j][0].isupper() or words[j].lower() in ['inc', 'corp', 'co']:
                        company_parts.append(words[j])
                    else:
                        break
                return " ".join(company_parts)

        return None

    def _extract_search_term(self, question: str) -> Optional[str]:
        """Extract search term from question"""
        # Remove common words
        stop_words = {"find", "search", "for", "companies", "managers", "firms", "investment", "named", "called"}
        words = [w for w in question.split() if w.lower() not in stop_words]

        # Return remaining words
        if words:
            return " ".join(words[:3])  # Take first 3 words

        return None

print("✅ Cypher Generator ready")

# ============================================================================
# SECTION 5: QUERY ENGINE
# ============================================================================

class QueryEngine:
    """Main query processing engine"""

    def __init__(self, config: AppConfig):
        """Build the graph manager and query generator and load the sample data.

        Args:
            config: Application settings passed on to both components.
        """
        self.config = config
        self.graph_manager = GraphManager(config)
        self.cypher_generator = CypherGenerator(config)

        # Initialize with sample data
        # (this also builds the in-memory graph via GraphManager.add_sample_data)
        self.graph_manager.add_sample_data()

        logger.info("Query Engine initialized")

    def process_query(self, question: str) -> Dict[str, Any]:
        """Process a natural language question"""
        try:
            # Generate query plan
            plan = self.cypher_generator.generate_query_plan(question)

            if plan["query_type"] is None:
                return {
                    "success": False,
                    "question": question,
                    "error": "Could not understand the question. Try rephrasing."
                }

            # Execute query
            results = self.graph_manager.execute_query(plan["query_type"], plan["params"])

            return {
                "success": True,
                "question": question,
                "query_type": plan["query_type"],
                "confidence": plan["confidence"],
                "result": results,
                "count": len(results)
            }

        except Exception as e:
            logger.error(f"Query processing error: {e}")
            return {
                "success": False,
                "question": question,
                "error": str(e)
            }

    def format_result(self, result: Dict[str, Any], width: int = 80) -> str:
        """Format query results for display"""
        if not result["success"]:
            return f"❌ Error: {result.get('error', 'Unknown error')}"

        output = []
        output.append("=" * width)
        output.append(f"Question: {result['question']}")
        output.append(f"Query Type: {result.get('query_type', 'N/A')}")
        output.append(f"Confidence: {result.get('confidence', 0):.1%}")
        output.append(f"Results Found: {result.get('count', 0)}")
        output.append("=" * width)

        if result.get("count", 0) == 0:
            output.append("\n⚠️ No results found.")
        else:
            output.append("\nResults:")
            output.append("-" * width)

            for i, item in enumerate(result["result"][:20], 1):  # Limit to 20 results
                output.append(f"\n{i}. {self._format_item(item)}")

            if len(result["result"]) > 20:
                output.append(f"\n... and {len(result['result']) - 20} more results")

        output.append("\n" + "=" * width)
        return "\n".join(output)

    def _format_item(self, item: Dict[str, Any]) -> str:
        """Format a single result item"""
        parts = []
        for key, value in item.items():
            if key != "description" or len(str(value)) < 100:
                parts.append(f"{key}: {value}")

        # Add description separately if it exists and is long
        if "description" in item and len(str(item["description"])) >= 100:
            parts.append(f"description: {item['description'][:150]}...")

        return "\n   ".join(parts)

    def health_check(self) -> Dict[str, str]:
        """Check system health"""
        graph_health = "healthy" if self.graph_manager.health_check() else "unhealthy"

        return {
            "graph_database": graph_health,
            "query_generation": "healthy",
            "overall": graph_health
        }

    def get_system_stats(self) -> Dict[str, Any]:
        """Return the statistics dictionary produced by the graph manager's get_stats()."""
        return self.graph_manager.get_stats()

print("✅ Query Engine ready")

# ============================================================================
# SECTION 6: INITIALIZE SYSTEM
# ============================================================================

print("\n" + "="*80)
print("🚀 SEC KNOWLEDGE GRAPH CHAT SYSTEM - FREE VERSION")
print("="*80)

# Initialize system
# Constructing QueryEngine opens the SQLite file, loads the sample data and
# builds the graph; `query_engine` is the module-level instance that the
# interactive helpers defined later in this cell use.
config_manager = ConfigManager()
app_config = config_manager.get_app_config()
query_engine = QueryEngine(app_config)

# Run health check
health = query_engine.health_check()
print("\n📊 System Health Check:")
for component, status in health.items():
    emoji = "✅" if status == "healthy" else "❌"
    print(f"  {emoji} {component}: {status}")

# Display stats
stats = query_engine.get_system_stats()
print("\n📈 System Statistics:")
for key, value in stats.items():
    print(f"  • {key}: {value}")

print("\n" + "="*80)
print("✅ System Ready! Use the functions below to interact with the system.")
print("="*80)

# ============================================================================
# SECTION 7: INTERACTIVE FUNCTIONS
# ============================================================================

def ask(question: str):
    """Run one question through the global query engine and print the report.

    Args:
        question: Natural-language question to answer.

    Returns:
        The raw response dictionary from ``query_engine.process_query``.
    """
    print(f"\n💬 Question: {question}")
    print("⏳ Processing...\n")

    outcome = query_engine.process_query(question)
    print(query_engine.format_result(outcome))

    return outcome

def show_examples():
    """Print a numbered list of example questions and a usage hint."""
    rule = "="*80
    examples = (
        "What companies are in Santa Clara?",
        "What investment firms are in Boston?",
        "What does Apple do?",
        "What investment firms are near Palo Alto?",
        "Which state has the most investment firms?",
        "Find companies named Tesla",
        "Describe Microsoft",
    )

    print("\n📚 Example Queries:")
    print(rule)
    number = 0
    for example in examples:
        number += 1
        print(f"{number}. {example}")
    print(rule)
    print("\nUse: ask('your question here')")

def run_demo():
    """Ask a fixed set of demo questions, printing each formatted answer."""
    print("\n🎬 Running Demo Queries...")
    print("="*80)

    for demo_question in (
        "What companies are in Santa Clara?",
        "What investment firms are in Boston?",
        "What does Apple do?",
    ):
        ask(demo_question)
        # Blank spacing between consecutive answers.
        print("\n")



✅ Imports loaded successfully
✅ Configuration Manager ready
✅ Graph Manager ready
✅ Cypher Generator ready
✅ Query Engine ready

🚀 SEC KNOWLEDGE GRAPH CHAT SYSTEM - FREE VERSION


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


📊 System Health Check:
  ✅ graph_database: healthy
  ✅ query_generation: healthy
  ✅ overall: healthy

📈 System Statistics:
  • companies: 8
  • managers: 7
  • relationships: 8
  • graph_nodes: 15
  • graph_edges: 8

✅ System Ready! Use the functions below to interact with the system.


In [2]:
show_examples()



📚 Example Queries:
1. What companies are in Santa Clara?
2. What investment firms are in Boston?
3. What does Apple do?
4. What investment firms are near Palo Alto?
5. Which state has the most investment firms?
6. Find companies named Tesla
7. Describe Microsoft

Use: ask('your question here')


In [3]:
run_demo()



🎬 Running Demo Queries...

💬 Question: What companies are in Santa Clara?
⏳ Processing...

Question: What companies are in Santa Clara?
Query Type: companies_in_city
Confidence: 90.0%
Results Found: 2

Results:
--------------------------------------------------------------------------------

1. companyName: INTEL CORP
   city: Santa Clara
   state: CA

2. companyName: NVIDIA CORP
   city: Santa Clara
   state: CA




💬 Question: What investment firms are in Boston?
⏳ Processing...

Question: What investment firms are in Boston?
Query Type: managers_in_city
Confidence: 90.0%
Results Found: 3

Results:
--------------------------------------------------------------------------------

1. managerName: FIDELITY MANAGEMENT & RESEARCH CO
   city: Boston
   state: MA

2. managerName: GEODE CAPITAL MANAGEMENT LLC
   city: Boston
   state: MA

3. managerName: STATE STREET CORP
   city: Boston
   state: MA




💬 Question: What does Apple do?
⏳ Processing...

Question: What does Apple do?
Query Ty

In [4]:
ask("What companies are in Santa Clara?")



💬 Question: What companies are in Santa Clara?
⏳ Processing...

Question: What companies are in Santa Clara?
Query Type: companies_in_city
Confidence: 90.0%
Results Found: 2

Results:
--------------------------------------------------------------------------------

1. companyName: INTEL CORP
   city: Santa Clara
   state: CA

2. companyName: NVIDIA CORP
   city: Santa Clara
   state: CA



{'success': True,
 'question': 'What companies are in Santa Clara?',
 'query_type': 'companies_in_city',
 'confidence': 0.9,
 'result': [{'companyName': 'INTEL CORP',
   'city': 'Santa Clara',
   'state': 'CA'},
  {'companyName': 'NVIDIA CORP', 'city': 'Santa Clara', 'state': 'CA'}],
 'count': 2}