# IMPORTS

In [1]:
import os
import json
import glob
from string import Template
from timeit import default_timer as timer
from dotenv import load_dotenv
from time import sleep
from groq import Groq
from transformers import pipeline
import networkx as nx
import matplotlib.pyplot as plt
import logging
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
from abc import ABC, abstractmethod
import sqlite3

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Configure root logging once: INFO level, every record prefixed with a
# timestamp and its severity.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by every function defined in this file.
logger = logging.getLogger(__name__)

# Load variables from a .env file (if one is found) into the process
# environment so that os.getenv("GROQ_API_KEY") can see the key.
load_dotenv()

True

In [3]:
# Initialize the Groq client.
# The API key is validated *before* the client is constructed, so a missing
# or empty key always produces the same clear message instead of whatever
# error the client constructor happens to raise for it.
# On any failure groq_client is set to None; run_llm_query() checks for that.
try:
    if not os.getenv("GROQ_API_KEY"):
        raise ValueError("GROQ_API_KEY not found in environment variables")
    groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
except Exception as e:
    logger.error(f"Failed to initialize Groq client: {e}")
    groq_client = None

In [4]:
# Build the zero-shot classifier used for confidence scoring.
# `classifier` stays None when the pipeline cannot be created, which
# score_confidence() treats as "classifier unavailable".
classifier = None
try:
    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
except Exception as exc:
    logger.error(f"Failed to initialize classifier: {exc}")

Device set to use cpu


# GRAPH DATABASE

In [10]:
@dataclass
class Entity:
    """A graph node as handed to a graph database connector.

    The SQLite connector in this file stores ``id`` and ``label`` as columns
    and serializes ``properties`` with ``json.dumps``.
    """
    id: str  # unique node identifier (primary key of the entities table)
    label: str  # node category; the prompt templates use e.g. "Project", "Person"
    properties: Dict[str, Any]  # all remaining attributes of the node

@dataclass
class Relationship:
    """A directed, typed edge between two entities.

    The SQLite connector in this file stores ``source``, ``target`` and
    ``type`` as columns and serializes ``properties`` with ``json.dumps``.
    """
    source: str  # id of the entity the edge starts from
    target: str  # id of the entity the edge points to
    type: str  # relationship name; the prompt templates use e.g. "USES_TECH"
    properties: Dict[str, Any]  # extra edge attributes (e.g. a confidence score)

In [11]:
class GraphDatabaseConnector(ABC):
    """Abstract base class for graph database connectors.

    Concrete backends must implement every method below; the class cannot be
    instantiated directly because all of them are abstract.
    """

    @abstractmethod
    def connect(self) -> bool:
        """Open the connection to the backend and report whether it succeeded."""
        pass

    @abstractmethod
    def close(self) -> None:
        """Release the connection to the backend."""
        pass

    @abstractmethod
    def create_entity(self, entity: Entity) -> bool:
        """Persist one entity and report whether it was stored."""
        pass

    @abstractmethod
    def create_relationship(self, relationship: Relationship) -> bool:
        """Persist one relationship and report whether it was stored."""
        pass

    @abstractmethod
    def query(self, query: str, parameters: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
        """Run a backend-specific query and return the result rows as dictionaries.

        Args:
            query: Query text in the backend's own query language.
            parameters: Optional values to bind into the query.
        """
        pass

    @abstractmethod
    def clear_database(self) -> None:
        """Remove all stored entities and relationships."""
        pass

In [12]:
class SQLiteGraphConnector(GraphDatabaseConnector):
    """SQLite-based graph database connector for lightweight usage.

    Entities and relationships live in two tables (``entities`` and
    ``relationships``); their free-form properties are stored as JSON text.
    Every public method logs failures and reports them through its return
    value instead of raising.
    """

    def __init__(self, db_path: str = "graph_data.db"):
        """Remember the database path; nothing is opened until connect()."""
        self.db_path = db_path
        self.connection = None

    def connect(self) -> bool:
        """Open the database file and make sure the schema exists.

        Returns:
            True on success. On failure the error is logged and False is
            returned; a connection that was opened but could not be set up
            is closed again and ``self.connection`` is reset to None.
        """
        new_connection = None
        try:
            new_connection = sqlite3.connect(self.db_path)
            # sqlite3.Row lets query() turn each row into a dict by column name.
            new_connection.row_factory = sqlite3.Row
            self.connection = new_connection
            self._create_tables()
            logger.info(f"Connected to SQLite database: {self.db_path}")
            return True
        except Exception as e:
            logger.error(f"Failed to connect to SQLite: {e}")
            # Release a connection whose setup failed so the handle is not
            # leaked and later calls cannot run against a missing schema.
            if new_connection is not None:
                try:
                    new_connection.close()
                except sqlite3.Error as close_error:
                    logger.debug(f"Closing failed connection raised: {close_error}")
                self.connection = None
            return False

    def _create_tables(self):
        """Create tables and indexes for entities and relationships if missing."""
        cursor = self.connection.cursor()

        # Entities table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS entities (
                id TEXT PRIMARY KEY,
                label TEXT NOT NULL,
                properties TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        # Relationships table.
        # NOTE: this connector never issues "PRAGMA foreign_keys = ON", and
        # SQLite does not enforce foreign keys without it, so the FOREIGN KEY
        # clauses below are declarative only.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS relationships (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source TEXT NOT NULL,
                target TEXT NOT NULL,
                type TEXT NOT NULL,
                properties TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (source) REFERENCES entities (id),
                FOREIGN KEY (target) REFERENCES entities (id)
            )
        """)

        # Create indexes for better performance
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_entities_label ON entities(label)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_relationships_source ON relationships(source)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_relationships_target ON relationships(target)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_relationships_type ON relationships(type)")

        self.connection.commit()

    def _rollback(self) -> None:
        """Discard a partially applied transaction after a failed write."""
        if self.connection is None:
            return
        try:
            self.connection.rollback()
        except sqlite3.Error as e:
            # Nothing more can be done here; the original failure is already logged.
            logger.debug(f"Rollback failed: {e}")

    def close(self):
        """Close the database connection; calling it again is a no-op."""
        if self.connection:
            self.connection.close()
            # Drop the stale handle so a second close() does nothing.
            self.connection = None
            logger.info("SQLite connection closed")

    def create_entity(self, entity: Entity) -> bool:
        """Insert the entity, replacing any existing row with the same id.

        Returns:
            True when the row was written and committed, False otherwise
            (the error is logged and the transaction rolled back).
        """
        try:
            cursor = self.connection.cursor()
            cursor.execute(
                "INSERT OR REPLACE INTO entities (id, label, properties) VALUES (?, ?, ?)",
                (entity.id, entity.label, json.dumps(entity.properties))
            )
            self.connection.commit()
            return True
        except Exception as e:
            logger.error(f"Failed to create entity {entity.id}: {e}")
            self._rollback()
            return False

    def create_relationship(self, relationship: Relationship) -> bool:
        """Insert the relationship unless an identical edge already exists.

        Two relationships count as identical when source, target and type
        all match; properties are not compared.

        Returns:
            True when the edge exists after the call (newly inserted or
            already present), False when the write failed.
        """
        try:
            cursor = self.connection.cursor()
            # Check if relationship already exists to avoid duplicates
            cursor.execute(
                "SELECT id FROM relationships WHERE source = ? AND target = ? AND type = ?",
                (relationship.source, relationship.target, relationship.type)
            )
            if cursor.fetchone():
                logger.debug(f"Relationship already exists: {relationship.source}->{relationship.target}")
                return True

            cursor.execute(
                "INSERT INTO relationships (source, target, type, properties) VALUES (?, ?, ?, ?)",
                (relationship.source, relationship.target, relationship.type,
                 json.dumps(relationship.properties))
            )
            self.connection.commit()
            return True
        except Exception as e:
            logger.error(f"Failed to create relationship {relationship.source}->{relationship.target}: {e}")
            self._rollback()
            return False

    def query(self, query: str, parameters: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
        """Execute a SQL query and return the rows as dictionaries.

        Args:
            query: SQL text using ``?`` placeholders.
            parameters: Values to bind. A dict is bound *positionally*: its
                values are used in insertion order and its keys are ignored,
                so it must hold exactly one value per placeholder. A tuple
                or list is passed to SQLite unchanged.

        Returns:
            One dict per result row, or an empty list when the query fails
            (the error is logged).
        """
        try:
            cursor = self.connection.cursor()
            if not parameters:
                cursor.execute(query)
            elif isinstance(parameters, dict):
                # Convert named parameters to positional for SQLite
                cursor.execute(query, tuple(parameters.values()))
            else:
                cursor.execute(query, parameters)
            return [dict(row) for row in cursor.fetchall()]
        except Exception as e:
            logger.error(f"Query failed: {e}")
            return []

    def clear_database(self):
        """Delete every relationship and entity; failures are logged, not raised."""
        try:
            cursor = self.connection.cursor()
            # Relationships go first because they reference entities.
            cursor.execute("DELETE FROM relationships")
            cursor.execute("DELETE FROM entities")
            self.connection.commit()
            logger.info("Database cleared")
        except Exception as e:
            logger.error(f"Failed to clear database: {e}")
            self._rollback()

In [13]:
def validate_graph_data(data: Dict[str, Any]) -> bool:
    """Validate the structure of extracted graph data.

    The data must be a dict with an ``entities`` list and a ``relationships``
    list, and every entity must be a dict carrying at least ``id`` and
    ``label``. Individual relationships are not inspected here.

    Args:
        data: Parsed LLM output. Anything that is not a dict (for example a
            bare JSON list, or None) is rejected instead of raising.

    Returns:
        True when the structure is valid; otherwise False, with the reason
        logged at error level.
    """
    required_keys = ['entities', 'relationships']

    # json.loads may legitimately return a list, string or number; those have
    # no .keys() and must be rejected before the key checks below.
    if not isinstance(data, dict):
        logger.error(f"Graph data should be a dict, got: {type(data).__name__}")
        return False

    if not all(key in data for key in required_keys):
        logger.error(f"Missing required keys. Expected: {required_keys}, Got: {list(data.keys())}")
        return False

    if not isinstance(data['entities'], list):
        logger.error("Entities should be a list")
        return False

    if not isinstance(data['relationships'], list):
        logger.error("Relationships should be a list")
        return False

    # Validate entity structure
    for entity in data['entities']:
        if not isinstance(entity, dict) or 'id' not in entity or 'label' not in entity:
            logger.error(f"Invalid entity structure: {entity}")
            return False

    return True

In [14]:
def store_in_graph_database(data: Dict[str, Any], db_connector: SQLiteGraphConnector) -> bool:
    """Store extracted entities and relationships in the graph database.

    Args:
        data: Graph data with ``entities`` and ``relationships`` lists.
            Relationships may be ``"source|TYPE|target"`` strings or dicts
            with ``source``, ``target`` and optional ``type`` keys.
        db_connector: Connected database connector.

    Returns:
        False when the data is structurally invalid, when any entity cannot
        be stored, or when an unexpected error occurs. True otherwise:
        malformed relationships and relationships that fail to store are
        logged and skipped without failing the whole call.
    """
    if not validate_graph_data(data):
        return False

    try:
        # Store entities
        for entity_data in data['entities']:
            entity_id = str(entity_data['id'])
            label = entity_data['label']
            properties = {k: v for k, v in entity_data.items() if k not in ['id', 'label']}

            entity = Entity(id=entity_id, label=label, properties=properties)
            if not db_connector.create_entity(entity):
                logger.error(f"Failed to store entity: {entity_id}")
                return False

        # Store relationships
        for rel in data['relationships']:
            try:
                if isinstance(rel, str) and "|" in rel:
                    parts = rel.split("|")
                    if len(parts) < 3:
                        logger.warning(f"Invalid relationship format: {rel}")
                        continue
                    source, rel_type, target = parts[0], parts[1], parts[2]
                    # Confidence is scored below, once the endpoints are known
                    # to be usable, to avoid a wasted classifier call.
                    properties = None
                elif isinstance(rel, dict):
                    raw_source = rel.get('source')
                    raw_target = rel.get('target')
                    # str(None) would otherwise produce the bogus id 'None'.
                    source = '' if raw_source is None else str(raw_source)
                    target = '' if raw_target is None else str(raw_target)
                    rel_type = rel.get('type', 'RELATED_TO')
                    properties = {k: v for k, v in rel.items() if k not in ['source', 'target', 'type']}
                else:
                    logger.warning(f"Unsupported relationship format: {rel}")
                    continue

                # An edge without both endpoints cannot be attached to the
                # graph; storing it would create a dangling '' -> '' row.
                if not source or not target:
                    logger.warning(f"Relationship is missing source or target: {rel}")
                    continue

                if properties is None:
                    properties = {"confidence": score_confidence(f"{source} {rel_type} {target}", rel_type)}

                relationship = Relationship(
                    source=source,
                    target=target,
                    type=rel_type,
                    properties=properties
                )

                if not db_connector.create_relationship(relationship):
                    logger.error(f"Failed to store relationship: {source}->{target}")

            except Exception as e:
                logger.error(f"Error processing relationship {rel}: {e}")
                continue

        return True

    except Exception as e:
        logger.error(f"Error storing data in graph database: {e}")
        return False

# GRAPH ANALYTICS

In [15]:
class GraphAnalytics:
    """Analytics and querying utilities for graph data.

    Every method issues SQL against the ``entities`` and ``relationships``
    tables through ``db_connector.query`` and returns its rows as
    dictionaries. Parameters are bound as positional tuples holding exactly
    one value per ``?`` placeholder.
    """

    def __init__(self, db_connector: "SQLiteGraphConnector"):
        """Keep a reference to the connector used for all queries."""
        self.db = db_connector

    def get_entity_counts(self) -> Dict[str, int]:
        """Get count of entities by label/type, as ``{label: count}``."""
        query = "SELECT label, COUNT(*) as count FROM entities GROUP BY label ORDER BY count DESC"
        results = self.db.query(query)
        return {row['label']: row['count'] for row in results}

    def get_relationship_counts(self) -> Dict[str, int]:
        """Get count of relationships by type, as ``{type: count}``."""
        query = "SELECT type, COUNT(*) as count FROM relationships GROUP BY type ORDER BY count DESC"
        results = self.db.query(query)
        return {row['type']: row['count'] for row in results}

    def find_central_entities(self, limit: int = 10) -> List[Dict[str, Any]]:
        """Find the most connected entities.

        Args:
            limit: Maximum number of rows to return.

        Returns:
            Rows with ``id``, ``label``, ``name`` and ``connections`` (number
            of relationships in which the entity is source or target),
            ordered by ``connections`` descending.
        """
        query = """
        SELECT e.id, e.label, 
               JSON_EXTRACT(e.properties, '$.name') as name,
               (SELECT COUNT(*) FROM relationships WHERE source = e.id OR target = e.id) as connections
        FROM entities e
        ORDER BY connections DESC
        LIMIT ?
        """
        return self.db.query(query, (limit,))

    def find_related_entities(self, entity_id: str, relationship_type: Optional[str] = None) -> List[Dict[str, Any]]:
        """Find entities directly related to a given entity.

        Args:
            entity_id: Id of the entity whose relations are wanted.
            relationship_type: When given, only relationships of this type
                are considered.

        Returns:
            Distinct rows with ``id``, ``label``, ``name`` and the
            relationship ``type`` for every entity on the other end of a
            relationship that touches ``entity_id``.
        """
        query = """
        SELECT DISTINCT e.id, e.label, JSON_EXTRACT(e.properties, '$.name') as name, r.type
        FROM entities e
        JOIN relationships r ON (r.source = e.id OR r.target = e.id)
        WHERE (r.source = ? OR r.target = ?) AND e.id != ?
        """
        # The id appears in three placeholders, so it must be bound three times.
        parameters = [entity_id, entity_id, entity_id]
        if relationship_type:
            query += " AND r.type = ?"
            parameters.append(relationship_type)

        return self.db.query(query, tuple(parameters))

    def get_entity_neighbors(self, entity_id: str, depth: int = 1) -> List[Dict[str, Any]]:
        """Get neighboring entities within the specified depth.

        Relationships are followed in both directions, breadth first.

        Args:
            entity_id: Id of the starting entity.
            depth: Maximum number of relationship hops; values below 1 yield
                an empty list.

        Returns:
            Rows with ``id``, ``label`` and ``name``, each entity at most
            once, nearest neighbors first. The starting entity is excluded.
        """
        query = """
        SELECT DISTINCT e.id, e.label, JSON_EXTRACT(e.properties, '$.name') as name
        FROM entities e
        JOIN relationships r ON (r.source = e.id OR r.target = e.id)
        WHERE (r.source = ? OR r.target = ?) AND e.id != ?
        """
        seen = {entity_id}
        frontier = [entity_id]
        neighbors: List[Dict[str, Any]] = []

        for _ in range(max(depth, 0)):
            next_frontier = []
            for node_id in frontier:
                for row in self.db.query(query, (node_id, node_id, node_id)):
                    if row['id'] in seen:
                        continue
                    seen.add(row['id'])
                    neighbors.append(row)
                    next_frontier.append(row['id'])
            if not next_frontier:
                break  # nothing new was reached; deeper levels are empty too
            frontier = next_frontier

        return neighbors

    def find_entities_by_label(self, label: str) -> List[Dict[str, Any]]:
        """Find all entities with a specific label.

        Returns:
            Rows with ``id``, ``label`` and the raw JSON ``properties`` text.
        """
        query = "SELECT id, label, properties FROM entities WHERE label = ?"
        return self.db.query(query, (label,))

    def get_relationship_details(self, source_id: Optional[str] = None, target_id: Optional[str] = None, rel_type: Optional[str] = None) -> List[Dict[str, Any]]:
        """Get detailed information about relationships.

        Each filter is applied only when it is given; with no filters every
        relationship whose two endpoints both exist as entities is returned.

        Returns:
            Rows with the relationship columns plus label and name of the
            source and target entities.
        """
        conditions = []
        params = []

        # Conditions and values are appended together so their order matches.
        if source_id:
            conditions.append("r.source = ?")
            params.append(source_id)
        if target_id:
            conditions.append("r.target = ?")
            params.append(target_id)
        if rel_type:
            conditions.append("r.type = ?")
            params.append(rel_type)

        where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""

        # Only fixed condition fragments are interpolated; values stay bound.
        query = f"""
        SELECT r.source, r.target, r.type, r.properties,
               e1.label as source_label, JSON_EXTRACT(e1.properties, '$.name') as source_name,
               e2.label as target_label, JSON_EXTRACT(e2.properties, '$.name') as target_name
        FROM relationships r
        JOIN entities e1 ON r.source = e1.id
        JOIN entities e2 ON r.target = e2.id
        {where_clause}
        """

        return self.db.query(query, tuple(params))

In [16]:
def run_analytics_demo(analytics: GraphAnalytics):
    """Log a short tour of the analytics API.

    Logs, in order: entity counts, relationship counts, the five most
    connected entities, the number of "Project" entities and up to five
    sample relationships. Nothing is returned.
    """
    logger.info("Running analytics demo...")

    # Aggregate counts and the most connected nodes.
    logger.info(f"Entity counts: {analytics.get_entity_counts()}")
    logger.info(f"Relationship counts: {analytics.get_relationship_counts()}")
    logger.info(f"Most connected entities: {analytics.find_central_entities(5)}")

    # Label lookup: only the number of matches is reported.
    project_rows = analytics.find_entities_by_label("Project")
    logger.info(f"Projects found: {len(project_rows)}")

    # Unfiltered relationship details, truncated to the first five rows.
    sample_rows = analytics.get_relationship_details()[:5]
    logger.info(f"Sample relationships: {sample_rows}")

# LLM

In [18]:
def run_llm_query(
    file_prompt: str,
    system_msg: str,
    max_retries: int = 3,
    *,
    model: str = "llama3-70b-8192",
    temperature: float = 0.5,
    max_tokens: int = 2048,
) -> Optional[str]:
    """Call the Groq LLM with retry logic and exponential backoff.

    Args:
        file_prompt: User message sent to the model.
        system_msg: System message sent to the model.
        max_retries: Total number of attempts; values below 1 mean no call
            is made at all.
        model: Groq model identifier.
        temperature: Sampling temperature passed to the API.
        max_tokens: Completion token limit passed to the API.

    Returns:
        The content of the first choice of the first successful response, or
        None when the client is not initialized or every attempt failed.
    """
    if not groq_client:
        logger.error("Groq client not initialized")
        return None

    for attempt in range(max_retries):
        try:
            response = groq_client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_msg},
                    {"role": "user", "content": file_prompt},
                ],
                temperature=temperature,
                max_tokens=max_tokens,
            )
            content = response.choices[0].message.content
            # Short pause after every successful call (presumably to stay
            # below the API rate limit - confirm against the account limits).
            sleep(1)
            return content
        except Exception as e:
            logger.warning(f"Attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                sleep(2 ** attempt)  # Exponential backoff: 1s, 2s, 4s, ...
            else:
                logger.error("All attempts failed for LLM query")
                return None

    # Reached only when max_retries < 1, i.e. no attempt was made.
    return None

# CONFIDENCE SCORE

In [19]:
def score_confidence(text: str, label: str) -> float:
    """Score how well ``label`` fits ``text`` with the zero-shot classifier.

    Args:
        text: Sentence describing the entity or relationship.
        label: The single candidate label to score.

    Returns:
        The classifier's first score multiplied by 5 (so 0-5, assuming the
        pipeline reports scores in [0, 1]), or 3.0 when the classifier is
        unavailable or raises.
    """
    fallback_confidence = 3.0  # middle of the scale

    if not classifier:
        logger.warning("Classifier not available, returning default confidence")
        return fallback_confidence

    try:
        scores = classifier(text, [label], multi_label=False)['scores']
        return scores[0] * 5
    except Exception as exc:
        logger.error(f"Error scoring confidence: {exc}")
        return fallback_confidence

# VISUALIZE

In [20]:
def visualize_graph(data: Dict[str, Any], output_file: Optional[str] = None) -> bool:
    """Render the graph with confidence-weighted edges and optionally save it.

    Args:
        data: Graph data with ``entities`` and ``relationships`` lists.
            Relationships may be ``"source|TYPE|target"`` strings or dicts
            with ``source``, ``target`` and optional ``type`` keys.
        output_file: Path of the image to write; nothing is saved when None.

    Returns:
        True when the figure was drawn (and saved, if requested). False when
        the data is invalid, contains no entities, or drawing fails; the
        reason is logged. The figure is always closed, so it is never shown
        inline and never leaks when drawing or saving raises.
    """
    if not validate_graph_data(data):
        return False

    try:
        G = nx.DiGraph()
        labels = {}

        # Add nodes
        for entity in data['entities']:
            entity_id = str(entity['id'])
            entity_label = entity.get('label', 'Unknown')
            entity_name = entity.get('name', entity_id)

            G.add_node(entity_id, label=entity_label)
            labels[entity_id] = f"{entity_label}\n{entity_name}"

        # Built once: membership tests below are O(1) instead of rebuilding
        # the full id list twice for every relationship.
        known_ids = set(labels)

        # Add edges
        for rel in data['relationships']:
            try:
                if isinstance(rel, str) and "|" in rel:
                    parts = rel.split("|")
                    if len(parts) >= 3:
                        head, relation, tail = parts[0], parts[1], parts[2]
                    else:
                        logger.warning(f"Invalid relationship format: {rel}")
                        continue
                elif isinstance(rel, dict):
                    head = str(rel.get('source', ''))
                    tail = str(rel.get('target', ''))
                    relation = rel.get('type', 'RELATED_TO')
                else:
                    logger.warning(f"Unsupported relationship format: {rel}")
                    continue

                # Only add edge if both nodes exist
                if head in known_ids and tail in known_ids:
                    confidence = score_confidence(f"{head} {relation} {tail}", relation)
                    G.add_edge(head, tail, label=relation, weight=confidence)
                else:
                    logger.warning(f"Relationship references non-existent entities: {head} -> {tail}")

            except Exception as e:
                logger.error(f"Error processing relationship {rel}: {e}")
                continue

        if len(G.nodes()) == 0:
            logger.warning("No valid nodes found for visualization")
            return False

        # Create visualization
        fig = plt.figure(figsize=(14, 10))
        try:
            pos = nx.spring_layout(G, k=2, iterations=50)

            # Draw nodes
            nx.draw_networkx_nodes(G, pos, node_color='lightblue',
                                   node_size=3000, alpha=0.7)

            # Draw edges with varying thickness based on confidence
            edges = G.edges()
            if edges:
                weights = [G[u][v].get('weight', 1) for u, v in edges]
                nx.draw_networkx_edges(G, pos, edge_color='gray',
                                       width=[w/2 for w in weights], alpha=0.6)

            # Draw labels
            nx.draw_networkx_labels(G, pos, labels, font_size=8, font_weight='bold')

            # Draw edge labels as "TYPE (confidence)"
            if edges:
                edge_labels = nx.get_edge_attributes(G, 'label')
                edge_weights = nx.get_edge_attributes(G, 'weight')
                edge_text = {k: f"{edge_labels.get(k, 'RELATED')} ({edge_weights.get(k, 0):.1f})"
                             for k in edge_labels}
                nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_text,
                                             font_size=6, font_color='red')

            plt.title("Entity-Relation Graph with Confidence Scores", fontsize=16)
            plt.axis('off')
            plt.tight_layout()

            if output_file:
                plt.savefig(output_file, dpi=300, bbox_inches='tight')
                logger.info(f"Graph saved to {output_file}")
        finally:
            # Always release the figure: prevents auto-display in Jupyter and
            # avoids accumulating open figures when drawing or saving fails.
            plt.close(fig)

        return True

    except Exception as e:
        logger.error(f"Error creating visualization: {e}")
        return False

# CODE

In [21]:
def parse_json_response(response: str) -> Optional[Dict[str, Any]]:
    """Parse the JSON payload of an LLM response.

    The response may be bare JSON, JSON wrapped in a Markdown code fence
    (with or without a ``json`` tag, in any letter case), or a JSON object
    surrounded by explanatory prose.

    Args:
        response: Raw text returned by the model; may be empty or None.

    Returns:
        The decoded JSON value, or None when the response is empty or no
        valid JSON could be extracted (the failure is logged).
    """
    if not response:
        return None

    text = response.strip()

    # Strip an opening code fence and its optional language tag.
    if text.startswith('```'):
        text = text[3:]
        if text[:4].lower() == 'json':
            text = text[4:]
    if text.endswith('```'):
        text = text[:-3]
    text = text.strip()

    try:
        return json.loads(text)
    except json.JSONDecodeError as e:
        # Fallback: the model wrapped the object in prose. Retry on the span
        # from the first '{' to the last '}'.
        start, end = text.find('{'), text.rfind('}')
        if 0 <= start < end:
            try:
                return json.loads(text[start:end + 1])
            except json.JSONDecodeError:
                pass  # report the original error below
        logger.error(f"Failed to parse JSON response: {e}")
        logger.debug(f"Response content: {text[:500]}...")
        return None

In [22]:
def parse_entities_and_links(folder: str, prompt_template: str, db_connector: Optional[SQLiteGraphConnector] = None) -> List[Dict[str, Any]]:
    """Extract graph data from every file in ``./data/<folder>``.

    For each regular file: read it, fill the prompt template, query the LLM,
    parse and validate the answer, optionally store it in the database and
    render a graph image under ``./output``.

    Args:
        folder: Name of the sub-folder of ``./data`` to process.
        prompt_template: ``string.Template`` text with a ``$ctext``
            placeholder for the file content.
        db_connector: Connected database connector, or None to skip storage.

    Returns:
        The validated graph data of every file that could be processed.
        Failures on individual files are logged and skipped.
    """
    results = []
    data_path = f"./data/{folder}"

    if not os.path.exists(data_path):
        logger.error(f"Data folder does not exist: {data_path}")
        return results

    # Keep regular files only (sub-directories cannot be opened as text) and
    # sort them so runs process files in a reproducible order.
    files = sorted(path for path in glob.glob(f"{data_path}/*") if os.path.isfile(path))
    if not files:
        logger.warning(f"No files found in {data_path}")
        return results

    logger.info(f"Processing {len(files)} files in {folder}")
    system_msg = "You are an expert in extracting structured data from unstructured text documents. Return valid JSON only."
    template = Template(prompt_template)  # built once, reused for every file

    for file_path in files:
        try:
            logger.info(f"Processing file: {file_path}")

            with open(file_path, "r", encoding='utf-8') as f:
                content = f.read().strip()

            if not content:
                logger.warning(f"Empty file: {file_path}")
                continue

            prompt = template.substitute(ctext=content)
            response = run_llm_query(prompt, system_msg)

            if not response:
                logger.error(f"No response from LLM for file: {file_path}")
                continue

            parsed = parse_json_response(response)
            if parsed and validate_graph_data(parsed):
                results.append(parsed)

                # Store in graph database if connector provided
                if db_connector:
                    if store_in_graph_database(parsed, db_connector):
                        logger.info(f"Successfully stored data from {file_path} in graph database")
                    else:
                        logger.error(f"Failed to store data from {file_path} in graph database")

                # Create output filename for graph
                base_name = os.path.splitext(os.path.basename(file_path))[0]
                output_file = f"./output/graph_{folder}_{base_name}.png"
                os.makedirs('./output', exist_ok=True)
                visualize_graph(parsed, output_file)
            else:
                logger.error(f"Invalid data structure in file: {file_path}")

        except Exception as e:
            # One bad file must not abort the rest of the folder.
            logger.error(f"Error processing {file_path}: {e}")
            continue

    logger.info(f"Successfully processed {len(results)} files from {folder}")
    return results

In [23]:
def graph_ingestion_process(folders: Dict[str, str], db_connector: SQLiteGraphConnector = None) -> List[Dict[str, Any]]:
    """Run the extraction pipeline over several data folders.

    Args:
        folders: Mapping of data folder name to the prompt template used for
            the files in that folder.
        db_connector: Optional connector forwarded to the per-folder step.

    Returns:
        The results of all folders concatenated, in mapping order.
    """
    collected: List[Dict[str, Any]] = []

    for folder in folders:
        logger.info(f"Starting processing for folder: {folder}")
        per_folder = parse_entities_and_links(folder, folders[folder], db_connector)
        collected += per_folder
        logger.info(f"Completed processing for folder: {folder}. Found {len(per_folder)} valid results.")

    return collected

# PROMPT TEMPLATES

In [25]:
# Prompt templates, one per kind of source document.
#
# Each template is filled by parse_entities_and_links() through
# string.Template, which replaces the single `$ctext` placeholder with the
# file content. Any other `$` added to a template would have to be written as
# `$$` for the substitution to succeed.
#
# Every template asks the model for the same JSON shape: an "entities" list of
# objects carrying at least "id" and "label" (what validate_graph_data()
# requires) and a "relationships" list of "source_id|TYPE|target_id" strings.

# Template for project briefs: Project, Technology, Client and Risk entities.
project_prompt_template = """
Analyze the following Project Brief and extract entities and relationships. Return ONLY valid JSON in this exact format:

{
  "entities": [
    {"id": "unique_id", "label": "Project", "name": "project_name", "summary": "brief_summary"},
    {"id": "unique_id", "label": "Technology", "name": "tech_name"},
    {"id": "unique_id", "label": "Client", "name": "client_name", "industry": "industry_name"},
    {"id": "unique_id", "label": "Risk", "description": "risk_description"}
  ],
  "relationships": [
    "project_id|USES_TECH|technology_id",
    "project_id|HAS_CLIENT|client_id",
    "project_id|HAS_RISK|risk_id"
  ]
}

Project Brief:
$ctext
"""

# Template for people profiles: Person, Project and Technology entities.
people_prompt_template = """
Extract information from the text and return ONLY valid JSON in this exact format:

{
  "entities": [
    {"id": "unique_id", "label": "Person", "name": "person_name"},
    {"id": "unique_id", "label": "Project", "name": "project_name", "summary": "brief_summary"},
    {"id": "unique_id", "label": "Technology", "name": "tech_name"}
  ],
  "relationships": [
    "person_id|HAS_SKILLS|technology_id",
    "project_id|HAS_PEOPLE|person_id",
    "person_id|HAS_ROLE|project_id"
  ]
}

People Profiles:
$ctext
"""

# Template for Slack exports: Person, SlackMessage and Channel entities.
slack_prompt_template = """
Extract information from Slack messages and return ONLY valid JSON in this exact format:

{
  "entities": [
    {"id": "unique_id", "label": "Person", "name": "person_name"},
    {"id": "unique_id", "label": "SlackMessage", "text": "message_text"},
    {"id": "unique_id", "label": "Channel", "name": "channel_name"}
  ],
  "relationships": [
    "person_id|SENT|message_id",
    "message_id|POSTED_IN|channel_id",
    "message_id|MENTIONS|person_id"
  ]
}

Slack Messages:
$ctext
"""

# Configuration: sub-folder of ./data -> prompt template used for its files.
# graph_ingestion_process() walks this mapping in the order written here.
folders = {
    "people_profiles": people_prompt_template,
    "project_briefs": project_prompt_template,
    "slack_messages": slack_prompt_template,
}

In [26]:
# Main execution: ingest every configured folder into SQLite, dump the
# combined results to JSON, then run the analytics demo and a sample query.
if __name__ == "__main__":
    db_connector = None
    try:
        logger.info("Starting graph ingestion process with SQLite database")

        # Initialize SQLite connector
        db_connector = SQLiteGraphConnector("graph_data.db")

        # Connect to database
        if not db_connector.connect():
            logger.error("Failed to connect to SQLite database. Exiting.")
            # `exit` is a helper the `site` module installs for interactive
            # sessions and is not guaranteed to exist; raising SystemExit is
            # the portable way to stop with status 1.
            raise SystemExit(1)

        # Process files and store in database
        entities_relationships = graph_ingestion_process(folders, db_connector)
        logger.info(f"Process completed. Total results: {len(entities_relationships)}")

        # Save combined results
        output_path = "./output/combined_results.json"
        os.makedirs('./output', exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(entities_relationships, f, indent=2, ensure_ascii=False)
        logger.info(f"Combined results saved to {output_path}")

        # Run analytics
        analytics = GraphAnalytics(db_connector)
        run_analytics_demo(analytics)

        # Example queries specific to SQLite
        logger.info("Running SQLite-specific example queries...")

        # Find projects and their technologies
        cursor = db_connector.connection.cursor()
        cursor.execute("""
            SELECT e1.id, JSON_EXTRACT(e1.properties, '$.name') as project_name,
                   e2.id as tech_id, JSON_EXTRACT(e2.properties, '$.name') as tech_name
            FROM entities e1
            JOIN relationships r ON e1.id = r.source
            JOIN entities e2 ON r.target = e2.id
            WHERE e1.label = 'Project' AND e2.label = 'Technology' AND r.type = 'USES_TECH'
            LIMIT 10
        """)
        project_tech = [dict(row) for row in cursor.fetchall()]
        logger.info(f"Project-Technology relationships: {project_tech}")

    except Exception as e:
        logger.error(f"Fatal error in main execution: {e}")
        raise
    finally:
        # Close the database connection on every path, including errors, so
        # the SQLite handle is never left open.
        if db_connector is not None:
            db_connector.close()

2025-06-04 23:59:54,829 - INFO - Starting graph ingestion process with SQLite database
2025-06-04 23:59:54,857 - INFO - Connected to SQLite database: graph_data.db
2025-06-04 23:59:54,858 - INFO - Starting processing for folder: people_profiles
2025-06-04 23:59:54,859 - INFO - Processing 2 files in people_profiles
2025-06-04 23:59:54,860 - INFO - Processing file: ./data/people_profiles\ex1.txt
2025-06-04 23:59:55,898 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-04 23:59:58,124 - INFO - Successfully stored data from ./data/people_profiles\ex1.txt in graph database
2025-06-04 23:59:59,731 - INFO - Graph saved to ./output/graph_people_profiles_ex1.png
2025-06-04 23:59:59,732 - INFO - Processing file: ./data/people_profiles\ex2.txt
2025-06-05 00:00:00,538 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-05 00:00:01,555 - ERROR - Invalid entity structure: {'id': 'AWSSageMaker', 'nam