[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Hawksight-AI/semantica/blob/main/cookbook/use_cases/finance/03_Earnings_Call_Analysis.ipynb)

# Earnings Call Analysis with Docling and Semantica: MDA Space Q3 2025

## Data Sources

This analysis draws on two key documents from **MDA Space Ltd.** for the third quarter of 2025 to provide a holistic view of the company's performance and strategy:
1. **Press Release:** A structured summary of financial results, strategic milestones, and executive commentary.
2. **Earnings Transcript:** A detailed record of the management presentation and the subsequent Q&A session with analysts, providing deeper qualitative context.

### Q3 2025 Strategic Highlights
* **Robust Backlog:** The company maintained a significant backlog at quarter-end, providing strong revenue visibility and supporting long-term growth objectives.
* **Strong Revenue Growth:** Substantial year-over-year revenue expansion driven by successful execution across major programs and satellite system deliveries.
* **Profitability & Margins:** Strong operational performance led to a significant increase in Adjusted EBITDA and healthy overall margin expansion.
* **Earnings Performance:** Noteworthy growth in adjusted net income and earnings per share, reflecting scaled operations and improved bottom-line efficiency.
* **Cash Flow & Capital Structure:** Positive operating cash flow and a healthy balance sheet with a very low leverage ratio, positioning the company well for future investment.

---

## Overview

This notebook demonstrates how to extract deep insights from unstructured financial documents. By combining **Docling** for high-fidelity parsing with **Semantica** for knowledge engineering, we build a structured semantic layer that enables complex analysis and advanced GraphRAG capabilities.

**Workflow:** `Dual PDF Input → Docling Parsing → Entity & Relation Extraction → Knowledge Graph Construction → GraphRAG → Strategic Q&A`

In [None]:
!pip install -qU semantica docling pdfplumber groq


In [None]:
# Initialize Groq LLM provider
from semantica.llms import Groq
import os

# Resolve the API key: the environment variable wins, then Colab secrets.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")

if not GROQ_API_KEY:
    try:
        from google.colab import userdata
    except ImportError:
        # Not running inside Google Colab; only the environment variable applies.
        userdata = None

    if userdata is not None:
        try:
            # userdata.get() takes only the secret name (no default argument)
            # and raises when the secret is missing or access was not granted.
            GROQ_API_KEY = userdata.get("GROQ_API_KEY") or ""
        except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
            GROQ_API_KEY = ""

if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY not found. Please set it as an environment variable or update this cell.")

# Later cells read the key back through os.getenv("GROQ_API_KEY").
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

groq_llm = Groq(
    model="llama-3.1-8b-instant",
    api_key=GROQ_API_KEY
)

print(f"✓ Groq LLM initialized: {groq_llm.model}")


## Step 1: Parse PDF with Docling

Download the press release and the earnings call transcript PDFs, then parse both (text and tables) using DoclingParser.


In [None]:
import requests
from pathlib import Path
from semantica.parse import DoclingParser

# Initialize DoclingParser with default settings
# Optional: configure with export_format="html" or enable_ocr=True if needed
parser = DoclingParser()

# PDF URLs for MDA Space Q3 2025 earnings documents
press_release_url = "https://filecache.investorroom.com/mr5ircnw_mda/677/MDA_Space_Ltd_Q3_2025_Press_Release_Nov_14_2025_FINAL.pdf"
transcript_url = "https://filecache.investorroom.com/mr5ircnw_mda/681/MDA%20Space%20Ltd.%20Q3%202025%20Earnings%20Conference%20Call%20Transcript%20%28November%2014%202025%29.pdf"

# Setup download directory
download_dir = Path("downloads")
download_dir.mkdir(exist_ok=True)

press_release_pdf = download_dir / "mda_space_q3_2025_press_release.pdf"
transcript_pdf = download_dir / "mda_space_q3_2025_transcript.pdf"

# Upper bound (seconds) on each HTTP request so a stalled server cannot hang the cell.
DOWNLOAD_TIMEOUT_SECONDS = 60


def _download_if_missing(url, destination):
    """Fetch *url* into *destination* unless that file already exists.

    The file is written only after a successful (2xx) response, so an HTTP
    error page is never cached on disk as if it were the PDF. Download
    failures are reported and swallowed; the parse step below then falls
    back to empty documents.
    """
    if destination.exists():
        return
    try:
        response = requests.get(url, timeout=DOWNLOAD_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"⚠️ Download failed for {url}: {exc}")
        return
    destination.write_bytes(response.content)


# Download PDFs if they don't exist
_download_if_missing(press_release_url, press_release_pdf)
_download_if_missing(transcript_url, transcript_pdf)

# Parse documents using DoclingParser
try:
    press_release = parser.parse(press_release_pdf)
    transcript = parser.parse(transcript_pdf)
except Exception as e:
    print(f"⚠️ Parsing failed: {e}")
    print("Using fallback empty documents for demonstration.")
    press_release = {"full_text": "", "tables": []}
    transcript = {"full_text": "", "tables": []}

# Combine parsed documents
source_documents = [press_release, transcript]
parsed_doc = {
    "full_text": f"# Press Release\n\n{press_release['full_text']}\n\n# Transcript\n\n{transcript['full_text']}",
    "tables": press_release['tables'] + transcript['tables'],
    "metadata": {
        "title": "MDA Space Ltd. Q3 2025 Earnings Analysis",
        "company": "MDA Space Ltd.",
        "quarter": "Q3 2025",
        "date": "November 14, 2025"
    }
}

# Report the number of source documents, not the table count a second time.
print(f"✓ Parsed {len(parsed_doc['tables'])} tables from {len(source_documents)} documents")


## Step 2: Normalize Text

Normalize extracted text using TextNormalizer for consistent processing.


In [None]:
# Step 2: Normalize full document text (run ONCE)
from semantica.normalize import TextNormalizer

# Every optional transformation is switched off explicitly.
normalization_options = {
    "clean_html": False,
    "remove_extra_whitespace": False,
    "lowercase": False,  # preserve casing for entities & finance terms
}

normalizer = TextNormalizer()
raw_text = parsed_doc["full_text"]
normalized_text = normalizer.normalize(raw_text, **normalization_options)

print(f"Text normalized: {len(normalized_text)} characters")



## Step 3: Split Text into Chunks

Split the normalized text into overlapping chunks to enable scalable and accurate entity and relation extraction.
This step prepares the text for LLM-based semantic processing.

In [None]:
from semantica.split import TextSplitter

# Chunk length and the overlap shared by neighbouring chunks.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 250

splitter_options = {
    "method": "recursive",   # safest for long PDFs
    "chunk_size": CHUNK_SIZE,
    "chunk_overlap": CHUNK_OVERLAP,
}
splitter = TextSplitter(**splitter_options)

# Later cells iterate over `chunks` for every extraction pass.
chunks = splitter.split(normalized_text)

print(f"✓ Created {len(chunks)} chunks")

# Version-safe access to chunk text
def get_chunk_text(chunk):
    """Return the text carried by *chunk*.

    A plain string is returned unchanged. Otherwise the ``content``
    attribute is preferred, then ``text``; an attribute that is missing,
    ``None`` or empty is skipped. Returns ``""`` when no text is found, so
    callers can safely call ``.strip()`` on the result.
    """
    if isinstance(chunk, str):
        return chunk
    for attribute in ("content", "text"):
        value = getattr(chunk, attribute, None)
        if value:
            return value
    return ""

# Inspect one chunk (guarded: indexing an empty chunk list would raise IndexError)
if chunks:
    print("Sample chunk:\n", get_chunk_text(chunks[0]))
else:
    print("Sample chunk: <none - the splitter produced no chunks>")

## Step 4: Extract Entities

Extract entities (organizations, people, financial terms) using NERExtractor with Groq LLM.


In [None]:
# Step 4: Extract entities from ALL chunks using NERExtractor (Groq)

from semantica.semantic_extract import NERExtractor
import os

# Labels requested from the extractor.
entity_types = [
    "ORGANIZATION", "ORG", "PERSON", "MONEY", "CURRENCY",
    "PERCENT", "PERCENTAGE", "DATE", "TIME", "PRODUCT",
    "LOCATION", "GPE", "EVENT", "QUANTITY", "CARDINAL"
]

# Initialize NER extractor
ner_settings = dict(
    method="llm",
    provider="groq",
    llm_model="llama-3.1-8b-instant",
    min_confidence=0.5,
    temperature=0.0,
    api_key=os.getenv("GROQ_API_KEY"),
)
ner = NERExtractor(**ner_settings)


# Version-safe chunk text accessor
def get_chunk_text(chunk):
    """Return ``chunk.content`` if present, else ``chunk.text``, else ``""``."""
    text_fallback = getattr(chunk, "text", "")
    return getattr(chunk, "content", text_fallback)


all_entities = []
chunk_total = len(chunks)

for i, chunk in enumerate(chunks, 1):
    text = get_chunk_text(chunk)

    # Blank chunks are skipped entirely (no extraction, no progress line).
    if text.strip():
        try:
            all_entities.extend(
                ner.extract_entities(text, entity_types=entity_types)
            )
        except Exception as e:
            # One failing chunk must not abort the whole pass.
            print(f"⚠️ Chunk {i} failed: {e}")

        if i % 10 == 0 or i == chunk_total:
            print(f"Processed {i}/{len(chunks)} chunks")

print(f"✓ Total entities extracted: {len(all_entities)}")


## Step 5: Extract Financial Metrics

Extract financial metrics (monetary amounts, percentages, and quantities) from the text chunks using the NER extractor.


In [None]:
# Step 5: Extract financial metrics from ALL chunks (explicit + safe)

financial_entity_types = [
    "MONEY", "CURRENCY", "PERCENT", "PERCENTAGE",
    "QUANTITY", "CARDINAL"
]


def get_chunk_text(chunk):
    """Return the text of *chunk* (a plain string, or ``content``/``text``), else ``""``."""
    if isinstance(chunk, str):
        return chunk
    for attribute in ("content", "text"):
        value = getattr(chunk, attribute, None)
        if value:
            return value
    return ""


financial_entities = []

total_chunks = len(chunks)
print(f"Processing {total_chunks} chunks for financial entities...")

for idx, chunk in enumerate(chunks, start=1):
    text = get_chunk_text(chunk)

    # Skip empty chunks but still count them
    if not text.strip():
        print(f"  Skipping empty chunk {idx}/{total_chunks}")
        continue

    # ALWAYS run NER per chunk; a failure on one chunk (handled the same way
    # as in Step 4) must not discard the entities already collected.
    try:
        financial_entities.extend(
            ner.extract_entities(text, entity_types=financial_entity_types)
        )
    except Exception as exc:
        print(f"⚠️ Chunk {idx} failed: {exc}")

    if idx % 10 == 0 or idx == total_chunks:
        print(f"  Processed {idx}/{total_chunks} chunks")

# Aggregate results: bucket each mention by substring match on its label.
financial_metrics = {"money": [], "percentages": [], "quantities": []}

for e in financial_entities:
    # A missing or None label is treated as "no bucket" instead of crashing.
    label = (getattr(e, "label", "") or "").lower()
    if "money" in label or "currency" in label:
        financial_metrics["money"].append(e.text)
    elif "percent" in label:
        financial_metrics["percentages"].append(e.text)
    elif "quantity" in label or "cardinal" in label:
        financial_metrics["quantities"].append(e.text)

print(f"✓ Financial entity mentions extracted: {len(financial_entities)}")
print(f"  Money/Currency: {len(financial_metrics['money'])}")
print(f"  Percentages: {len(financial_metrics['percentages'])}")
print(f"  Quantities: {len(financial_metrics['quantities'])}")

if financial_entities:
    sample = financial_entities[0]
    print(f"  Sample: {sample.text} ({sample.label})")


## Step 5b: Extract Relationships

Extract relationships between entities using RelationExtractor with Groq LLM.


In [None]:
# Step 5 (relationships): Extract relationships from all chunks

from semantica.semantic_extract import RelationExtractor
import os

relation_extractor = RelationExtractor(
    method="llm",
    confidence_threshold=0.5,
    relation_types=[
        "HAS_REVENUE", "HAS_EPS", "HAS_MARGIN", "HAS_PROFIT", "HAS_GROWTH",
        "PROVIDES_GUIDANCE", "STATES", "ANNOUNCES", "REPORTS", "EXPECTS",
        "OPERATES_IN", "LOCATED_IN", "PARTNERS_WITH", "SERVES",
        "COMPARED_TO", "INCREASED_BY", "DECREASED_BY", "CHANGED_BY",
        "DURING", "IN_QUARTER", "FOR_PERIOD",
        "RELATED_TO", "PART_OF", "AFFECTS"
    ],
    api_key=os.getenv("GROQ_API_KEY")
)


def get_chunk_text(chunk):
    """Return the text of *chunk* (a plain string, or ``content``/``text``), else ``""``."""
    if isinstance(chunk, str):
        return chunk
    for attribute in ("content", "text"):
        value = getattr(chunk, attribute, None)
        if value:
            return value
    return ""


relationships = []

for chunk_number, chunk in enumerate(chunks, start=1):
    text = get_chunk_text(chunk)
    if not text.strip():
        continue

    # Same per-chunk handling as entity extraction: report the failure and
    # keep the relationships already gathered from other chunks.
    try:
        relationships.extend(
            relation_extractor.extract_relations(
                text,
                entities=all_entities,
                provider="groq",
                llm_model="llama-3.1-8b-instant",
                temperature=0.0
            )
        )
    except Exception as exc:
        print(f"⚠️ Chunk {chunk_number} failed: {exc}")

print(f"✓ Relationships extracted: {len(relationships)}")

if relationships:
    sample = relationships[0]
    print(f"Sample: {sample.subject.text} → {sample.predicate} → {sample.object.text}")


## Step 6: Extract RDF Triplets

Extract RDF triplets (subject-predicate-object) using TripletExtractor with Groq LLM.


In [None]:
# Step 6: Extract RDF triplets from all chunks

from semantica.semantic_extract import TripletExtractor
import os

triplet_extractor = TripletExtractor(
    method="llm",
    include_temporal=True,
    include_provenance=True,
    provider="groq",
    llm_model="llama-3.1-8b-instant",
    temperature=0.0,
    api_key=os.getenv("GROQ_API_KEY")
)


def get_chunk_text(chunk):
    """Return the text of *chunk* (a plain string, or ``content``/``text``), else ``""``."""
    if isinstance(chunk, str):
        return chunk
    for attribute in ("content", "text"):
        value = getattr(chunk, attribute, None)
        if value:
            return value
    return ""


triplets = []

for chunk_number, chunk in enumerate(chunks, start=1):
    text = get_chunk_text(chunk)
    if not text.strip():
        continue

    # Same per-chunk handling as entity extraction: report the failure and
    # keep the triplets already gathered from other chunks.
    try:
        triplets.extend(
            triplet_extractor.extract_triplets(
                text,
                entities=all_entities,
                relations=relationships if relationships else None
            )
        )
    except Exception as exc:
        print(f"⚠️ Chunk {chunk_number} failed: {exc}")

# Optional validation (if available)
validated_triplets = (
    triplet_extractor.validate_triplets(triplets)
    if hasattr(triplet_extractor, "validate_triplets")
    else triplets
)

print(f"✓ RDF triplets extracted: {len(validated_triplets)}")

if validated_triplets:
    t = validated_triplets[0]
    print(f"Sample: {t.subject} → {t.predicate} → {t.object}")

## Step 7: Detect Conflicts

Detect conflicts in extracted entities and relationships using ConflictDetector.


In [None]:
# Step 7: Detect conflicts in extracted entities and relationships
from semantica.conflicts import ConflictDetector, SourceTracker, SourceReference

source_tracker = SourceTracker()
conflict_detector = ConflictDetector(
    source_tracker=source_tracker,
    confidence_threshold=0.7
)

# Derive the reporting period from the parsed document ("Q3 2025" -> "2025-Q3")
# instead of hard-coding a quarter that does not match the source documents.
quarter_label, year_label = parsed_doc["metadata"]["quarter"].split()
source_timestamp = f"{year_label}-{quarter_label}"


def _entity_key(entity):
    """Return the entity's ``id``, falling back to its text when the id is missing or empty."""
    return getattr(entity, 'id', None) or getattr(entity, 'text', '')


# Track sources for entities
for entity in all_entities:
    entity_name = getattr(entity, 'text', '')
    source_tracker.track_property_source(
        _entity_key(entity),
        'name',
        entity_name,
        source=SourceReference(
            source='earnings_call',
            timestamp=source_timestamp,
            metadata={'entity_type': getattr(entity, 'label', 'UNKNOWN')}
        )
    )

# Detect value conflicts. The same key as in source tracking is used, so
# entities without an id are not all lumped together under the id ''.
value_conflicts = conflict_detector.detect_value_conflicts(
    [{'id': _entity_key(e), 'name': getattr(e, 'text', '')} for e in all_entities],
    property_name='name'
)

# Detect relationship conflicts
relationship_conflicts = conflict_detector.detect_relationship_conflicts(relationships)

print(f"✓ Conflicts detected")
print(f"  Value conflicts: {len(value_conflicts)}")
print(f"  Relationship conflicts: {len(relationship_conflicts)}")


## Step 8: Resolve Conflicts

Resolve detected conflicts using ConflictResolver with voting strategy.


In [None]:
# Step 8: Resolve conflicts using ConflictResolver
from semantica.conflicts import ConflictResolver

RESOLUTION_STRATEGY = 'voting'

conflict_resolver = ConflictResolver(
    default_strategy=RESOLUTION_STRATEGY,
    source_tracker=source_tracker
)

# Working copies handed to the following steps; this cell only copies the
# lists and does not alter their contents.
resolved_entities = list(all_entities)
resolved_relationships = list(relationships)

# Value conflicts are resolved first, then relationship conflicts; the
# resolutions are collected in that order.
resolved_conflicts = []
for pending_conflicts in (value_conflicts, relationship_conflicts):
    for conflict in pending_conflicts:
        resolved_conflicts.append(
            conflict_resolver.resolve_conflict(conflict, strategy=RESOLUTION_STRATEGY)
        )

print(f"✓ Conflicts resolved: {len(resolved_conflicts)}")


## Step 9: Deduplicate Entities

Detect and merge duplicate entities using DuplicateDetector and EntityMerger.


In [None]:
# Step 9: Deduplicate entities using DuplicateDetector and EntityMerger
from semantica.deduplication import DuplicateDetector, EntityMerger

duplicate_detector = DuplicateDetector(
    similarity_threshold=0.8,
    confidence_threshold=0.7
)

# Convert entities to dict format.
# The id falls back to the entity text: an empty-string id would make every
# id-less entity share the same id and would defeat the later
# `entity.get('id', <name>)` fallbacks (the key would exist but be empty).
entity_dicts = [
    {
        'id': getattr(entity, 'id', None) or getattr(entity, 'text', ''),
        'name': getattr(entity, 'text', ''),
        'type': getattr(entity, 'label', 'UNKNOWN'),
        'confidence': getattr(entity, 'confidence', 1.0),
        'metadata': getattr(entity, 'metadata', {})
    }
    for entity in resolved_entities
]

# Detect duplicates (reported in the final analysis summary)
duplicates = duplicate_detector.detect_duplicates(entity_dicts)

# Merge duplicates
entity_merger = EntityMerger(preserve_provenance=True)
merge_operations = entity_merger.merge_duplicates(
    entity_dicts,
    strategy='keep_most_complete'
)

merged_entities = [op.merged_entity for op in merge_operations]

print(f"✓ Deduplication complete")
print(f"  Original entities: {len(entity_dicts)}")
print(f"  Merged entities: {len(merged_entities)}")
print(f"  Duplicates removed: {len(entity_dicts) - len(merged_entities)}")


## Step 10: Build Knowledge Graph

Build knowledge graph from cleaned entities, relationships, and triplets using GraphBuilder.


In [None]:
# Step 10: Build knowledge graph from cleaned entities, resolved relationships, and triplets
from semantica.kg import GraphBuilder

graph_builder = GraphBuilder(merge_entities=True, entity_resolution_strategy="fuzzy")


def _triplet_as_relationship(triplet):
    """Map a triplet's subject/predicate/object onto the relationship dict layout."""
    return {
        "source": triplet.subject,
        "predicate": triplet.predicate,
        "target": triplet.object,
        "confidence": triplet.confidence,
        "metadata": triplet.metadata
    }


# Convert triplets to relationships format
triplet_relationships = [_triplet_as_relationship(item) for item in validated_triplets]

all_relationships = resolved_relationships + triplet_relationships

kg_metadata = {
    "source": "earnings_call_transcript",
    "financial_metrics": financial_metrics,
    "extraction_method": "Groq LLM"
}

kg_data = {
    "entities": merged_entities,
    "relationships": all_relationships,
    "triplets": validated_triplets,
    "metadata": kg_metadata
}

knowledge_graph = graph_builder.build(sources=[kg_data], merge_entities=True)

print(f"✓ Knowledge graph built")
print(f"  Entities: {len(knowledge_graph.get('entities', []))}")
print(f"  Relationships: {len(knowledge_graph.get('relationships', []))}")


## Step 11: Analyze Knowledge Graph

Analyze graph structure using GraphAnalyzer (centrality, communities, connectivity).


In [None]:
# Step 11: Analyze knowledge graph using GraphAnalyzer
from semantica.kg import GraphAnalyzer

graph_analyzer = GraphAnalyzer()

# Run each analysis pass on the same graph, in a fixed order.
analysis = graph_analyzer.analyze_graph(knowledge_graph)
centrality = graph_analyzer.calculate_centrality(knowledge_graph, 'degree')
communities = graph_analyzer.detect_communities(knowledge_graph, algorithm='louvain')
connectivity = graph_analyzer.analyze_connectivity(knowledge_graph)
metrics = graph_analyzer.compute_metrics(knowledge_graph)

# Keep at most the five best-ranked entries when rankings are available.
has_rankings = bool(centrality) and 'rankings' in centrality
top_entities = centrality['rankings'][:5] if has_rankings else []

# Only a dict-shaped result is inspected for its 'communities' entry.
if isinstance(communities, dict):
    num_communities = len(communities.get('communities', []))
else:
    num_communities = 0

print(f"✓ Graph analysis complete")
print(f"  Communities: {num_communities}")
print(f"  Top entities: {len(top_entities)}")


## Step 12: Build Context Graph

Build ContextGraph from knowledge graph for enhanced retrieval and GraphRAG.


In [None]:
# Step 12: Build context graph for enhanced retrieval
from semantica.context import ContextGraph

context_graph = ContextGraph(
    extract_entities=True,
    extract_relationships=True
)

# Convert knowledge graph to context graph format.
# `get('id') or name` also falls back when the id is present but empty, so
# nodes without a usable id do not all collapse onto the id ''. A missing or
# None metadata value is treated as an empty mapping.
nodes = [
    {
        "id": entity.get('id') or entity.get('name', ''),
        "type": entity.get('type', 'entity'),
        "properties": {
            "content": entity.get('name', ''),
            "confidence": entity.get('confidence', 1.0),
            **(entity.get('metadata') or {})
        }
    }
    for entity in knowledge_graph.get('entities', [])
]

edges = [
    {
        "source_id": rel.get('source', ''),
        "target_id": rel.get('target', ''),
        "type": rel.get('predicate', 'related_to'),
        "weight": rel.get('confidence', 1.0)
    }
    for rel in knowledge_graph.get('relationships', [])
]

node_count = context_graph.add_nodes(nodes)
edge_count = context_graph.add_edges(edges)

print(f"✓ Context graph built")
print(f"  Nodes: {node_count}")
print(f"  Edges: {edge_count}")


## Step 13: Context Retrieval

Set up hybrid retrieval (vector + graph) using ContextRetriever for GraphRAG queries.


In [None]:
# Step 13: Set up hybrid context retrieval and demonstrate GraphRAG
from semantica.vector_store import VectorStore
from semantica.context import ContextRetriever

# Index one entry per non-empty chunk rather than the whole document as a
# single entry; with only one indexed text, ranking (max_results=3) cannot
# distinguish between passages.
chunk_texts = [
    chunk_text
    for chunk_text in (get_chunk_text(chunk) for chunk in chunks)
    if chunk_text and chunk_text.strip()
]
if not chunk_texts:
    # No usable chunks: fall back to indexing the full text as one entry.
    chunk_texts = [parsed_doc["full_text"]]

# Initialize VectorStore
vector_store = VectorStore(backend="faiss")
vector_store.add(
    texts=chunk_texts,
    metadata=[
        {"source": "earnings_call", "type": "transcript", "chunk_index": position}
        for position, _ in enumerate(chunk_texts)
    ]
)

# Initialize ContextRetriever
context_retriever = ContextRetriever(
    knowledge_graph=context_graph,
    vector_store=vector_store,
    hybrid_alpha=0.6,
    use_graph_expansion=True,
    max_expansion_hops=2
)

# Retrieve context for financial queries
financial_queries = [
    "What was the company's revenue guidance?",
    "What were the key financial metrics discussed?"
]

retrieved_contexts = []
for query in financial_queries:
    results = context_retriever.retrieve(
        query=query,
        max_results=3,
        min_relevance_score=0.2
    )
    retrieved_contexts.append({
        "query": query,
        "results": results,
        "count": len(results)
    })

print(f"✓ Hybrid retrieval configured")
print(f"  Queries processed: {len(retrieved_contexts)}")


## Step 14: Entity Linking

Link entities across sources and assign URIs using EntityLinker.


In [None]:
# Step 14: Link entities using EntityLinker
from semantica.context import EntityLinker

entity_linker = EntityLinker(knowledge_graph=knowledge_graph)

# Assign URIs to key entities (first ten merged entities)
linked_entities = []
for entity in merged_entities[:10]:
    # `get('id') or name` also falls back when the id key exists but is
    # empty; `get('id', name)` would return the empty id in that case.
    entity_id = entity.get('id') or entity.get('name', '')
    entity_name = entity.get('name', '')
    entity_type = entity.get('type', 'UNKNOWN')

    uri = entity_linker.assign_uri(
        entity_id=entity_id,
        text=entity_name,
        entity_type=entity_type
    )
    linked_entities.append({
        "entity_id": entity_id,
        "name": entity_name,
        "uri": uri,
        "type": entity_type
    })

# Build entity web
entity_web = entity_linker.build_entity_web()

print(f"✓ Entity linking complete")
print(f"  Entities linked: {len(linked_entities)}")
print(f"  Entity web nodes: {len(entity_web.get('nodes', []))}")


## Step 15: Agent Memory

Store and retrieve memories using AgentMemory with RAG integration.


In [None]:
# Step 15: Store and retrieve memories using AgentMemory
from semantica.context import AgentMemory

agent_memory = AgentMemory(
    vector_store=vector_store,
    knowledge_graph=knowledge_graph,
    retention_days=30
)

# Label for the stored memory: the document title, else its quarter. The
# last-resort default names the quarter this notebook analyses rather than an
# unrelated period.
doc_metadata = parsed_doc['metadata']
document_label = doc_metadata.get('title') or doc_metadata.get('quarter', 'Q3 2025')

# Store earnings call memories
memory_contents = [
    f"Earnings call transcript: {document_label}",
    f"Financial metrics extracted: {sum(len(v) for v in financial_metrics.values())} metrics",
    f"Key entities identified: {len(merged_entities)} entities"
]

memory_ids = []
for content in memory_contents:
    memory_id = agent_memory.store(
        content=content,
        metadata={"source": "earnings_call", "type": "transcript_analysis"},
        extract_entities=True,
        extract_relationships=True
    )
    memory_ids.append(memory_id)

# Retrieve memories
financial_memories = agent_memory.retrieve(
    query="financial metrics and earnings",
    max_results=5
)

memory_stats = agent_memory.get_statistics()

print(f"✓ Agent memory configured")
print(f"  Memories stored: {len(memory_ids)}")
print(f"  Total memories: {memory_stats.get('total_memories', 0)}")


## Step 16: Agent Context

Unified context management with AgentContext (auto-detects RAG vs GraphRAG).


In [None]:
# Step 16: High-level context management with AgentContext
from semantica.context import AgentContext

agent_context = AgentContext(
    vector_store=vector_store,
    knowledge_graph=context_graph,
    use_graph_expansion=True,
    max_expansion_hops=2,
    hybrid_alpha=0.6,
    retention_days=30
)

# Derive the period tag from the parsed document ("Q3 2025" -> "2025-Q3")
# instead of hard-coding a quarter that does not match the source documents.
quarter_label, year_label = parsed_doc["metadata"]["quarter"].split()
report_period = f"{year_label}-{quarter_label}"

# Store content with auto-extraction (first 1000 characters of the document)
memory_id = agent_context.store(
    content=parsed_doc["full_text"][:1000],
    metadata={"source": "earnings_call", "date": report_period},
    extract_entities=True,
    extract_relationships=True,
    link_entities=True
)

# Retrieve with auto-detected GraphRAG
graphrag_results = agent_context.retrieve(
    query="What was discussed about revenue growth?",
    max_results=5,
    expand_graph=True,
    include_entities=True
)

context_stats = agent_context.stats()

print(f"✓ AgentContext configured")
print(f"  Memory stored: {memory_id}")
print(f"  GraphRAG results: {len(graphrag_results)}")
print(f"  Total memories: {context_stats.get('total_memories', 0)}")


## Step 17: Answer Generation

Generate answers to financial questions using Groq LLM with retrieved context and knowledge graph.


## Step 18: Export Results

Export knowledge graph and analysis results to JSON and RDF formats. This step runs in the final cell, after the Step 17 answer-generation cell that follows.


In [None]:
# Step 17: Generate answers using Groq LLM from semantica.llms module
financial_questions = [
    "What were the key financial metrics discussed in the earnings call?",
    "What guidance did management provide for future quarters?"
]

# The entity summary does not depend on the question, so it is built once:
# names of the first ten graph entities, of which at most five are shown.
relevant_entities = [
    entity.get('name', '') for entity in knowledge_graph.get('entities', [])[:10]
]
entities_text = ", ".join(relevant_entities[:5]) if relevant_entities else "N/A"

generated_answers = []
for question in financial_questions:
    # Retrieve relevant context
    context_results = context_retriever.retrieve(
        query=question,
        max_results=3,
        min_relevance_score=0.2
    )

    # Build context from retrieved results (numbered from 1)
    context_text = "\n\n".join(
        f"Context {i+1}: {result.get('content', result.get('text', ''))}"
        for i, result in enumerate(context_results[:3])
    )

    # Build prompt
    prompt = f"""Based on the following earnings call transcript context and knowledge graph, answer the question.

Context from transcript:
{context_text[:1000]}

Key entities identified: {entities_text}

Question: {question}

Provide a comprehensive answer based on the context provided. If information is not available in the context, state that clearly.

Answer:"""

    # Generate answer using Groq LLM; on failure the error text is recorded
    # as the answer so every question still yields an entry.
    try:
        answer = groq_llm.generate(
            prompt,
            temperature=0.7,
            max_tokens=500
        )
    except Exception as e:
        answer = f"Error generating answer: {str(e)}"

    generated_answers.append({
        "question": question,
        "answer": answer,
        "context_sources": len(context_results),
        "model": groq_llm.model
    })

print(f"✓ Answer generation complete using Groq LLM")
print(f"  LLM Provider: Groq ({groq_llm.model})")
print(f"  Questions answered: {len(generated_answers)}")
if generated_answers:
    print(f"  Sample question: '{generated_answers[0]['question']}'")


In [None]:
# Step 18: Export structured outputs including triplets
from semantica.export import JSONExporter, RDFExporter

json_exporter = JSONExporter()
rdf_exporter = RDFExporter()

# Export knowledge graph to JSON
kg_json = json_exporter.export(knowledge_graph, format="json")

# Export knowledge graph to RDF (Turtle format)
rdf_output = rdf_exporter.export_to_rdf(knowledge_graph, format="turtle")

# Create analysis summary, one section at a time (insertion order is the
# section order of the resulting dict).
analysis_summary = {}

analysis_summary["financial_metrics"] = financial_metrics

analysis_summary["extraction_stats"] = {
    "entities": len(all_entities),
    "relationships": len(relationships),
    "triplets": len(triplets),
    "provider": f"Groq LLM (semantica.llms module) - {groq_llm.model}"
}

analysis_summary["conflict_resolution"] = {
    "conflicts_detected": len(value_conflicts) + len(relationship_conflicts),
    "conflicts_resolved": len(resolved_conflicts),
    "strategy": "voting"
}

analysis_summary["deduplication"] = {
    "original_entities": len(entity_dicts),
    "duplicates_detected": len(duplicates),
    "merged_entities": len(merged_entities),
    "strategy": "keep_most_complete"
}

analysis_summary["knowledge_graph"] = {
    "entities": len(knowledge_graph.get('entities', [])),
    "relationships": len(knowledge_graph.get('relationships', []))
}

analysis_summary["graph_analytics"] = {
    "metrics": metrics,
    "communities": num_communities,
    "top_entities": top_entities[:5] if top_entities else []
}

analysis_summary["context_graph"] = {
    "nodes": len(context_graph.nodes),
    "edges": len(context_graph.edges)
}

analysis_summary["context_retrieval"] = {
    "queries_processed": len(retrieved_contexts),
    "total_results": sum(c["count"] for c in retrieved_contexts)
}

analysis_summary["entity_linking"] = {
    "entities_linked": len(linked_entities),
    "entity_web_nodes": len(entity_web.get('nodes', [])),
    "entity_web_edges": len(entity_web.get('edges', []))
}

analysis_summary["agent_memory"] = {
    "memories_stored": len(memory_ids),
    "total_memories": memory_stats.get('total_memories', 0)
}

analysis_summary["agent_context"] = {
    "graphrag_results": len(graphrag_results),
    "total_memories": context_stats.get('total_memories', 0)
}

# Per-answer digest: the answer's length is recorded, not its text.
answer_digests = [
    {
        "question": ans["question"],
        "answer_length": len(ans["answer"]),
        "context_sources": ans["context_sources"]
    }
    for ans in generated_answers
]

analysis_summary["answer_generation"] = {
    "questions_answered": len(generated_answers),
    "llm_provider": "Groq",
    "llm_model": groq_llm.model,
    "answers": answer_digests
}

print(f"✓ Export complete")
print(f"  Analysis summary: {len(analysis_summary)} sections")
print(f"  Knowledge graph (JSON): {len(kg_json) if isinstance(kg_json, dict) else 0} items")
print(f"  RDF (Turtle): {len(rdf_output)} characters")
print(f"  LLM answers generated: {len(generated_answers)}")
print(f"  LLM provider: Groq ({groq_llm.model})")
