From 661de91d1219b86cd11e93991cda6e0f024db6fe Mon Sep 17 00:00:00 2001 From: Hillary Mutisya <150286414+hillary-mutisya@users.noreply.github.com> Date: Wed, 29 Oct 2025 22:52:39 -0700 Subject: [PATCH 01/10] Enable graph layout calculation server-side. Use graphology. --- ts/packages/agents/browser/package.json | 5 + .../agent/knowledge/actions/graphActions.mts | 335 +++++++++++- .../knowledge/schema/knowledgeExtraction.mts | 2 +- .../agent/knowledge/utils/graphologyCache.mts | 196 +++++++ .../utils/graphologyLayoutEngine.mts | 491 ++++++++++++++++++ .../src/extension/views/entityGraphView.ts | 34 +- .../extension/views/entityGraphVisualizer.ts | 85 ++- .../extension/views/extensionServiceBase.ts | 7 + .../src/extension/views/graphDataProvider.ts | 56 +- .../src/extension/views/topicGraphView.html | 25 + .../src/extension/views/topicGraphView.ts | 86 ++- .../extension/views/topicGraphVisualizer.ts | 137 +++-- .../conversation/hierarchicalTopicSchema.ts | 2 +- .../src/conversation/hierarchicalTopics.ts | 31 +- .../memory/website/src/websiteCollection.ts | 137 +++-- ts/pnpm-lock.yaml | 114 ++++ 16 files changed, 1595 insertions(+), 148 deletions(-) create mode 100644 ts/packages/agents/browser/src/agent/knowledge/utils/graphologyCache.mts create mode 100644 ts/packages/agents/browser/src/agent/knowledge/utils/graphologyLayoutEngine.mts diff --git a/ts/packages/agents/browser/package.json b/ts/packages/agents/browser/package.json index a3e658b1b..feba53900 100644 --- a/ts/packages/agents/browser/package.json +++ b/ts/packages/agents/browser/package.json @@ -66,6 +66,11 @@ "dompurify": "^3.2.5", "express": "^4.18.2", "express-rate-limit": "^7.5.0", + "graphology": "^0.25.4", + "graphology-communities-louvain": "^2.0.1", + "graphology-layout": "^0.6.1", + "graphology-layout-forceatlas2": "^0.10.1", + "graphology-layout-noverlap": "^0.4.1", "html-to-text": "^9.0.5", "jsdom": "^26.1.0", "jsonpath": "^1.1.1", diff --git 
a/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts b/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts index f89757be1..d3025e37c 100644 --- a/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts +++ b/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts @@ -7,6 +7,19 @@ import { searchByEntities } from "../../searchWebMemories.mjs"; import { GraphCache, TopicGraphCache } from "../types/knowledgeTypes.mjs"; import { calculateTopicImportance } from "../utils/topicMetricsCalculator.mjs"; import { getPerformanceTracker } from "../utils/performanceInstrumentation.mjs"; +import { + buildGraphologyGraph, + convertToCytoscapeElements, + calculateLayoutQualityMetrics, + type GraphNode, + type GraphEdge, +} from "../utils/graphologyLayoutEngine.mjs"; +import { + getGraphologyCache, + setGraphologyCache, + createGraphologyCache, + invalidateAllGraphologyCaches, +} from "../utils/graphologyCache.mjs"; import registerDebug from "debug"; import { openai as ai } from "aiclient"; import { createJsonTranslator } from "typechat"; @@ -956,7 +969,80 @@ export async function getEntityNeighborhood( }, }; - return optimizedResult; + const cacheKey = `entity_neighborhood_${entityId}_${depth}_${maxNodes}`; + let cachedGraph = getGraphologyCache(cacheKey); + + if (!cachedGraph) { + debug("[Graphology] Building layout for entity neighborhood..."); + const layoutStart = performance.now(); + + const allEntities = [ + optimizedResult.centerEntity, + ...optimizedResult.neighbors, + ].filter((e) => e !== null); + + const graphNodes: GraphNode[] = allEntities.map((entity: any) => ({ + id: entity.id, + name: entity.name, + type: entity.type, + confidence: entity.confidence || 0.5, + count: entity.count || 1, + importance: entity.importance || entity.degree || 0, + })); + + const graphEdges: GraphEdge[] = optimizedResult.relationships.map( + (rel: any) => ({ + from: rel.fromEntity, + to: rel.toEntity, + type: 
rel.relationshipType, + confidence: rel.confidence || 0.5, + strength: rel.confidence || 0.5, + }), + ); + + const graph = buildGraphologyGraph(graphNodes, graphEdges, { + nodeLimit: maxNodes * 2, + minEdgeConfidence: 0.2, + denseClusterThreshold: 50, + forceAtlas2Iterations: 100, + noverlapIterations: 300, + }); + + const cytoscapeElements = convertToCytoscapeElements(graph, 1500); + const layoutMetrics = calculateLayoutQualityMetrics(graph); + const layoutDuration = performance.now() - layoutStart; + + cachedGraph = createGraphologyCache( + graph, + cytoscapeElements, + layoutDuration, + layoutMetrics.avgSpacing, + ); + + setGraphologyCache(cacheKey, cachedGraph); + + debug( + `[Graphology] Layout complete in ${layoutDuration.toFixed(2)}ms`, + ); + debug( + `[Graphology] Average node spacing: ${layoutMetrics.avgSpacing.toFixed(2)}`, + ); + } else { + debug("[Graphology] Using cached layout"); + } + + return { + ...optimizedResult, + metadata: { + ...optimizedResult.metadata, + graphologyLayout: { + elements: cachedGraph.cytoscapeElements, + layoutDuration: cachedGraph.metadata.layoutDuration, + avgSpacing: cachedGraph.metadata.avgSpacing, + communityCount: cachedGraph.metadata.communityCount, + }, + }, + }; } catch (error) { console.error("Error getting entity neighborhood:", error); return { @@ -1373,10 +1459,92 @@ export async function getGlobalImportanceLayer( size: entity.size, })); + // Build graphology layout for entities + const cacheKey = `entity_importance_${maxNodes}`; + let cachedGraph = getGraphologyCache(cacheKey); + + if (!cachedGraph) { + debug("[Graphology] Building layout for entity importance layer..."); + const layoutStart = performance.now(); + + const graphNodes: GraphNode[] = optimizedEntities.map( + (entity: any) => ({ + id: entity.id || entity.name, + name: entity.name, + type: entity.type || "entity", + confidence: entity.confidence || 0.5, + count: entity.count || 1, + importance: entity.importance || 0, + }), + ); + + const graphEdges: 
GraphEdge[] = optimizedRelationships.map( + (rel: any) => ({ + from: rel.fromEntity, + to: rel.toEntity, + type: rel.relationshipType, + confidence: rel.confidence || 0.5, + strength: rel.confidence || 0.5, + }), + ); + + const graph = buildGraphologyGraph(graphNodes, graphEdges, { + nodeLimit: maxNodes * 2, + minEdgeConfidence: 0.3, + denseClusterThreshold: 100, + }); + + const cytoscapeElements = convertToCytoscapeElements(graph, 2000); + const layoutMetrics = calculateLayoutQualityMetrics(graph); + const layoutDuration = performance.now() - layoutStart; + + cachedGraph = createGraphologyCache( + graph, + cytoscapeElements, + layoutDuration, + layoutMetrics.avgSpacing, + ); + + setGraphologyCache(cacheKey, cachedGraph); + + debug( + `[Graphology] Entity layout complete in ${layoutDuration.toFixed(2)}ms`, + ); + debug( + `[Graphology] Average node spacing: ${layoutMetrics.avgSpacing.toFixed(2)}`, + ); + } else { + debug("[Graphology] Using cached entity layout"); + } + + // Enrich entities with graphology colors and sizes + const enrichedEntities = optimizedEntities.map((entity: any) => { + const graphElement = cachedGraph!.cytoscapeElements.find( + (el: any) => el.data?.id === entity.id || el.data?.label === entity.name, + ); + if (graphElement?.data) { + return { + ...entity, + color: graphElement.data.color, + size: graphElement.data.size, + community: graphElement.data.community, + }; + } + return entity; + }); + return { - entities: optimizedEntities, + entities: enrichedEntities, relationships: optimizedRelationships, - metadata: metadata, + metadata: { + ...metadata, + graphologyLayout: { + elements: cachedGraph.cytoscapeElements, + layoutDuration: cachedGraph.metadata.layoutDuration, + avgSpacing: cachedGraph.metadata.avgSpacing, + communityCount: cachedGraph.metadata.communityCount, + }, + }, }; } catch (error) { console.error("Error getting global importance layer:", error); @@ -1790,10 +1958,47 @@ export function invalidateTopicCache(websiteCollection: 
any): void { lastUpdated: 0, isValid: false, }); + // Also clear the graphology layout cache + invalidateAllGraphologyCaches(); +} + +/** + * Action to invalidate topic cache from UI + */ +export async function invalidateTopicCacheAction( + parameters: {}, + context: SessionContext, +): Promise<{ success: boolean; message: string }> { + try { + const websiteCollection = context.agentContext.websiteCollection; + + if (!websiteCollection) { + return { + success: false, + message: "Website collection not available", + }; + } + + invalidateTopicCache(websiteCollection); + + return { + success: true, + message: "Topic cache cleared successfully. Reload the page to regenerate the graph.", + }; + } catch (error) { + console.error("Error invalidating topic cache:", error); + return { + success: false, + message: error instanceof Error ? error.message : "Unknown error", + }; + } } // Ensure topic graph data is cached for fast access async function ensureTopicGraphCache(websiteCollection: any): Promise { + // TEMPORARY: Always invalidate cache to ensure fresh data + invalidateTopicCache(websiteCollection); + const cache = getTopicGraphCache(websiteCollection); // Cache never expires - only invalidated on graph rebuild or knowledge import @@ -1854,6 +2059,27 @@ async function ensureTopicGraphCache(websiteCollection: any): Promise { ); } + // Calculate childCount for each topic + tracker.startOperation("ensureTopicGraphCache.calculateChildCounts"); + const childCountMap = new Map(); + for (const topic of topics) { + childCountMap.set(topic.topicId, 0); + } + for (const topic of topics) { + if (topic.parentTopicId) { + const currentCount = childCountMap.get(topic.parentTopicId) || 0; + childCountMap.set(topic.parentTopicId, currentCount + 1); + } + } + for (const topic of topics) { + topic.childCount = childCountMap.get(topic.topicId) || 0; + } + tracker.endOperation( + "ensureTopicGraphCache.calculateChildCounts", + topics.length, + topics.length, + ); + // Build relationships 
from parent-child structure tracker.startOperation("ensureTopicGraphCache.buildTopicRelationships"); let relationships = buildTopicRelationships(topics); @@ -2630,18 +2856,39 @@ export async function getTopicImportanceLayer( let lateralRelationships: any[] = []; if (websiteCollection.topicRelationships) { + // Fetch ALL lateral relationships (all types, min strength 0.3) const lateralRels = websiteCollection.topicRelationships.getRelationshipsForTopicsOptimized( selectedTopicIdsArray, 0.3, ); - lateralRelationships = lateralRels.map((rel: any) => ({ - from: rel.fromTopic, - to: rel.toTopic, - type: rel.relationshipType, - strength: rel.strength, - })); + // Build parent map to filter out sibling relationships + const parentMap = new Map(); + for (const topic of selectedTopics) { + if (topic.parentTopicId) { + parentMap.set(topic.topicId, topic.parentTopicId); + } + } + + // Filter out sibling relationships (topics with same parent) + lateralRelationships = lateralRels + .filter((rel: any) => { + const parentA = parentMap.get(rel.fromTopic); + const parentB = parentMap.get(rel.toTopic); + // Skip if both have same parent (siblings) + return !(parentA && parentB && parentA === parentB); + }) + .map((rel: any) => ({ + from: rel.fromTopic, + to: rel.toTopic, + type: rel.relationshipType, + strength: rel.strength, + })); + + debug( + `[Topic Graph] Fetched ${lateralRels.length} lateral relationships, kept ${lateralRelationships.length} after filtering siblings`, + ); } const selectedRelationships = [ @@ -2672,10 +2919,78 @@ export async function getTopicImportanceLayer( layer: "topic_importance", }; + const cacheKey = `topic_importance_${maxNodes}`; + let cachedGraph = getGraphologyCache(cacheKey); + + if (!cachedGraph) { + debug("[Graphology] Building layout for topic importance layer..."); + const layoutStart = performance.now(); + + const graphNodes: GraphNode[] = topicsWithMetrics.map( + (topic: any) => ({ + id: topic.topicId, + name: topic.topicName, + type: 
"topic", + confidence: topic.confidence || 0.5, + count: topic.descendantCount || 1, + importance: topic.importance || 0, + level: topic.level || 0, + parentId: topic.parentTopicId, + childCount: topic.childCount || 0, + }), + ); + + const graphEdges: GraphEdge[] = selectedRelationships.map( + (rel: any) => ({ + from: rel.from, + to: rel.to, + type: rel.type, + confidence: rel.strength || rel.confidence || 0.5, + strength: rel.strength || 0.5, + }), + ); + + const graph = buildGraphologyGraph(graphNodes, graphEdges, { + nodeLimit: maxNodes * 2, + minEdgeConfidence: 0.3, + denseClusterThreshold: 100, + }); + + const cytoscapeElements = convertToCytoscapeElements(graph, 2000); + const layoutMetrics = calculateLayoutQualityMetrics(graph); + const layoutDuration = performance.now() - layoutStart; + + cachedGraph = createGraphologyCache( + graph, + cytoscapeElements, + layoutDuration, + layoutMetrics.avgSpacing, + ); + + setGraphologyCache(cacheKey, cachedGraph); + + debug( + `[Graphology] Layout complete in ${layoutDuration.toFixed(2)}ms`, + ); + debug( + `[Graphology] Average node spacing: ${layoutMetrics.avgSpacing.toFixed(2)}`, + ); + } else { + debug("[Graphology] Using cached layout"); + } + return { topics: topicsWithMetrics, relationships: selectedRelationships, - metadata, + metadata: { + ...metadata, + graphologyLayout: { + elements: cachedGraph.cytoscapeElements, + layoutDuration: cachedGraph.metadata.layoutDuration, + avgSpacing: cachedGraph.metadata.avgSpacing, + communityCount: cachedGraph.metadata.communityCount, + }, + }, }; } catch (error) { console.error("Error getting topic importance layer:", error); diff --git a/ts/packages/agents/browser/src/agent/knowledge/schema/knowledgeExtraction.mts b/ts/packages/agents/browser/src/agent/knowledge/schema/knowledgeExtraction.mts index c776be683..7ccf81fdd 100644 --- a/ts/packages/agents/browser/src/agent/knowledge/schema/knowledgeExtraction.mts +++ 
const debug = registerDebug("typeagent:browser:knowledge:graphology:cache");

/** graphology graph instance; left untyped in this module. */
type Graph = any;

/**
 * A computed layout kept in memory: the graphology graph, its Cytoscape
 * projection, and summary figures about the layout run.
 */
export interface GraphologyCache {
    graph: Graph;
    cytoscapeElements: CytoscapeElement[];
    metadata: {
        nodeCount: number;
        edgeCount: number;
        communityCount: number;
        layoutTimestamp: number;
        layoutDuration: number;
        avgSpacing: number;
    };
    lastUpdated: number;
    isValid: boolean;
}

/** Bookkeeping wrapper stored by the cache manager for every key. */
export interface CacheEntry {
    key: string;
    cache: GraphologyCache;
    accessCount: number;
    lastAccessed: number;
}

/**
 * Bounded in-memory cache of computed layouts with least-recently-used
 * eviction.
 *
 * Recency is tracked through the insertion order of the backing `Map`: every
 * hit or overwrite moves the entry to the end, so the first key is always the
 * least recently used one. This keeps eviction deterministic even when many
 * entries are touched within the same millisecond, which timestamp
 * comparison cannot guarantee.
 */
class GraphologyCacheManager {
    private readonly caches = new Map<string, CacheEntry>();
    private readonly maxCacheSize: number;

    /**
     * @param maxCacheSize Maximum number of entries kept at once. Values
     * below 1 (or NaN) are treated as 1.
     */
    constructor(maxCacheSize: number = 50) {
        this.maxCacheSize = maxCacheSize >= 1 ? Math.floor(maxCacheSize) : 1;
    }

    /**
     * Stores `cache` under `key` and marks it as the most recently used
     * entry. The least recently used entry is evicted only when a NEW key
     * would exceed the capacity; overwriting an existing key never evicts
     * another entry.
     */
    setCacheEntry(key: string, cache: GraphologyCache): void {
        debug(`Setting cache entry: ${key}`);

        if (this.caches.has(key)) {
            // Drop the old slot so the re-insert below lands at the end.
            this.caches.delete(key);
        } else if (this.caches.size >= this.maxCacheSize) {
            this.evictLRU();
        }

        this.caches.set(key, {
            key,
            cache,
            accessCount: 0,
            lastAccessed: Date.now(),
        });

        debug(
            `Cache size: ${this.caches.size}/${this.maxCacheSize} entries`,
        );
    }

    /**
     * Returns the cached layout for `key`, or `null` when the key is unknown
     * or its entry has been invalidated (invalid entries are removed).
     */
    getCacheEntry(key: string): GraphologyCache | null {
        const entry = this.caches.get(key);
        if (!entry) {
            debug(`Cache miss: ${key}`);
            return null;
        }

        if (!entry.cache.isValid) {
            debug(`Cache entry invalid: ${key}`);
            this.caches.delete(key);
            return null;
        }

        entry.accessCount++;
        entry.lastAccessed = Date.now();
        // Move to the end of the Map so it becomes the most recently used.
        this.caches.delete(key);
        this.caches.set(key, entry);
        debug(`Cache hit: ${key} (access count: ${entry.accessCount})`);

        return entry.cache;
    }

    /**
     * Marks the layout stored under `key` as invalid and removes it right
     * away so the graph it holds can be released. The `isValid` flag is
     * still cleared for anyone holding a reference to the cache object.
     */
    invalidateCache(key: string): void {
        const entry = this.caches.get(key);
        if (entry) {
            entry.cache.isValid = false;
            this.caches.delete(key);
            debug(`Invalidated cache: ${key}`);
        }
    }

    /** Marks every cached layout as invalid and empties the cache. */
    invalidateAllCaches(): void {
        debug("Invalidating all caches");
        for (const entry of this.caches.values()) {
            entry.cache.isValid = false;
        }
        this.caches.clear();
    }

    /** Removes the least recently used entry (the first key of the Map). */
    private evictLRU(): void {
        const oldest = this.caches.keys().next();
        if (oldest.done) return;

        this.caches.delete(oldest.value);
        debug(`Evicted LRU cache entry: ${oldest.value}`);
    }

    /** Snapshot of the cache contents, ordered from least to most recently used. */
    getCacheStats(): {
        size: number;
        maxSize: number;
        entries: Array<{
            key: string;
            nodeCount: number;
            accessCount: number;
            lastAccessed: Date;
        }>;
    } {
        return {
            size: this.caches.size,
            maxSize: this.maxCacheSize,
            entries: Array.from(this.caches.values(), (entry) => ({
                key: entry.key,
                nodeCount: entry.cache.metadata.nodeCount,
                accessCount: entry.accessCount,
                lastAccessed: new Date(entry.lastAccessed),
            })),
        };
    }

    /** Empties the cache without touching the `isValid` flags. */
    clearCache(): void {
        debug("Clearing all caches");
        this.caches.clear();
    }
}

// Single process-wide instance behind the exported helper functions.
const globalCacheManager = new GraphologyCacheManager();

/** Looks up a cached layout; `null` on a miss or an invalidated entry. */
export function getGraphologyCache(key: string): GraphologyCache | null {
    return globalCacheManager.getCacheEntry(key);
}

/** Stores a layout under `key` in the shared cache. */
export function setGraphologyCache(
    key: string,
    cache: GraphologyCache,
): void {
    globalCacheManager.setCacheEntry(key, cache);
}

/** Invalidates the layout stored under `key`, if any. */
export function invalidateGraphologyCache(key: string): void {
    globalCacheManager.invalidateCache(key);
}

/** Invalidates and removes every layout in the shared cache. */
export function invalidateAllGraphologyCaches(): void {
    globalCacheManager.invalidateAllCaches();
}

/** Returns size and per-entry statistics of the shared cache. */
export function getGraphologyCacheStats(): ReturnType<
    typeof globalCacheManager.getCacheStats
> {
    return globalCacheManager.getCacheStats();
}

/** Empties the shared cache. */
export function clearGraphologyCache(): void {
    globalCacheManager.clearCache();
}

/**
 * Wraps a laid-out graph and its Cytoscape projection into a cache record.
 *
 * @param graph Graph exposing `nodes()`, `getNodeAttribute()`, `order` and `size`.
 * @param cytoscapeElements Elements produced for the client.
 * @param layoutDuration Layout time as measured by the caller.
 * @param avgSpacing Average nearest-neighbour spacing reported by the caller.
 * @returns A valid cache record; `communityCount` is the number of distinct
 * `community` attribute values, ignoring nodes that have none.
 */
export function createGraphologyCache(
    graph: Graph,
    cytoscapeElements: CytoscapeElement[],
    layoutDuration: number,
    avgSpacing: number,
): GraphologyCache {
    const communities = new Set<number>();
    for (const node of graph.nodes()) {
        const comm = graph.getNodeAttribute(node, "community") as
            | number
            | null
            | undefined;
        // A node without a community must not be counted as one.
        if (comm !== undefined && comm !== null) {
            communities.add(comm);
        }
    }

    const now = Date.now();
    return {
        graph,
        cytoscapeElements,
        metadata: {
            nodeCount: graph.order,
            edgeCount: graph.size,
            communityCount: communities.size,
            layoutTimestamp: now,
            layoutDuration,
            avgSpacing,
        },
        lastUpdated: now,
        isValid: true,
    };
}
const debug = registerDebug("typeagent:browser:knowledge:graphology");

/** graphology graph instance; left untyped in this module. */
type Graph = any;

/** Input node; every property other than `id` is copied onto the graph node. */
export interface GraphNode {
    id: string;
    name: string;
    type?: string;
    confidence?: number;
    count?: number;
    [key: string]: any;
}

/** Input edge between two node ids. */
export interface GraphEdge {
    from: string;
    to: string;
    type?: string;
    confidence?: number;
    strength?: number;
    [key: string]: any;
}

/** Node (with `position`) or edge (with `source`/`target`) in Cytoscape's element format. */
export interface CytoscapeElement {
    data: {
        id?: string;
        source?: string;
        target?: string;
        name?: string;
        type?: string;
        confidence?: number;
        importance?: number;
        community?: number;
        color?: string;
        size?: number;
        [key: string]: any;
    };
    position?: { x: number; y: number };
}

/** Tuning knobs for {@link buildGraphologyGraph}; omitted fields use the defaults below. */
export interface GraphologyLayoutOptions {
    nodeLimit?: number;
    minEdgeConfidence?: number;
    denseClusterThreshold?: number;
    forceAtlas2Iterations?: number;
    noverlapIterations?: number;
    targetViewportSize?: number;
}

const DEFAULT_OPTIONS: Required<GraphologyLayoutOptions> = {
    nodeLimit: 2000,
    minEdgeConfidence: 0.3,
    denseClusterThreshold: 100,
    forceAtlas2Iterations: 150,
    noverlapIterations: 500,
    targetViewportSize: 2000,
};

// Palette cycled through in order of first appearance of each community.
const COMMUNITY_COLORS = [
    "#bf616a",
    "#d08770",
    "#ebcb8b",
    "#a3be8c",
    "#b48ead",
    "#8fbcbb",
    "#88c0d0",
    "#81a1c1",
    "#5e81ac",
];

// ForceAtlas2 settings shared by the global pass and the per-cluster pass.
const FORCE_ATLAS2_SETTINGS = {
    gravity: 0.05,
    scalingRatio: 100,
    strongGravityMode: false,
    linLogMode: false,
    barnesHutOptimize: true,
    barnesHutTheta: 0.5,
};

/**
 * Builds an undirected graph from `nodes` and `edges`, then decorates and
 * lays it out: degree-based importance and size, community detection and
 * colouring, ForceAtlas2 plus overlap removal.
 *
 * Filtering rules applied while building:
 * - only the first `nodeLimit` nodes are considered, and a repeated id is
 *   skipped instead of aborting the whole build;
 * - self-loops, edges touching unknown nodes and repeated node pairs are dropped;
 * - edges whose type is neither "parent" nor "parent-child" are dropped when
 *   their confidence is below `minEdgeConfidence` (a missing confidence
 *   counts as 1, an explicit 0 counts as 0);
 * - nodes left without any edge are removed.
 *
 * Note that any `importance` supplied on an input node is overwritten with
 * the node's degree.
 *
 * @returns The laid-out graph; empty when no connected node remains.
 */
export function buildGraphologyGraph(
    nodes: GraphNode[],
    edges: GraphEdge[],
    options: GraphologyLayoutOptions = {},
): Graph {
    const opts = { ...DEFAULT_OPTIONS, ...options };

    debug(
        `Building graphology graph: ${nodes.length} nodes, ${edges.length} edges`,
    );

    const graph = new Graph({ type: "undirected" });

    for (const node of nodes.slice(0, opts.nodeLimit)) {
        const { id, ...nodeProps } = node;
        // Adding a key twice is an error in graphology; keep the first one.
        if (graph.hasNode(id)) {
            debug(`Warning: Skipping duplicate node ${id}`);
            continue;
        }
        graph.addNode(id, {
            ...nodeProps,
            type: nodeProps.type || "entity",
            confidence: nodeProps.confidence || 0.5,
            count: nodeProps.count || 1,
        });
    }

    debug(`Added ${graph.order} nodes to graph`);

    initializeCircularLayout(graph);

    const nodeSet = new Set<string>(graph.nodes());
    const edgeSet = new Set<string>();
    let edgeCount = 0;

    for (const edge of edges) {
        if (edge.from === edge.to) continue;
        if (!nodeSet.has(edge.from) || !nodeSet.has(edge.to)) continue;

        // Undirected graph: A|B and B|A are the same edge.
        const edgeKey = [edge.from, edge.to].sort().join("|");
        if (edgeSet.has(edgeKey)) continue;

        const isHierarchyEdge =
            edge.type === "parent" || edge.type === "parent-child";
        // `??` rather than `||`: an explicit confidence of 0 must fail the
        // threshold instead of being promoted to 1.
        if (
            !isHierarchyEdge &&
            (edge.confidence ?? 1) < opts.minEdgeConfidence
        ) {
            continue;
        }

        edgeSet.add(edgeKey);
        try {
            graph.addEdge(edge.from, edge.to, {
                type: edge.type || "related",
                confidence: edge.confidence || 0.5,
                strength: edge.strength || edge.confidence || 0.5,
            });
            edgeCount++;
        } catch (error) {
            debug(`Warning: Could not add edge ${edge.from} -> ${edge.to}`);
        }
    }

    debug(`Added ${edgeCount} edges to graph`);

    // Remove isolated nodes (nodes with no edges)
    const isolatedNodes: string[] = graph
        .nodes()
        .filter((node: string) => graph.degree(node) === 0);

    if (isolatedNodes.length > 0) {
        debug(`Removing ${isolatedNodes.length} isolated nodes (no edges)`);
        for (const node of isolatedNodes) {
            graph.dropNode(node);
        }
    }

    // Nothing left to decorate or lay out.
    if (graph.order === 0) {
        debug("Graph has no connected nodes, skipping layout");
        return graph;
    }

    calculateNodeImportance(graph);
    assignNodeSizes(graph);
    detectCommunities(graph);
    assignCommunityColors(graph);
    applyMultiPhaseLayout(graph, opts);

    return graph;
}

/** Seeds every node with an `x`/`y` position on a circle. */
function initializeCircularLayout(graph: Graph): void {
    debug("Initializing circular layout...");
    const positions = circular(graph, { scale: 100 });

    for (const node of graph.nodes()) {
        graph.setNodeAttribute(node, "x", positions[node].x);
        graph.setNodeAttribute(node, "y", positions[node].y);
    }
}

/** Sets each node's `importance` attribute to its degree. */
function calculateNodeImportance(graph: Graph): void {
    debug("Calculating node importance (degree centrality)...");
    for (const node of graph.nodes()) {
        graph.setNodeAttribute(node, "importance", graph.degree(node));
    }
}

/**
 * Maps `importance` linearly onto a `size` between 25 and 60. When every
 * node has the same importance they all get the mid-point size.
 */
function assignNodeSizes(graph: Graph): void {
    const nodes: string[] = graph.nodes();

    // Single pass instead of Math.min(...values): no argument-count limit.
    let minImp = Infinity;
    let maxImp = -Infinity;
    for (const node of nodes) {
        const imp = graph.getNodeAttribute(node, "importance") as number;
        if (imp < minImp) minImp = imp;
        if (imp > maxImp) maxImp = imp;
    }

    for (const node of nodes) {
        const imp = graph.getNodeAttribute(node, "importance") as number;
        const normalizedImp =
            maxImp > minImp ? (imp - minImp) / (maxImp - minImp) : 0.5;
        const size = Math.max(25, Math.min(60, 25 + normalizedImp * 35));
        graph.setNodeAttribute(node, "size", size);
    }
}

/**
 * Writes a `community` attribute on every node using Louvain. If detection
 * throws, every node is placed in community 0 so later steps still work.
 */
function detectCommunities(graph: Graph): void {
    debug("Detecting communities (Louvain algorithm)...");
    try {
        louvain.assign(graph);
        const communities = new Set<number>();
        for (const node of graph.nodes()) {
            communities.add(
                graph.getNodeAttribute(node, "community") as number,
            );
        }
        debug(`Detected ${communities.size} communities`);
    } catch (error) {
        debug("Community detection failed, assigning all nodes to community 0");
        debug(
            `Community detection error: ${error instanceof Error ? error.message : String(error)}`,
        );
        for (const node of graph.nodes()) {
            graph.setNodeAttribute(node, "community", 0);
        }
    }
}

/** Gives every node the palette colour of its community (palette wraps around). */
function assignCommunityColors(graph: Graph): void {
    const communityColors: Record<number, string> = {};
    let colorIdx = 0;

    for (const node of graph.nodes()) {
        const comm = graph.getNodeAttribute(node, "community") as number;
        if (!(comm in communityColors)) {
            communityColors[comm] =
                COMMUNITY_COLORS[colorIdx % COMMUNITY_COLORS.length];
            colorIdx++;
        }
        graph.setNodeAttribute(node, "color", communityColors[comm]);
    }
}

/** Mean `x`/`y` position of `nodes` as currently stored in `graph`. */
function centroidOf(graph: Graph, nodes: string[]): { x: number; y: number } {
    let sumX = 0;
    let sumY = 0;
    for (const node of nodes) {
        sumX += graph.getNodeAttribute(node, "x") as number;
        sumY += graph.getNodeAttribute(node, "y") as number;
    }
    return { x: sumX / nodes.length, y: sumY / nodes.length };
}

/**
 * Re-runs the layout on one community in isolation and writes the result
 * back to `graph`, translated so the community keeps its previous centroid.
 */
function refineDenseCommunity(
    graph: Graph,
    nodes: string[],
    options: Required<GraphologyLayoutOptions>,
): void {
    // Taken BEFORE the sub-layout runs so it is the pre-refinement centre
    // no matter how the library stores attributes.
    const originalCentroid = centroidOf(graph, nodes);

    const subgraph = new Graph({ type: "undirected" });
    for (const node of nodes) {
        // Shallow copies: the sub-layout must not be able to write through
        // to the parent graph's attribute objects.
        // NOTE(review): presumably graphology keeps the object it is given;
        // the copy is harmless either way.
        subgraph.addNode(node, { ...graph.getNodeAttributes(node) });
    }
    for (const edge of graph.edges()) {
        const source = graph.source(edge);
        const target = graph.target(edge);
        if (subgraph.hasNode(source) && subgraph.hasNode(target)) {
            subgraph.addEdge(source, target, {
                ...graph.getEdgeAttributes(edge),
            });
        }
    }

    forceAtlas2.assign(subgraph, {
        iterations: options.forceAtlas2Iterations,
        settings: { ...FORCE_ATLAS2_SETTINGS },
    });

    noverlap.assign(subgraph, {
        maxIterations: 300,
        settings: {
            margin: 25,
            ratio: 1.4,
            expansion: 1.2,
            gridSize: 30,
        },
    });

    const refinedCentroid = centroidOf(subgraph, nodes);

    for (const node of nodes) {
        const newX = subgraph.getNodeAttribute(node, "x") as number;
        const newY = subgraph.getNodeAttribute(node, "y") as number;
        graph.setNodeAttribute(
            node,
            "x",
            originalCentroid.x + (newX - refinedCentroid.x),
        );
        graph.setNodeAttribute(
            node,
            "y",
            originalCentroid.y + (newY - refinedCentroid.y),
        );
    }
}

/**
 * Positions the graph in three steps: global ForceAtlas2, global overlap
 * removal, then a separate refinement of every community that has more than
 * `denseClusterThreshold` nodes.
 */
function applyMultiPhaseLayout(
    graph: Graph,
    options: Required<GraphologyLayoutOptions>,
): void {
    debug("=== Layout Phase ===");

    debug("Step 1: Running global ForceAtlas2...");
    forceAtlas2.assign(graph, {
        iterations: options.forceAtlas2Iterations,
        settings: { ...FORCE_ATLAS2_SETTINGS },
    });
    debug(" ✓ ForceAtlas2 complete");

    debug("Step 2: Applying global overlap prevention...");
    noverlap.assign(graph, {
        maxIterations: options.noverlapIterations,
        settings: {
            margin: 20,
            ratio: 1.3,
            expansion: 1.2,
            gridSize: 40,
        },
    });
    debug(" ✓ Global overlap prevention complete");

    const communities = groupNodesByCommunity(graph);
    const denseCommunities = Object.entries(communities).filter(
        ([_, nodes]) => nodes.length > options.denseClusterThreshold,
    );

    if (denseCommunities.length > 0) {
        debug(
            `Step 3: Refining ${denseCommunities.length} dense clusters (>${options.denseClusterThreshold} nodes)...`,
        );

        for (const [comm, nodes] of denseCommunities) {
            debug(` Processing community ${comm} (${nodes.length} nodes)...`);
            refineDenseCommunity(graph, nodes, options);
        }
        debug(" ✓ Dense cluster refinement complete");
    } else {
        debug("Step 3: No dense clusters requiring refinement");
    }

    debug("=== Layout Complete ===");
}

/** Buckets node keys by the string form of their `community` attribute. */
function groupNodesByCommunity(graph: Graph): Record<string, string[]> {
    const communities: Record<string, string[]> = {};
    for (const node of graph.nodes()) {
        const comm = String(graph.getNodeAttribute(node, "community"));
        if (!communities[comm]) {
            communities[comm] = [];
        }
        communities[comm].push(node);
    }
    return communities;
}

/**
 * Projects the graph into Cytoscape elements: one element per node, with a
 * position, followed by one element per edge.
 *
 * Node positions are stretched independently on each axis so that the
 * layout's bounding box fills [-targetViewportSize, targetViewportSize].
 * An axis on which all nodes share the same coordinate (including the
 * single-node case) is placed at the centre, 0.
 *
 * NOTE(review): `nodeType` is always "topic", also for graphs whose nodes
 * carry another `type`; confirm that the entity views expect this.
 */
export function convertToCytoscapeElements(
    graph: Graph,
    targetViewportSize: number = 2000,
): CytoscapeElement[] {
    debug("Converting to Cytoscape format...");

    const elements: CytoscapeElement[] = [];

    let minX = Infinity,
        maxX = -Infinity,
        minY = Infinity,
        maxY = -Infinity;
    for (const node of graph.nodes()) {
        const x = graph.getNodeAttribute(node, "x") as number;
        const y = graph.getNodeAttribute(node, "y") as number;
        minX = Math.min(minX, x);
        maxX = Math.max(maxX, x);
        minY = Math.min(minY, y);
        maxY = Math.max(maxY, y);
    }

    const targetMin = -targetViewportSize;
    const targetMax = targetViewportSize;
    const rangeX = maxX - minX;
    const rangeY = maxY - minY;
    const scaleX = rangeX === 0 ? 1 : (targetMax - targetMin) / rangeX;
    const scaleY = rangeY === 0 ? 1 : (targetMax - targetMin) / rangeY;

    debug(`Scaling factors: X=${scaleX.toFixed(2)}, Y=${scaleY.toFixed(2)}`);
    debug(`Target viewport: [${targetMin}, ${targetMax}]`);

    for (const node of graph.nodes()) {
        const attr = graph.getNodeAttributes(node);
        // A zero-extent axis has nothing to stretch: centre it rather than
        // pinning it to the viewport edge.
        const x = rangeX === 0 ? 0 : (attr.x - minX) * scaleX + targetMin;
        const y = rangeY === 0 ? 0 : (attr.y - minY) * scaleY + targetMin;

        elements.push({
            data: {
                id: node,
                label: attr.name,
                level: attr.level || 0,
                confidence: attr.confidence,
                computedImportance: attr.importance,
                parentId: attr.parentId,
                childCount: attr.childCount || 0,
                nodeType: "topic",
                color: attr.color,
                size: attr.size,
            },
            position: { x, y },
        });
    }

    for (const edge of graph.edges()) {
        const attr = graph.getEdgeAttributes(edge);

        elements.push({
            data: {
                source: graph.source(edge),
                target: graph.target(edge),
                type: attr.type,
                confidence: attr.confidence,
                strength: attr.strength,
                color: "#ddd",
            },
        });
    }

    debug(
        `Converted ${graph.order} nodes and ${graph.size} edges to Cytoscape format`,
    );

    return elements;
}

/**
 * Measures a finished layout.
 *
 * @returns `avgSpacing`, the mean distance from each node to its nearest
 * neighbour (0 when the graph has fewer than two nodes), and the bounding
 * box of all node positions (all zeros for an empty graph).
 */
export function calculateLayoutQualityMetrics(graph: Graph): {
    avgSpacing: number;
    minX: number;
    maxX: number;
    minY: number;
    maxY: number;
} {
    const nodes: string[] = graph.nodes();
    const count = nodes.length;

    if (count === 0) {
        return { avgSpacing: 0, minX: 0, maxX: 0, minY: 0, maxY: 0 };
    }

    // Read every coordinate once instead of once per pair.
    const xs = nodes.map((n) => graph.getNodeAttribute(n, "x") as number);
    const ys = nodes.map((n) => graph.getNodeAttribute(n, "y") as number);

    let minX = Infinity,
        maxX = -Infinity,
        minY = Infinity,
        maxY = -Infinity;
    for (let i = 0; i < count; i++) {
        minX = Math.min(minX, xs[i]);
        maxX = Math.max(maxX, xs[i]);
        minY = Math.min(minY, ys[i]);
        maxY = Math.max(maxY, ys[i]);
    }

    let totalMinDist = 0;
    if (count > 1) {
        for (let i = 0; i < count; i++) {
            // Compare squared distances; take the square root once per node.
            let minDistSq = Infinity;
            for (let j = 0; j < count; j++) {
                if (i === j) continue;
                const distSq = (xs[j] - xs[i]) ** 2 + (ys[j] - ys[i]) ** 2;
                if (distSq < minDistSq) minDistSq = distSq;
            }
            totalMinDist += Math.sqrt(minDistSq);
        }
    }

    // A lone node has no neighbour; report 0 rather than Infinity.
    const avgSpacing = count > 1 ? totalMinDist / count : 0;

    return { avgSpacing, minX, maxX, minY, maxY };
}
importanceData.entities // Use entities as-is (preserves graphology data) + : this.enhanceEntitiesForLoD(importanceData.entities), // Fallback to blue gradient relationships: importanceData.relationships, communities: [], topics: [], @@ -1427,7 +1434,24 @@ class EntityGraphView { totalRelationships: importanceData.relationships.length, totalCommunities: 0, }, + metadata: importanceData.metadata, }; + + if (hasGraphologyLayout) { + console.log( + `[EntityGraphView] Using graphology preset layout with community colors (${importanceData.metadata.graphologyLayout.elements?.length || 0} elements)`, + ); + transformedData.presetLayout = { + elements: importanceData.metadata.graphologyLayout.elements, + layoutDuration: + importanceData.metadata.graphologyLayout.layoutDuration, + avgSpacing: + importanceData.metadata.graphologyLayout.avgSpacing, + communityCount: + importanceData.metadata.graphologyLayout.communityCount, + }; + } + await this.visualizer.loadGlobalGraph(transformedData); this.hideGraphLoading(); } catch (error) { diff --git a/ts/packages/agents/browser/src/extension/views/entityGraphVisualizer.ts b/ts/packages/agents/browser/src/extension/views/entityGraphVisualizer.ts index 51b5a116f..35e3d0fd6 100644 --- a/ts/packages/agents/browser/src/extension/views/entityGraphVisualizer.ts +++ b/ts/packages/agents/browser/src/extension/views/entityGraphVisualizer.ts @@ -1407,6 +1407,15 @@ export class EntityGraphVisualizer { }, }, + // Override type-specific colors with community colors when available + // This selector has higher priority by being placed after type-specific selectors + { + selector: "node[color]", + style: { + "background-color": "data(color)", + }, + }, + // Entity nodes (zoomed-out view with community colors) { selector: 'node[type="entity"]', @@ -1730,6 +1739,11 @@ export class EntityGraphVisualizer { this.globalInstance.add(elements); + // Check if elements have preset positions + const hasPresetPositions = elements.some( + (el: any) => el.group 
=== "nodes" && el.position, + ); + // Apply direct sizing based on computed importance (since CSS mapData doesn't auto-refresh) this.applyImportanceBasedSizing(); @@ -1749,12 +1763,30 @@ export class EntityGraphVisualizer { // Store global data reference this.globalGraphData = graphData; - // Apply layout optimized for global size - await this.applyLayoutToInstance( - this.globalInstance, - "cose", - graphData.entities.length, - ); + // Apply layout based on whether we have preset positions + if (hasPresetPositions) { + console.log( + "[Visualizer] Using preset layout (graphology positions)", + ); + // Use preset layout - no calculation needed + this.globalInstance + .layout({ + name: "preset", + fit: true, + padding: 50, + }) + .run(); + } else { + console.log( + "[Visualizer] No preset positions - running force layout", + ); + // Fallback to force-directed layout + await this.applyLayoutToInstance( + this.globalInstance, + "cose", + graphData.entities.length, + ); + } // Fit the graph to the viewport to let Cytoscape handle optimal sizing this.globalInstance.fit({ @@ -2089,6 +2121,28 @@ export class EntityGraphVisualizer { * Convert graph data to Cytoscape elements (enhanced for triple-instance) */ private convertToGraphElements(graphData: any): any[] { + // Check if preset layout is available + const presetLayout = graphData.presetLayout?.elements; + const presetPositions = new Map(); + + if (presetLayout) { + console.log( + `[Visualizer] Using preset layout with ${presetLayout.length} positioned elements`, + ); + for (const element of presetLayout) { + if (element.position && element.data?.id) { + presetPositions.set(element.data.id, element.position); + } + // Also try label-based lookup + if (element.position && element.data?.label) { + presetPositions.set(element.data.label, element.position); + } + } + console.log( + `[Visualizer] Extracted ${presetPositions.size} preset positions`, + ); + } + const nodes = graphData.entities.map((entity: any) => { // Set 
/**
 * Sends an "invalidateTopicCacheAction" message with empty parameters and
 * returns whatever `sendMessage` resolves to.
 */
// NOTE(review): the return type's generic argument was lost in extraction;
// `any` is assumed here — confirm against the sibling methods of this class.
async invalidateTopicCache(): Promise<any> {
    const message = {
        type: "invalidateTopicCacheAction",
        parameters: {},
    };
    return this.sendMessage(message);
}
a/ts/packages/agents/browser/src/extension/views/graphDataProvider.ts +++ b/ts/packages/agents/browser/src/extension/views/graphDataProvider.ts @@ -472,33 +472,37 @@ class GraphDataProviderImpl implements GraphDataProvider { const importance = hybridEntity.importance || 0; const degree = hybridEntity.degree || 0; - // Compute size based on importance/degree (remove server size field dependency) - const computedSize = Math.max(20, 20 + Math.sqrt(importance * 1000)); // Dynamic sizing + // Preserve graphology size if available, otherwise compute + const computedSize = + hybridEntity.size || + Math.max(20, 20 + Math.sqrt(importance * 1000)); - // Compute colors based on type (remove server color field dependency) - let color = "#6C7B7F"; // Default gray + // Preserve graphology color if available, otherwise compute based on type + let color = hybridEntity.color || "#6C7B7F"; // Use graphology color or default gray let borderColor = "#4A5568"; // Default border - // Type-specific styling - switch (entityType) { - case "concept": - case "entity": - color = "#4299E1"; // Blue - borderColor = "#2B6CB0"; - break; - case "website": - color = "#48BB78"; // Green - borderColor = "#2F855A"; - break; - case "topic": - color = "#ED8936"; // Orange - borderColor = "#C05621"; - break; - case "unknown": - default: - color = "#A0AEC0"; // Light gray - borderColor = "#718096"; - break; + // Only compute type-specific colors if no color was provided + if (!hybridEntity.color) { + switch (entityType) { + case "concept": + case "entity": + color = "#4299E1"; // Blue + borderColor = "#2B6CB0"; + break; + case "website": + color = "#48BB78"; // Green + borderColor = "#2F855A"; + break; + case "topic": + color = "#ED8936"; // Orange + borderColor = "#C05621"; + break; + case "unknown": + default: + color = "#A0AEC0"; // Light gray + borderColor = "#718096"; + break; + } } return { @@ -515,14 +519,14 @@ class GraphDataProviderImpl implements GraphDataProvider { communityId: 
hybridEntity.communityId, // Optional fields (only if non-empty) - ...(hybridEntity.community && { + ...(hybridEntity.community !== undefined && { community: hybridEntity.community, }), ...(hybridEntity.description && { description: hybridEntity.description, }), - // Computed UI properties + // Computed or preserved UI properties color: color, size: computedSize, borderColor: borderColor, diff --git a/ts/packages/agents/browser/src/extension/views/topicGraphView.html b/ts/packages/agents/browser/src/extension/views/topicGraphView.html index 4762218e1..82acf9b4c 100644 --- a/ts/packages/agents/browser/src/extension/views/topicGraphView.html +++ b/ts/packages/agents/browser/src/extension/views/topicGraphView.html @@ -145,6 +145,31 @@
Error Loading Topic Graph
> + + +
+ + +
diff --git a/ts/packages/agents/browser/src/extension/views/topicGraphView.ts b/ts/packages/agents/browser/src/extension/views/topicGraphView.ts index e4fbccb2b..71355532a 100644 --- a/ts/packages/agents/browser/src/extension/views/topicGraphView.ts +++ b/ts/packages/agents/browser/src/extension/views/topicGraphView.ts @@ -9,16 +9,19 @@ interface TopicGraphViewState { searchQuery: string; visibleLevels: number[]; sidebarOpen: boolean; + prototypeMode: boolean; } class TopicGraphView { private visualizer: TopicGraphVisualizer | null = null; private extensionService: any; + private lastLoadedData: any = null; private state: TopicGraphViewState = { currentTopic: null, searchQuery: "", visibleLevels: [0, 1, 2, 3], sidebarOpen: false, + prototypeMode: false, }; private loadingOverlay: HTMLElement; @@ -86,6 +89,25 @@ class TopicGraphView { this.exportGraph(); }); + document + .getElementById("exportJsonButton") + ?.addEventListener("click", () => { + this.exportGraphologyJson(); + }); + + document + .getElementById("invalidateCacheButton") + ?.addEventListener("click", () => { + this.invalidateCache(); + }); + + document + .getElementById("prototypeMode") + ?.addEventListener("change", (e) => { + const checkbox = e.target as HTMLInputElement; + this.togglePrototypeMode(checkbox.checked); + }); + // Settings modal removed - using optimized defaults // Sidebar close button @@ -173,6 +195,8 @@ class TopicGraphView { return; } + this.lastLoadedData = topicData; + await this.visualizer?.init(topicData); this.updateGraphStats(); @@ -242,13 +266,27 @@ class TopicGraphView { const relationships = data.relationships || []; - return { + const result: any = { centerTopic: null, topics, relationships, maxDepth: Math.max(...topics.map((t: any) => t.level), 0), metadata: data.metadata, }; + + if (data.metadata?.graphologyLayout) { + console.log( + `[TopicGraphView] Using graphology preset layout (${data.metadata.graphologyLayout.elements?.length || 0} elements)`, + ); + 
/**
 * Downloads the preset-layout elements of the last loaded graph as a
 * pretty-printed JSON file named `graphology-topic-graph-<YYYY-MM-DD>.json`.
 *
 * Shows a notification instead when no preset layout (or no element list)
 * is available.
 */
private exportGraphologyJson(): void {
    // Guard on the element list itself: a presetLayout without `elements`
    // would otherwise be exported as the literal text "undefined".
    const elements = this.lastLoadedData?.presetLayout?.elements;
    if (!elements) {
        this.showNotification("No graphology layout data available to export");
        return;
    }

    const jsonData = JSON.stringify(elements, null, 2);
    const blob = new Blob([jsonData], { type: "application/json" });
    const url = URL.createObjectURL(blob);

    const link = document.createElement("a");
    link.download = `graphology-topic-graph-${new Date().toISOString().slice(0, 10)}.json`;
    link.href = url;
    link.style.display = "none";

    // Attach the anchor before clicking; some browsers ignore click() on a
    // detached element.
    document.body.appendChild(link);
    link.click();
    link.remove();

    // Revoke on a later tick so the browser has started reading the blob
    // before its URL becomes invalid.
    setTimeout(() => URL.revokeObjectURL(url), 0);

    this.showNotification("Cytoscape JSON exported successfully");
}

/**
 * Asks the extension service to invalidate the topic cache and reports the
 * outcome through a notification. Errors are logged and surfaced as a
 * generic failure notification.
 */
private async invalidateCache(): Promise<void> {
    try {
        const result = await this.extensionService.invalidateTopicCache();

        if (result && result.success) {
            this.showNotification(result.message);
            return;
        }
        this.showNotification(result?.message || "Failed to invalidate cache");
    } catch (error) {
        console.error("Error invalidating cache:", error);
        this.showNotification("Error invalidating cache");
    }
}

/**
 * Records the prototype-mode flag in view state and forwards it to the
 * visualizer. When no graph data has been loaded yet, the flag is still
 * recorded but the visualizer is left untouched and the user is notified.
 */
private togglePrototypeMode(enabled: boolean): void {
    this.state.prototypeMode = enabled;
    console.log(`[TopicGraphView] Prototype mode: ${enabled ? "ENABLED" : "DISABLED"}`);

    if (!this.lastLoadedData) {
        this.showNotification("No data available. Load a graph first.");
        return;
    }

    this.visualizer?.setPrototypeMode(enabled);
    this.showNotification(enabled ? "Prototype mode enabled" : "Prototype mode disabled");
}
this.convertToTopicElements(data); + } // Use batch operations for better performance instance.batch(() => { @@ -1052,7 +1076,7 @@ export class TopicGraphVisualizer { }); // Apply layout on this specific instance - await this.applyLayoutToInstance(instance); + await this.applyLayoutToInstance(instance, usePresetLayout); // Focus on center topic if specified if (data.centerTopic) { @@ -1262,7 +1286,9 @@ export class TopicGraphVisualizer { { selector: 'node[nodeType="topic"]', style: { - "background-color": "#FF6B9D", + "background-color": "data(color)", + width: "data(size)", + height: "data(size)", label: "data(label)", "text-valign": "bottom", "text-margin-y": 5, @@ -1270,7 +1296,7 @@ export class TopicGraphVisualizer { "font-weight": "bold", color: "#333", "border-width": 2, - "border-color": "#E5507A", + "border-color": "#666", "min-zoomed-font-size": 8, "transition-property": "none", "transition-duration": 0, @@ -1278,42 +1304,30 @@ export class TopicGraphVisualizer { }, }, - // Level-specific styling with fixed sizes for performance + // Level-specific styling - using graphology community colors instead { selector: ".level-0", style: { - "background-color": "#4A90E2", - "border-color": "#1565C0", shape: "roundrectangle", - width: 60, - height: 60, "font-size": "14px", "font-weight": "bold", - "text-opacity": 1, // Show labels for important level-0 nodes + "text-opacity": 1, "z-index": 1000, }, }, { selector: ".level-1", style: { - "background-color": "#7ED321", - "border-color": "#388E3C", shape: "ellipse", - width: 50, - height: 50, "font-size": "12px", - "text-opacity": 1, // Show labels for level-1 nodes + "text-opacity": 1, "z-index": 900, }, }, { selector: ".level-2", style: { - "background-color": "#F5A623", - "border-color": "#F57C00", shape: "diamond", - width: 35, - height: 35, "font-size": "11px", "z-index": 800, }, @@ -1321,11 +1335,7 @@ export class TopicGraphVisualizer { { selector: ".level-3", style: { - "background-color": "#BD10E0", - 
/**
 * Apply layout to a specific instance
 *
 * Runs either a "preset" layout (nodes keep the positions they already
 * carry; no fitting, no animation) or the options returned by
 * `getLayoutOptions()`, and resolves when the layout emits "layoutstop".
 *
 * @param instance Cytoscape instance to lay out.
 * @param usePreset When true, use the pre-computed node positions.
 */
private async applyLayoutToInstance(
    instance: any,
    usePreset: boolean = false,
): Promise<void> {
    let layoutConfig;

    if (usePreset) {
        layoutConfig = {
            name: "preset",
            fit: false,
            animate: false,
        };
        console.log(
            "[TopicGraphVisualizer] Applying preset layout (using pre-computed positions)",
        );
    } else {
        layoutConfig = this.getLayoutOptions();
        console.log(`[TopicGraphVisualizer] Computing CoSE layout...`);
    }

    return new Promise<void>((resolve) => {
        const layout = instance.layout(layoutConfig);
        layout.on("layoutstop", () => {
            if (!usePreset) {
                console.log(
                    "[TopicGraphVisualizer] CoSE layout computation complete",
                );
            }
            resolve();
        });
        layout.run();
    });
}

/**
 * Enable or disable prototype rendering mode
 * When enabled, disables LoD and shows all elements with simple styling
 *
 * Enabling forces every node and edge visible through inline style
 * overrides and hides node labels. Disabling clears those overrides and
 * re-applies level-of-detail at the current zoom.
 */
public setPrototypeMode(enabled: boolean): void {
    if (!this.cy) {
        console.warn("[TopicGraphVisualizer] No Cytoscape instance available");
        return;
    }

    this.prototypeModeEnabled = enabled;

    if (enabled) {
        console.log("[TopicGraphVisualizer] Enabling prototype mode - disabling LoD, showing all elements");

        this.cy.batch(() => {
            this.cy.nodes().forEach((node: any) => {
                node.removeClass("hidden-at-zoom");
                node.addClass("visible-at-zoom");
                node.style("display", "element");
                node.style("events", "yes");
                node.style("text-opacity", 0);
            });

            this.cy.edges().forEach((edge: any) => {
                edge.removeClass("hidden-at-zoom");
                edge.addClass("visible-at-zoom");
                edge.style("display", "element");
                edge.style("events", "yes");
            });
        });

        console.log(`[TopicGraphVisualizer] Prototype mode enabled - ${this.cy.nodes().length} nodes, ${this.cy.edges().length} edges visible`);
        return;
    }

    console.log("[TopicGraphVisualizer] Disabling prototype mode - re-enabling LoD");

    // Clear the inline overrides set while enabling. Left in place they
    // keep taking precedence over the stylesheet, so labels would stay
    // hidden and class-based visibility would have no effect.
    // NOTE(review): assumes applyLevelOfDetail re-derives visibility from
    // scratch — confirm it does not rely on these inline values.
    this.cy.batch(() => {
        this.cy.nodes().removeStyle("display events text-opacity");
        this.cy.edges().removeStyle("display events");
    });

    const currentZoom = this.cy.zoom();

    // applyLevelOfDetail is async: log completion only once it has
    // finished, and surface failures instead of leaving a floating promise.
    void this.applyLevelOfDetail(currentZoom)
        .then(() => {
            console.log(`[TopicGraphVisualizer] Prototype mode disabled - LoD re-applied at zoom ${currentZoom.toFixed(2)}x`);
        })
        .catch((error: unknown) => {
            console.error("[TopicGraphVisualizer] Failed to re-apply LoD:", error);
        });
}
parentId?: string; // Reference to parent topic childIds: string[]; // References to child topics - sourceFragments: string[]; // Which HTML fragments contributed + sourceRefOrdinals: number[]; // SemanticRef ordinals linking to knowledge topics confidence: number; // Extraction confidence (0-1) keywords: string[]; // Associated keywords entityReferences: string[]; // Related entities diff --git a/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopics.ts b/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopics.ts index ea5af950a..db809b919 100644 --- a/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopics.ts +++ b/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopics.ts @@ -312,7 +312,7 @@ JSON OUTPUT:`; name: rootData.rootTopic, level: 0, childIds: [], - sourceFragments: [], + sourceRefOrdinals: [], confidence: 0.8, keywords: [rootData.rootTopic], entityReferences: [], @@ -331,7 +331,7 @@ JSON OUTPUT:`; level: 1, parentId: rootTopic.id, childIds: [], - sourceFragments: [], + sourceRefOrdinals: [], confidence: 0.7, keywords: [childName], entityReferences: [], @@ -353,7 +353,7 @@ JSON OUTPUT:`; level: 2, parentId: childTopic.id, childIds: [], - sourceFragments: [], + sourceRefOrdinals: [], confidence: 0.6, keywords: [grandchildName], entityReferences: [], @@ -386,7 +386,7 @@ JSON OUTPUT:`; name: rootData.rootTopic, level: 0, childIds: [], - sourceFragments: [], + sourceRefOrdinals: [], confidence: 0.6, // Lower confidence for fallback keywords: [rootData.rootTopic], entityReferences: [], @@ -405,7 +405,7 @@ JSON OUTPUT:`; level: 1, parentId: rootTopic.id, childIds: [], - sourceFragments: [], + sourceRefOrdinals: [], confidence: 0.5, // Lower confidence for fallback keywords: [childName], entityReferences: [], @@ -430,7 +430,7 @@ JSON OUTPUT:`; name: aggregatedTopics[0], level: 0, childIds: [], - sourceFragments: [], + sourceRefOrdinals: [], confidence: 0.8, keywords: [aggregatedTopics[0]], entityReferences: [], @@ 
-448,7 +448,7 @@ JSON OUTPUT:`; level: 1, parentId: rootTopic.id, childIds: [], - sourceFragments: [], + sourceRefOrdinals: [], confidence: 0.6, keywords: [topics[i]], entityReferences: [], @@ -475,11 +475,16 @@ function enrichHierarchy( fragmentExtractions: FragmentTopicExtraction[], context: TopicExtractionContext, ): TopicHierarchy { - // Add fragment source information + // Add semanticRef ordinal information for (const extraction of fragmentExtractions) { for (const [, topic] of hierarchy.topicMap) { if (extraction.topics.includes(topic.name)) { - topic.sourceFragments.push(extraction.fragmentId); + const ordinal = typeof extraction.fragmentId === 'number' + ? extraction.fragmentId + : parseInt(extraction.fragmentId, 10); + if (!isNaN(ordinal)) { + topic.sourceRefOrdinals.push(ordinal); + } } } } @@ -505,12 +510,12 @@ function mergeHierarchies( mergedRootTopics.push(topic); } } else { - // Merge source fragments for existing topics + // Merge sourceRefOrdinals for existing topics const existingTopic = mergedTopicMap.get(topicId)!; - existingTopic.sourceFragments = [ + existingTopic.sourceRefOrdinals = [ ...new Set([ - ...existingTopic.sourceFragments, - ...topic.sourceFragments, + ...existingTopic.sourceRefOrdinals, + ...topic.sourceRefOrdinals, ]), ]; } diff --git a/ts/packages/memory/website/src/websiteCollection.ts b/ts/packages/memory/website/src/websiteCollection.ts index 791f38d0d..a6b847449 100644 --- a/ts/packages/memory/website/src/websiteCollection.ts +++ b/ts/packages/memory/website/src/websiteCollection.ts @@ -1944,7 +1944,7 @@ export class WebsiteCollection `[Knowledge Graph] AI model not available for topic merging: ${error}`, ); // Fall back to simple hierarchical grouping - await this.buildSimpleTopicHierarchy(flatTopics, urlLimit); + await this.buildSimpleTopicHierarchy(flatTopics); return; } @@ -1966,21 +1966,21 @@ export class WebsiteCollection confidence: 0.9, keywords: [mergeResult.topic], }, - urlLimit, + "aggregated:multiple-sources", + 
"aggregated", ); // Organize flat topics under the root await this.organizeTopicsUnderRoot( flatTopics, rootTopicId, - urlLimit, ); } else { // Fall back to simple hierarchy if merging fails debug( `[Knowledge Graph] Topic merging failed, using simple hierarchy`, ); - await this.buildSimpleTopicHierarchy(flatTopics, urlLimit); + await this.buildSimpleTopicHierarchy(flatTopics); } debug( @@ -2005,6 +2005,7 @@ export class WebsiteCollection ); let globalHierarchy: any | undefined; + const websiteUrlMap = new Map(); for (const website of newWebsites) { const docHierarchy = (website.knowledge as any)?.topicHierarchy as @@ -2015,28 +2016,40 @@ export class WebsiteCollection continue; } - if (!globalHierarchy) { - let topicMap: Map; + let topicMap: Map; - if (docHierarchy.topicMap instanceof Map) { - topicMap = docHierarchy.topicMap; - } else if ( - typeof docHierarchy.topicMap === "object" && - docHierarchy.topicMap !== null - ) { - topicMap = new Map(Object.entries(docHierarchy.topicMap)); - } else { - topicMap = new Map(); + if (docHierarchy.topicMap instanceof Map) { + topicMap = docHierarchy.topicMap; + } else if ( + typeof docHierarchy.topicMap === "object" && + docHierarchy.topicMap !== null + ) { + topicMap = new Map(Object.entries(docHierarchy.topicMap)); + } else { + topicMap = new Map(); + } + + // Track which website each topic came from + const websiteUrl = website.metadata.url || "unknown"; + const websiteDomain = website.metadata.domain || "unknown"; + for (const [topicId] of topicMap) { + if (!websiteUrlMap.has(topicId)) { + websiteUrlMap.set(topicId, { url: websiteUrl, domain: websiteDomain }); } + } - globalHierarchy = { - ...docHierarchy, - topicMap: topicMap, - }; + const hierarchyWithMap = { + ...docHierarchy, + topicMap: topicMap, + }; + + if (!globalHierarchy) { + globalHierarchy = hierarchyWithMap; } else { globalHierarchy = this.mergeHierarchies( globalHierarchy, - docHierarchy, + hierarchyWithMap, + websiteUrl, ); } } @@ -2050,16 +2063,23 @@ 
export class WebsiteCollection await this.storeTopicHierarchyRecursive( rootTopic, globalHierarchy.topicMap, + websiteUrlMap, ); } } catch (error) { debug( `[Knowledge Graph] Error updating hierarchical topics: ${error}`, + // Note: Full document provenance is available via semanticRefIndex lookup + // Each topic has semanticRefs with range.start.messageOrdinal pointing to source documents ); } } - private mergeHierarchies(existing: any, newHierarchy: any): any { + private mergeHierarchies( + existing: any, + newHierarchy: any, + newWebsiteUrl: string, + ): any { // Convert existing topicMap to Map if it's a plain object (from deserialization) const existingTopicMap = existing.topicMap instanceof Map @@ -2084,12 +2104,14 @@ export class WebsiteCollection } else { const existingTopic: any = mergedTopicMap.get(topicId); if (existingTopic) { - existingTopic.sourceFragments = [ + // Merge sourceRefOrdinals to track semanticRefs that contributed to this topic + existingTopic.sourceRefOrdinals = [ ...new Set([ - ...existingTopic.sourceFragments, - ...topic.sourceFragments, + ...existingTopic.sourceRefOrdinals, + ...topic.sourceRefOrdinals, ]), ]; + } } } @@ -2105,6 +2127,7 @@ export class WebsiteCollection private async storeTopicHierarchyRecursive( topic: any, topicMap: Map, + websiteUrlMap: Map, ): Promise { const existing = this.hierarchicalTopics.getTopicByName( topic.name, @@ -2123,20 +2146,34 @@ export class WebsiteCollection } } - await this.storeHierarchicalTopic({ - topicId: topic.id, - topicName: topic.name, - level: topic.level, - ...(parentTopicId ? { parentTopicId } : {}), - confidence: topic.confidence, - keywords: topic.keywords, - }); + // Get URL from the first website that contributed this topic + const urlInfo = websiteUrlMap.get(topic.id) || { + url: "unknown", + domain: "unknown", + }; + + await this.storeHierarchicalTopic( + { + topicId: topic.id, + topicName: topic.name, + level: topic.level, + ...(parentTopicId ? 
{ parentTopicId } : {}), + confidence: topic.confidence, + keywords: topic.keywords, + }, + urlInfo.url, + urlInfo.domain, + ); } for (const childId of topic.childIds) { const childTopic = topicMap.get(childId); if (childTopic) { - await this.storeTopicHierarchyRecursive(childTopic, topicMap); + await this.storeTopicHierarchyRecursive( + childTopic, + topicMap, + websiteUrlMap, + ); } } } @@ -2899,7 +2936,6 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation */ private async buildSimpleTopicHierarchy( topics: string[], - urlLimit?: number, ): Promise { debug( `[Knowledge Graph] Building simple topic hierarchy for ${topics.length} topics`, @@ -2922,7 +2958,8 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation confidence: 0.7, keywords: [groupName], }, - urlLimit, + "aggregated:multiple-sources", + "aggregated", ); // Store child topics @@ -2937,7 +2974,8 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation confidence: 0.6, keywords: [topic], }, - urlLimit, + "aggregated:multiple-sources", + "aggregated", ); } @@ -2951,7 +2989,6 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation private async organizeTopicsUnderRoot( topics: string[], rootTopicId: string, - urlLimit?: number, ): Promise { // Group similar topics const groups = this.groupTopicsBySimpleSimilarity(topics); @@ -2972,7 +3009,8 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation confidence: 0.7, keywords: [groupName], }, - urlLimit, + "aggregated:multiple-sources", + "aggregated", ); // Store leaf topics @@ -2987,7 +3025,8 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation confidence: 0.6, keywords: [topic], }, - urlLimit, + "aggregated:multiple-sources", + "aggregated", ); } } @@ -3004,7 +3043,8 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation confidence: 0.6, keywords: [topic], }, 
- urlLimit, + "aggregated:multiple-sources", + "aggregated", ); } } @@ -3022,18 +3062,9 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation confidence: number; keywords: string[]; }, - urlLimit?: number, + websiteUrl: string, + websiteDomain: string, ): Promise { - // Get a sample URL and domain from processed websites - const websites = this.getWebsites(); - const websitesToProcess = urlLimit - ? websites.slice(0, urlLimit) - : websites; - - const sampleWebsite = websitesToProcess[0]; - const url = sampleWebsite?.metadata?.url || "unknown"; - const domain = sampleWebsite?.metadata?.domain || "unknown"; - const sourceRef: dataFrame.RowSourceRef = { range: { start: { messageOrdinal: 0, chunkOrdinal: 0 }, @@ -3044,8 +3075,8 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation const topicRow = { sourceRef, record: { - url, - domain, + url: websiteUrl, + domain: websiteDomain, topicId: topic.topicId, topicName: topic.topicName, level: topic.level, diff --git a/ts/pnpm-lock.yaml b/ts/pnpm-lock.yaml index 5e47c528e..f6bc35faf 100644 --- a/ts/pnpm-lock.yaml +++ b/ts/pnpm-lock.yaml @@ -1112,6 +1112,21 @@ importers: express-rate-limit: specifier: ^7.5.0 version: 7.5.0(express@4.21.2) + graphology: + specifier: ^0.25.4 + version: 0.25.4(graphology-types@0.24.8) + graphology-communities-louvain: + specifier: ^2.0.1 + version: 2.0.2(graphology-types@0.24.8) + graphology-layout: + specifier: ^0.6.1 + version: 0.6.1(graphology-types@0.24.8) + graphology-layout-forceatlas2: + specifier: ^0.10.1 + version: 0.10.1(graphology-types@0.24.8) + graphology-layout-noverlap: + specifier: ^0.4.1 + version: 0.4.2(graphology-types@0.24.8) html-to-text: specifier: ^9.0.5 version: 9.0.5 @@ -8860,6 +8875,44 @@ packages: graphlib@2.1.8: resolution: {integrity: sha512-jcLLfkpoVGmH7/InMC/1hIvOPSUh38oJtGhvrOFGzioE1DZ+0YW16RgmOJhHiuWTvGiJQ9Z1Ik43JvkRPRvE+A==} + graphology-communities-louvain@2.0.2: + resolution: {integrity: 
sha512-zt+2hHVPYxjEquyecxWXoUoIuN/UvYzsvI7boDdMNz0rRvpESQ7+e+Ejv6wK7AThycbZXuQ6DkG8NPMCq6XwoA==} + peerDependencies: + graphology-types: '>=0.19.0' + + graphology-indices@0.17.0: + resolution: {integrity: sha512-A7RXuKQvdqSWOpn7ZVQo4S33O0vCfPBnUSf7FwE0zNCasqwZVUaCXePuWo5HBpWw68KJcwObZDHpFk6HKH6MYQ==} + peerDependencies: + graphology-types: '>=0.20.0' + + graphology-layout-forceatlas2@0.10.1: + resolution: {integrity: sha512-ogzBeF1FvWzjkikrIFwxhlZXvD2+wlY54lqhsrWprcdPjopM2J9HoMweUmIgwaTvY4bUYVimpSsOdvDv1gPRFQ==} + peerDependencies: + graphology-types: '>=0.19.0' + + graphology-layout-noverlap@0.4.2: + resolution: {integrity: sha512-13WwZSx96zim6l1dfZONcqLh3oqyRcjIBsqz2c2iJ3ohgs3605IDWjldH41Gnhh462xGB1j6VGmuGhZ2FKISXA==} + peerDependencies: + graphology-types: '>=0.19.0' + + graphology-layout@0.6.1: + resolution: {integrity: sha512-m9aMvbd0uDPffUCFPng5ibRkb2pmfNvdKjQWeZrf71RS1aOoat5874+DcyNfMeCT4aQguKC7Lj9eCbqZj/h8Ag==} + peerDependencies: + graphology-types: '>=0.19.0' + + graphology-types@0.24.8: + resolution: {integrity: sha512-hDRKYXa8TsoZHjgEaysSRyPdT6uB78Ci8WnjgbStlQysz7xR52PInxNsmnB7IBOM1BhikxkNyCVEFgmPKnpx3Q==} + + graphology-utils@2.5.2: + resolution: {integrity: sha512-ckHg8MXrXJkOARk56ZaSCM1g1Wihe2d6iTmz1enGOz4W/l831MBCKSayeFQfowgF8wd+PQ4rlch/56Vs/VZLDQ==} + peerDependencies: + graphology-types: '>=0.23.0' + + graphology@0.25.4: + resolution: {integrity: sha512-33g0Ol9nkWdD6ulw687viS8YJQBxqG5LWII6FI6nul0pq6iM2t5EKquOTFDbyTblRB3O9I+7KX4xI8u5ffekAQ==} + peerDependencies: + graphology-types: '>=0.24.0' + hachure-fill@0.5.2: resolution: {integrity: sha512-3GKBOn+m2LX9iq+JC1064cSFprJY4jL1jCXTcpnfER5HYE2l/4EfWSGzkPa/ZDBmYI0ZOEj5VHV/eKnPGkHuOg==} @@ -10409,6 +10462,9 @@ packages: mlly@1.7.4: resolution: {integrity: sha512-qmdSIPC4bDJXgZTCR7XosJiNKySV7O215tsPtDN9iEO/7q/76b/ijtgRu/+epFXSJhijtTCCGp3DWS549P3xKw==} + mnemonist@0.39.8: + resolution: {integrity: sha512-vyWo2K3fjrUw8YeeZ1zF0fy6Mu59RHokURlld8ymdUPjMlD9EC9ov1/YPqTgqRvUN9nTr3Gqfz29LYAmu0PHPQ==} + 
mocha@10.8.2: resolution: {integrity: sha512-VZlYo/WE8t1tstuRmqgeyBgCbJc/lEdopaa+axcKzTBJ+UIdlAB9XnmvTCAH4pwR4ElNInaedhEBmZD8iCSVEg==} engines: {node: '>= 14.0.0'} @@ -10635,6 +10691,9 @@ packages: resolution: {integrity: sha512-nK28WOo+QIjBkDduTINE4JkF/UJJKyf2EJxvJKfblDpyg0Q+pkOHNTL0Qwy6NP6FhE/EnzV73BxxqcJaXY9anw==} engines: {node: '>= 0.4'} + obliterator@2.0.5: + resolution: {integrity: sha512-42CPE9AhahZRsMNslczq0ctAEtqk8Eka26QofnqC346BZdHDySk3LWka23LI7ULIw11NmltpiLagIq8gBozxTw==} + obuf@1.1.2: resolution: {integrity: sha512-PX1wu0AmAdPqOL1mWhqmlOd8kOIZQwGZw6rh7uby9fTc5lhaOWFLX3I6R1hrF9k3zUY40e6igsLGkDXK92LJNg==} @@ -10765,6 +10824,9 @@ packages: pako@1.0.11: resolution: {integrity: sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==} + pandemonium@2.4.1: + resolution: {integrity: sha512-wRqjisUyiUfXowgm7MFH2rwJzKIr20rca5FsHXCMNm1W5YPP1hCtrZfgmQ62kP7OZ7Xt+cR858aB28lu5NX55g==} + param-case@3.0.4: resolution: {integrity: sha512-RXlj7zCYokReqWpOPH9oYivUzLYZ5vAPIfEmCTNViosC78F8F0H9y7T7gG2M39ymgutxF5gcFEsyZQSph9Bp3A==} @@ -20168,6 +20230,48 @@ snapshots: dependencies: lodash: 4.17.21 + graphology-communities-louvain@2.0.2(graphology-types@0.24.8): + dependencies: + graphology-indices: 0.17.0(graphology-types@0.24.8) + graphology-types: 0.24.8 + graphology-utils: 2.5.2(graphology-types@0.24.8) + mnemonist: 0.39.8 + pandemonium: 2.4.1 + + graphology-indices@0.17.0(graphology-types@0.24.8): + dependencies: + graphology-types: 0.24.8 + graphology-utils: 2.5.2(graphology-types@0.24.8) + mnemonist: 0.39.8 + + graphology-layout-forceatlas2@0.10.1(graphology-types@0.24.8): + dependencies: + graphology-types: 0.24.8 + graphology-utils: 2.5.2(graphology-types@0.24.8) + + graphology-layout-noverlap@0.4.2(graphology-types@0.24.8): + dependencies: + graphology-types: 0.24.8 + graphology-utils: 2.5.2(graphology-types@0.24.8) + + graphology-layout@0.6.1(graphology-types@0.24.8): + dependencies: + graphology-types: 0.24.8 + 
graphology-utils: 2.5.2(graphology-types@0.24.8) + pandemonium: 2.4.1 + + graphology-types@0.24.8: {} + + graphology-utils@2.5.2(graphology-types@0.24.8): + dependencies: + graphology-types: 0.24.8 + + graphology@0.25.4(graphology-types@0.24.8): + dependencies: + events: 3.3.0 + graphology-types: 0.24.8 + obliterator: 2.0.5 + hachure-fill@0.5.2: {} handle-thing@2.0.1: {} @@ -22281,6 +22385,10 @@ snapshots: pkg-types: 1.3.1 ufo: 1.6.1 + mnemonist@0.39.8: + dependencies: + obliterator: 2.0.5 + mocha@10.8.2: dependencies: ansi-colors: 4.1.3 @@ -22473,6 +22581,8 @@ snapshots: has-symbols: 1.1.0 object-keys: 1.1.1 + obliterator@2.0.5: {} + obuf@1.1.2: {} on-finished@2.4.1: @@ -22642,6 +22752,10 @@ snapshots: pako@1.0.11: {} + pandemonium@2.4.1: + dependencies: + mnemonist: 0.39.8 + param-case@3.0.4: dependencies: dot-case: 3.0.4 From 62e19ecfa457de812718f6f238b50197d3499218 Mon Sep 17 00:00:00 2001 From: Hillary Mutisya <150286414+hillary-mutisya@users.noreply.github.com> Date: Thu, 30 Oct 2025 08:19:27 -0700 Subject: [PATCH 02/10] Adding relationships to topic graph --- .../src/agent/browserActionHandler.mts | 1 + .../agent/knowledge/actions/graphActions.mts | 4 +- .../actions/knowledgeActionRouter.mts | 3 + .../conversation/hierarchicalTopicSchema.ts | 1 + .../src/conversation/hierarchicalTopics.ts | 99 ++++- ts/packages/memory/website/src/tables.ts | 58 +++ .../memory/website/src/websiteCollection.ts | 385 +++++++++++++++++- 7 files changed, 537 insertions(+), 14 deletions(-) diff --git a/ts/packages/agents/browser/src/agent/browserActionHandler.mts b/ts/packages/agents/browser/src/agent/browserActionHandler.mts index 7770511ca..d38b44ba6 100644 --- a/ts/packages/agents/browser/src/agent/browserActionHandler.mts +++ b/ts/packages/agents/browser/src/agent/browserActionHandler.mts @@ -654,6 +654,7 @@ async function processBrowserAgentMessage( case "getTopicViewportNeighborhood": case "getTopicMetrics": case "getTopicTimelines": + case "invalidateTopicCacheAction": case 
"getViewportBasedNeighborhood": case "testMergeTopicHierarchies": case "mergeTopicHierarchies": diff --git a/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts b/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts index d3025e37c..9fa006b95 100644 --- a/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts +++ b/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts @@ -143,7 +143,7 @@ export async function getKnowledgeGraphStatus( if (websiteCollection.knowledgeEntities) { entityCount = ( websiteCollection.knowledgeEntities as any - ).getTotalEntityCount(); + ).getUniqueEntityCount(); } } catch (error) { console.warn("Failed to get entity count:", error); @@ -1996,8 +1996,6 @@ export async function invalidateTopicCacheAction( // Ensure topic graph data is cached for fast access async function ensureTopicGraphCache(websiteCollection: any): Promise { - // TEMPORARY: Always invalidate cache to ensure fresh data - invalidateTopicCache(websiteCollection); const cache = getTopicGraphCache(websiteCollection); diff --git a/ts/packages/agents/browser/src/agent/knowledge/actions/knowledgeActionRouter.mts b/ts/packages/agents/browser/src/agent/knowledge/actions/knowledgeActionRouter.mts index 2e4c443ed..8d1338522 100644 --- a/ts/packages/agents/browser/src/agent/knowledge/actions/knowledgeActionRouter.mts +++ b/ts/packages/agents/browser/src/agent/knowledge/actions/knowledgeActionRouter.mts @@ -43,6 +43,7 @@ import { getUrlContentBreakdown, getTopicTimelines, discoverRelatedKnowledge, + invalidateTopicCacheAction, } from "./graphActions.mjs"; import { checkAIModelStatus, @@ -121,6 +122,8 @@ export async function handleKnowledgeAction( return await getUrlContentBreakdown(parameters, context); case "getTopicTimelines": return await getTopicTimelines(parameters, context); + case "invalidateTopicCacheAction": + return await invalidateTopicCacheAction(parameters, context); // Query Actions case 
"getRecentKnowledgeItems": diff --git a/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopicSchema.ts b/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopicSchema.ts index 28c7a6097..1aa688df6 100644 --- a/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopicSchema.ts +++ b/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopicSchema.ts @@ -8,6 +8,7 @@ export interface HierarchicalTopic { parentId?: string; // Reference to parent topic childIds: string[]; // References to child topics sourceRefOrdinals: number[]; // SemanticRef ordinals linking to knowledge topics + sourceTopicNames: string[]; // Knowledge topic names that contributed to this hierarchical topic (for co-occurrence lookup) confidence: number; // Extraction confidence (0-1) keywords: string[]; // Associated keywords entityReferences: string[]; // Related entities diff --git a/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopics.ts b/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopics.ts index db809b919..259fb87e0 100644 --- a/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopics.ts +++ b/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopics.ts @@ -307,12 +307,19 @@ JSON OUTPUT:`; // Build hierarchy from LLM response for (const rootData of hierarchyData) { + // Check if root topic name matches any knowledge topics + const rootSourceNames = findMatchingKnowledgeTopics( + rootData.rootTopic, + topics, + ); + const rootTopic: HierarchicalTopic = { id: generateTopicId(rootData.rootTopic, 0), name: rootData.rootTopic, level: 0, childIds: [], sourceRefOrdinals: [], + sourceTopicNames: rootSourceNames, confidence: 0.8, keywords: [rootData.rootTopic], entityReferences: [], @@ -325,6 +332,12 @@ JSON OUTPUT:`; // Add children for (const childName of rootData.children || []) { + // Check if child topic name matches any knowledge topics + const childSourceNames = findMatchingKnowledgeTopics( + childName, + 
topics, + ); + const childTopic: HierarchicalTopic = { id: generateTopicId(childName, 1), name: childName, @@ -332,6 +345,7 @@ JSON OUTPUT:`; parentId: rootTopic.id, childIds: [], sourceRefOrdinals: [], + sourceTopicNames: childSourceNames, confidence: 0.7, keywords: [childName], entityReferences: [], @@ -347,6 +361,12 @@ JSON OUTPUT:`; const grandchildrenForThisChild = rootData.grandchildren?.[childName] || []; for (const grandchildName of grandchildrenForThisChild) { + // Check if grandchild topic name matches any knowledge topics + const grandchildSourceNames = findMatchingKnowledgeTopics( + grandchildName, + topics, + ); + const grandchildTopic: HierarchicalTopic = { id: generateTopicId(grandchildName, 2), name: grandchildName, @@ -354,6 +374,7 @@ JSON OUTPUT:`; parentId: childTopic.id, childIds: [], sourceRefOrdinals: [], + sourceTopicNames: grandchildSourceNames, confidence: 0.6, keywords: [grandchildName], entityReferences: [], @@ -381,12 +402,18 @@ JSON OUTPUT:`; // Build hierarchy from fallback data for (const rootData of hierarchyData) { + const rootSourceNames = findMatchingKnowledgeTopics( + rootData.rootTopic, + topics, + ); + const rootTopic: HierarchicalTopic = { id: generateTopicId(rootData.rootTopic, 0), name: rootData.rootTopic, level: 0, childIds: [], sourceRefOrdinals: [], + sourceTopicNames: rootSourceNames, confidence: 0.6, // Lower confidence for fallback keywords: [rootData.rootTopic], entityReferences: [], @@ -399,6 +426,11 @@ JSON OUTPUT:`; // Add children from fallback for (const childName of rootData.children || []) { + const childSourceNames = findMatchingKnowledgeTopics( + childName, + topics, + ); + const childTopic: HierarchicalTopic = { id: generateTopicId(childName, 1), name: childName, @@ -406,6 +438,7 @@ JSON OUTPUT:`; parentId: rootTopic.id, childIds: [], sourceRefOrdinals: [], + sourceTopicNames: childSourceNames, confidence: 0.5, // Lower confidence for fallback keywords: [childName], entityReferences: [], @@ -425,12 +458,18 
@@ JSON OUTPUT:`; // Fallback: If LLM failed or produced no results, use simple rule-based approach if (rootTopics.length === 0 && aggregatedTopics.length > 0) { + const rootSourceNames = findMatchingKnowledgeTopics( + aggregatedTopics[0], + topics, + ); + const rootTopic: HierarchicalTopic = { id: generateTopicId(aggregatedTopics[0], 0), name: aggregatedTopics[0], level: 0, childIds: [], sourceRefOrdinals: [], + sourceTopicNames: rootSourceNames, confidence: 0.8, keywords: [aggregatedTopics[0]], entityReferences: [], @@ -442,6 +481,7 @@ JSON OUTPUT:`; // Add topics as children for (let i = 0; i < Math.min(topics.length, 15); i++) { + // Each child topic directly corresponds to a knowledge topic const childTopic: HierarchicalTopic = { id: generateTopicId(topics[i], 1), name: topics[i], @@ -449,6 +489,7 @@ JSON OUTPUT:`; parentId: rootTopic.id, childIds: [], sourceRefOrdinals: [], + sourceTopicNames: [topics[i]], // Direct 1:1 mapping confidence: 0.6, keywords: [topics[i]], entityReferences: [], @@ -475,15 +516,28 @@ function enrichHierarchy( fragmentExtractions: FragmentTopicExtraction[], context: TopicExtractionContext, ): TopicHierarchy { - // Add semanticRef ordinal information + // Add semanticRef ordinal information and sourceTopicNames for (const extraction of fragmentExtractions) { for (const [, topic] of hierarchy.topicMap) { - if (extraction.topics.includes(topic.name)) { - const ordinal = typeof extraction.fragmentId === 'number' - ? extraction.fragmentId - : parseInt(extraction.fragmentId, 10); - if (!isNaN(ordinal)) { - topic.sourceRefOrdinals.push(ordinal); + // Check if this hierarchical topic matches any fragment topics + for (const fragmentTopic of extraction.topics) { + const normalizedTopicName = topic.name.toLowerCase().trim(); + const normalizedFragmentTopic = fragmentTopic.toLowerCase().trim(); + + if (normalizedTopicName === normalizedFragmentTopic) { + // Add ordinal + const ordinal = typeof extraction.fragmentId === 'number' + ? 
extraction.fragmentId + : parseInt(extraction.fragmentId, 10); + if (!isNaN(ordinal)) { + topic.sourceRefOrdinals.push(ordinal); + } + + // Add to sourceTopicNames if not already present + if (!topic.sourceTopicNames.includes(fragmentTopic)) { + topic.sourceTopicNames.push(fragmentTopic); + } + break; } } } @@ -510,7 +564,7 @@ function mergeHierarchies( mergedRootTopics.push(topic); } } else { - // Merge sourceRefOrdinals for existing topics + // Merge sourceRefOrdinals and sourceTopicNames for existing topics const existingTopic = mergedTopicMap.get(topicId)!; existingTopic.sourceRefOrdinals = [ ...new Set([ @@ -518,6 +572,12 @@ function mergeHierarchies( ...topic.sourceRefOrdinals, ]), ]; + existingTopic.sourceTopicNames = [ + ...new Set([ + ...existingTopic.sourceTopicNames, + ...topic.sourceTopicNames, + ]), + ]; } } @@ -558,6 +618,29 @@ function calculateKeywordOverlap( return intersection.size / union.size; } +/** + * Find knowledge topics that match a hierarchical topic name + * Uses case-insensitive comparison to match topic names + */ +function findMatchingKnowledgeTopics( + hierarchicalTopicName: string, + knowledgeTopics: string[], +): string[] { + const matches: string[] = []; + const normalizedHierarchicalName = hierarchicalTopicName.toLowerCase().trim(); + + for (const knowledgeTopic of knowledgeTopics) { + const normalizedKnowledgeName = knowledgeTopic.toLowerCase().trim(); + + // Exact match (case-insensitive) + if (normalizedHierarchicalName === normalizedKnowledgeName) { + matches.push(knowledgeTopic); + } + } + + return matches; +} + /** * Creates a fallback hierarchy when JSON parsing fails */ diff --git a/ts/packages/memory/website/src/tables.ts b/ts/packages/memory/website/src/tables.ts index 32959d13a..d17ebaef9 100644 --- a/ts/packages/memory/website/src/tables.ts +++ b/ts/packages/memory/website/src/tables.ts @@ -243,6 +243,16 @@ export class KnowledgeEntityTable extends ms.sqlite.SqliteDataFrame { const result = stmt.get() as { count: 
number }; return result.count; } + + public getUniqueEntityCount(): number { + const stmt = this.db.prepare(` + SELECT COUNT(DISTINCT entityName) as count + FROM knowledgeEntities + WHERE entityName != '' AND entityName IS NOT NULL + `); + const result = stmt.get() as { count: number }; + return result.count; + } } // Knowledge topics table @@ -615,11 +625,14 @@ export interface HierarchicalTopicRecord { parentTopicId?: string; confidence: number; keywords?: string; // JSON array stored as string + sourceTopicNames?: string; // JSON array of knowledge topic names stored as string extractionDate: string; } export class HierarchicalTopicTable extends ms.sqlite.SqliteDataFrame { constructor(public db: sqlite.Database) { + HierarchicalTopicTable.migrateSchema(db); + super(db, "hierarchicalTopics", [ ["url", { type: "string" }], ["domain", { type: "string" }], @@ -629,10 +642,55 @@ export class HierarchicalTopicTable extends ms.sqlite.SqliteDataFrame { ["parentTopicId", { type: "string", optional: true }], ["confidence", { type: "number" }], ["keywords", { type: "string", optional: true }], + ["sourceTopicNames", { type: "string", optional: true }], ["extractionDate", { type: "string" }], ]); } + private static migrateSchema(db: sqlite.Database): void { + try { + // Check if table exists + const tableInfo = db + .prepare( + "SELECT sql FROM sqlite_master WHERE type='table' AND name='hierarchicalTopics'", + ) + .get() as { sql?: string } | undefined; + + if (!tableInfo) { + // Table doesn't exist yet, will be created by super constructor + return; + } + + // Check if sourceTopicNames column exists + const columnInfo = db + .prepare("PRAGMA table_info(hierarchicalTopics)") + .all() as Array<{ name: string }>; + + const hasSourceTopicNames = columnInfo.some( + (col) => col.name === "sourceTopicNames", + ); + + if (!hasSourceTopicNames) { + console.log( + "[HierarchicalTopicTable] Migrating schema: Adding sourceTopicNames column", + ); + // Add the missing column + 
db.exec(` + ALTER TABLE hierarchicalTopics + ADD COLUMN sourceTopicNames TEXT + `); + console.log( + "[HierarchicalTopicTable] Migration complete: sourceTopicNames column added", + ); + } + } catch (error) { + console.warn( + "[HierarchicalTopicTable] Schema migration warning:", + error, + ); + } + } + public getTopicsByLevel(level: number): HierarchicalTopicRecord[] { const stmt = this.db.prepare(` SELECT * FROM hierarchicalTopics diff --git a/ts/packages/memory/website/src/websiteCollection.ts b/ts/packages/memory/website/src/websiteCollection.ts index a6b847449..2a98f49e2 100644 --- a/ts/packages/memory/website/src/websiteCollection.ts +++ b/ts/packages/memory/website/src/websiteCollection.ts @@ -1886,13 +1886,16 @@ export class WebsiteCollection try { // First, check if websites already have rich hierarchies from extraction const websites = this.getWebsites(); + debug(`[Knowledge Graph] Total websites: ${websites.length}`); const websitesToProcess = urlLimit ? websites.slice(0, urlLimit) : websites; + debug(`[Knowledge Graph] Processing ${websitesToProcess.length} websites for hierarchies`); const websitesWithHierarchies = websitesToProcess.filter( (w) => (w.knowledge as any)?.topicHierarchy, ); + debug(`[Knowledge Graph] Found ${websitesWithHierarchies.length} websites with existing hierarchies`); if (websitesWithHierarchies.length > 0) { // Clear existing hierarchical topics before rebuilding @@ -1901,15 +1904,19 @@ export class WebsiteCollection "DELETE FROM hierarchicalTopics", ); clearStmt.run(); + debug(`[Knowledge Graph] Cleared existing hierarchical topics`); } // Use existing rich hierarchies from websites + debug(`[Knowledge Graph] Using rich hierarchies from ${websitesWithHierarchies.length} websites`); await this.updateHierarchicalTopics(websitesWithHierarchies); return; } // No existing hierarchies, fall back to building from flat topics + debug(`[Knowledge Graph] No websites with hierarchies, extracting flat topics...`); const flatTopics = await 
this.extractFlatTopics(urlLimit); + debug(`[Knowledge Graph] Extracted ${flatTopics.length} flat topics`); if (flatTopics.length === 0) { return; @@ -1931,6 +1938,7 @@ export class WebsiteCollection let topicExtractor: any; try { // Try to create AI model for topic merging + debug(`[Knowledge Graph] Creating AI model for topic extraction...`); const apiSettings = ai.openai.azureApiSettingsFromEnv( ai.openai.ModelType.Chat, undefined, @@ -1939,16 +1947,20 @@ export class WebsiteCollection const languageModel = ai.openai.createChatModel(apiSettings); topicExtractor = kpLib.conversation.createTopicExtractor(languageModel); + debug(`[Knowledge Graph] AI model created successfully`); } catch (error) { debug( `[Knowledge Graph] AI model not available for topic merging: ${error}`, ); // Fall back to simple hierarchical grouping + debug(`[Knowledge Graph] Using simple hierarchical grouping for ${flatTopics.length} topics`); await this.buildSimpleTopicHierarchy(flatTopics); + debug(`[Knowledge Graph] Simple hierarchy built`); return; } // Use AI to merge topics into higher-level topics + debug(`[Knowledge Graph] Merging ${flatTopics.length} topics into hierarchy...`); const mergeResult = await topicExtractor.mergeTopics( flatTopics, undefined, // No past topics for initial build @@ -1956,8 +1968,10 @@ export class WebsiteCollection ); if (mergeResult && mergeResult.status === "Success") { + debug(`[Knowledge Graph] Topic merge successful: ${mergeResult.topic}`); // Store the merged topic as root const rootTopicId = this.generateTopicId(mergeResult.topic, 0); + debug(`[Knowledge Graph] Storing root topic: ${rootTopicId}`); await this.storeHierarchicalTopic( { topicId: rootTopicId, @@ -1971,16 +1985,19 @@ export class WebsiteCollection ); // Organize flat topics under the root + debug(`[Knowledge Graph] Organizing ${flatTopics.length} topics under root`); await this.organizeTopicsUnderRoot( flatTopics, rootTopicId, ); + debug(`[Knowledge Graph] Topics organized 
successfully`); } else { // Fall back to simple hierarchy if merging fails debug( - `[Knowledge Graph] Topic merging failed, using simple hierarchy`, + `[Knowledge Graph] Topic merging failed (status: ${mergeResult?.status}), using simple hierarchy`, ); await this.buildSimpleTopicHierarchy(flatTopics); + debug(`[Knowledge Graph] Simple hierarchy built`); } debug( @@ -2107,8 +2124,15 @@ export class WebsiteCollection // Merge sourceRefOrdinals to track semanticRefs that contributed to this topic existingTopic.sourceRefOrdinals = [ ...new Set([ - ...existingTopic.sourceRefOrdinals, - ...topic.sourceRefOrdinals, + ...(existingTopic.sourceRefOrdinals || []), + ...(topic.sourceRefOrdinals || []), + ]), + ]; + // Merge sourceTopicNames for hierarchical aggregation + existingTopic.sourceTopicNames = [ + ...new Set([ + ...(existingTopic.sourceTopicNames || []), + ...(topic.sourceTopicNames || []), ]), ]; @@ -2116,6 +2140,14 @@ export class WebsiteCollection } } + // Calculate sibling relationships + const siblingRels = this.calculateSiblingRelationships( + mergedTopicMap as Map, + ); + for (const rel of siblingRels) { + this.topicRelationships?.upsertRelationship(rel); + } + return { rootTopics: mergedRootTopics, topicMap: mergedTopicMap, @@ -2160,6 +2192,7 @@ export class WebsiteCollection ...(parentTopicId ? { parentTopicId } : {}), confidence: topic.confidence, keywords: topic.keywords, + sourceTopicNames: topic.sourceTopicNames, }, urlInfo.url, urlInfo.domain, @@ -3061,6 +3094,7 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation parentTopicId?: string; confidence: number; keywords: string[]; + sourceTopicNames?: string[]; }, websiteUrl: string, websiteDomain: string, @@ -3083,6 +3117,9 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation parentTopicId: topic.parentTopicId, confidence: topic.confidence, keywords: JSON.stringify(topic.keywords), + sourceTopicNames: topic.sourceTopicNames + ? 
JSON.stringify(topic.sourceTopicNames) + : undefined, extractionDate: new Date().toISOString(), }, }; @@ -3443,6 +3480,31 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation entityRelationsByTopic.get(rel.topicId)!.push(rel); } + // Calculate co-occurrence relationships + debug(`[Knowledge Graph] Calculating co-occurrence relationships`); + const cooccurrenceRels = this.calculateCooccurrenceRelationships( + topicIds, + cacheManager, + ); + debug( + `[Knowledge Graph] Found ${cooccurrenceRels.length} co-occurrence relationships`, + ); + + // Calculate entity-mediated relationships + debug(`[Knowledge Graph] Calculating entity-mediated relationships`); + const entityRels = await this.calculateEntityMediatedRelationships( + topicIds, + entityRelationsByTopic, + ); + debug( + `[Knowledge Graph] Found ${entityRels.length} entity-mediated relationships`, + ); + + // Store all relationships + for (const rel of [...cooccurrenceRels, ...entityRels]) { + this.topicRelationships?.upsertRelationship(rel); + } + // Calculate metrics for each topic using pre-fetched data for (const topicId of uniqueTopics) { const topic = this.hierarchicalTopics?.getTopicById(topicId); @@ -3508,4 +3570,321 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation const topic = allTopics.find((t) => t.topicName === topicName); return topic?.topicId || null; } + + /** + * Calculate co-occurrence relationships using bottom-up hierarchical aggregation + * + * Algorithm: + * 1. Leaf topics: Get co-occurrences from GraphBuildingCache using sourceTopicNames + * 2. Parent topics: Aggregate from direct children (not from leaves) + * 3. 
Use intermediate cache to avoid re-computation + */ + private calculateCooccurrenceRelationships( + topicIds: string[], + cacheManager: any, + ): any[] { + const relationships: any[] = []; + + // Working cache: hierarchicalTopicId → (otherTopicId → {count, strength, sources}) + const hierarchicalCooccurrenceCache = new Map>(); + + // Group topics by level for bottom-up processing + const topicsByLevel = new Map(); + for (const topicId of topicIds) { + const topic = this.hierarchicalTopics?.getTopicById(topicId); + if (!topic) continue; + + if (!topicsByLevel.has(topic.level)) { + topicsByLevel.set(topic.level, []); + } + topicsByLevel.get(topic.level)!.push(topic); + } + + // Process level by level, starting from leaves (highest level number) + const levels = Array.from(topicsByLevel.keys()).sort((a, b) => b - a); + + for (const level of levels) { + const topicsAtLevel = topicsByLevel.get(level)!; + + for (const topic of topicsAtLevel) { + const topicCooccurrences = new Map(); + + if (topic.childIds && topic.childIds.length > 0) { + // Parent topic: aggregate from direct children + for (const childId of topic.childIds) { + const childCooccurrences = hierarchicalCooccurrenceCache.get(childId); + if (!childCooccurrences) continue; + + for (const [otherTopicId, cooccurData] of childCooccurrences) { + if (!topicCooccurrences.has(otherTopicId)) { + topicCooccurrences.set(otherTopicId, { + count: 0, + sources: new Set(), + combinations: 0, + }); + } + const existing = topicCooccurrences.get(otherTopicId)!; + existing.count += cooccurData.count; + cooccurData.sources?.forEach((src: string) => existing.sources.add(src)); + existing.combinations += 1; + } + } + } else { + // Leaf topic: get from GraphBuildingCache using sourceTopicNames + const sourceNames = topic.sourceTopicNames || [topic.topicName]; + + for (const sourceName of sourceNames) { + // Get all co-occurrences for this source topic name + const allCooccurrences = cacheManager.getAllTopicRelationships(); + + for 
(const cooccur of allCooccurrences) { + if (cooccur.fromTopic !== sourceName && cooccur.toTopic !== sourceName) { + continue; + } + + // Find the other topic in the co-occurrence + const otherTopicName = cooccur.fromTopic === sourceName + ? cooccur.toTopic + : cooccur.fromTopic; + + // Map knowledge topic name to hierarchical topic ID + const otherHierarchicalTopic = this.findHierarchicalTopicBySourceName( + otherTopicName, + topicIds, + ); + + if (!otherHierarchicalTopic) continue; + + if (!topicCooccurrences.has(otherHierarchicalTopic.topicId)) { + topicCooccurrences.set(otherHierarchicalTopic.topicId, { + count: 0, + sources: new Set(), + combinations: 0, + }); + } + const existing = topicCooccurrences.get(otherHierarchicalTopic.topicId)!; + existing.count += cooccur.count; + cooccur.sources?.forEach((src: string) => existing.sources.add(src)); + existing.combinations += 1; + } + } + } + + // Store in cache for parent nodes to use + hierarchicalCooccurrenceCache.set(topic.topicId, topicCooccurrences); + } + } + + // Convert cache to relationships + for (const [fromTopicId, cooccurrences] of hierarchicalCooccurrenceCache) { + const fromTopic = this.hierarchicalTopics?.getTopicById(fromTopicId); + if (!fromTopic) continue; + + for (const [toTopicId, cooccurData] of cooccurrences) { + if (fromTopicId >= toTopicId) continue; // Avoid duplicates + + const toTopic = this.hierarchicalTopics?.getTopicById(toTopicId); + if (!toTopic) continue; + + // Calculate aggregate strength + const avgCount = cooccurData.count / (cooccurData.combinations || 1); + const sourceArray = Array.from(cooccurData.sources); + + // Normalize by document coverage + const strength = Math.min( + avgCount / Math.min( + sourceArray.length || 1, + 10, // cap for reasonable normalization + ), + 1.0 + ); + + if (strength < 0.1) continue; // Filter weak relationships + + relationships.push({ + fromTopic: fromTopicId, + toTopic: toTopicId, + relationshipType: "CO_OCCURS", + strength, + metadata: 
JSON.stringify({ + cooccurrenceCount: cooccurData.count, + commonDocuments: sourceArray.length, + aggregatedFrom: cooccurData.combinations, + }), + sourceUrls: JSON.stringify(sourceArray.slice(0, 10)), + cooccurrenceCount: cooccurData.count, + firstSeen: fromTopic.extractionDate || new Date().toISOString(), + lastSeen: toTopic.extractionDate || new Date().toISOString(), + updated: new Date().toISOString(), + }); + } + } + + return relationships; + } + + /** + * Find hierarchical topic that has the given source topic name + */ + private findHierarchicalTopicBySourceName( + sourceName: string, + topicIds: string[], + ): any | null { + for (const topicId of topicIds) { + const topic = this.hierarchicalTopics?.getTopicById(topicId); + if (!topic) continue; + + const sourceNames = topic.sourceTopicNames || [topic.topicName]; + if (sourceNames.includes(sourceName)) { + return topic; + } + } + return null; + } + + /** + * Calculate entity-mediated relationships using bottom-up hierarchical aggregation + * + * Algorithm: + * 1. Leaf topics: Get entities from entityRelationsByTopic + * 2. Parent topics: Aggregate entities from direct children (union of entity sets) + * 3. 
Calculate pairwise entity overlap using aggregated entity sets + */ + private async calculateEntityMediatedRelationships( + topicIds: string[], + entityRelationsByTopic: Map, + ): Promise { + const relationships: any[] = []; + + // Working cache: hierarchicalTopicId → Set + const hierarchicalEntityCache = new Map>(); + + // Group topics by level for bottom-up processing + const topicsByLevel = new Map(); + for (const topicId of topicIds) { + const topic = this.hierarchicalTopics?.getTopicById(topicId); + if (!topic) continue; + + if (!topicsByLevel.has(topic.level)) { + topicsByLevel.set(topic.level, []); + } + topicsByLevel.get(topic.level)!.push(topic); + } + + // Process level by level, starting from leaves (highest level number) + const levels = Array.from(topicsByLevel.keys()).sort((a, b) => b - a); + + for (const level of levels) { + const topicsAtLevel = topicsByLevel.get(level)!; + + for (const topic of topicsAtLevel) { + const topicEntities = new Set(); + + if (topic.childIds && topic.childIds.length > 0) { + // Parent topic: aggregate entities from direct children (union) + for (const childId of topic.childIds) { + const childEntities = hierarchicalEntityCache.get(childId); + if (!childEntities) continue; + + for (const entity of childEntities) { + topicEntities.add(entity); + } + } + } else { + // Leaf topic: get entities from entityRelationsByTopic + const entities = entityRelationsByTopic.get(topic.topicId) || []; + for (const entity of entities) { + topicEntities.add(entity.entityName); + } + } + + // Store in cache for parent nodes to use + hierarchicalEntityCache.set(topic.topicId, topicEntities); + } + } + + // Calculate pairwise entity overlap using aggregated entity sets + for (let i = 0; i < topicIds.length; i++) { + for (let j = i + 1; j < topicIds.length; j++) { + const topicA = topicIds[i]; + const topicB = topicIds[j]; + + const entitiesA = hierarchicalEntityCache.get(topicA) || new Set(); + const entitiesB = 
hierarchicalEntityCache.get(topicB) || new Set(); + + if (entitiesA.size === 0 || entitiesB.size === 0) continue; + + // Calculate shared entities + const shared = Array.from(entitiesA).filter((e) => + entitiesB.has(e), + ); + + if (shared.length === 0) continue; + + // Calculate strength as Jaccard similarity (intersection / union) + const unionSize = new Set([...entitiesA, ...entitiesB]).size; + const strength = shared.length / unionSize; + + // Only create relationship if strength is significant + if (strength < 0.1) continue; + + relationships.push({ + fromTopic: topicA, + toTopic: topicB, + relationshipType: "RELATED_VIA_ENTITY", + strength, + metadata: JSON.stringify({ + sharedEntities: shared.slice(0, 10), + sharedEntityCount: shared.length, + entityOverlapRatio: strength, + totalEntitiesA: entitiesA.size, + totalEntitiesB: entitiesB.size, + }), + updated: new Date().toISOString(), + }); + } + } + + return relationships; + } + + /** + * Calculate sibling relationships from hierarchical structure + */ + private calculateSiblingRelationships(topicMap: Map): any[] { + const relationships: any[] = []; + const parentToChildren = new Map(); + + // Group children by parent + for (const [topicId, topic] of topicMap) { + if (topic.parentId) { + if (!parentToChildren.has(topic.parentId)) { + parentToChildren.set(topic.parentId, []); + } + parentToChildren.get(topic.parentId)!.push(topicId); + } + } + + // Create sibling relationships + for (const [parentId, children] of parentToChildren) { + for (let i = 0; i < children.length; i++) { + for (let j = i + 1; j < children.length; j++) { + const parent = topicMap.get(parentId); + relationships.push({ + fromTopic: children[i], + toTopic: children[j], + relationshipType: "SIBLING", + strength: 0.8, + metadata: JSON.stringify({ + parentTopic: parent?.name, + sharedParentId: parentId, + }), + updated: new Date().toISOString(), + }); + } + } + } + + return relationships; + } } From e53c87f67385aeea9ae78612683e2a44660e298a Mon 
Sep 17 00:00:00 2001 From: Hillary Mutisya <150286414+hillary-mutisya@users.noreply.github.com> Date: Thu, 30 Oct 2025 22:10:22 -0700 Subject: [PATCH 03/10] Use graphology for graph metrics in addition to layout --- .gitignore | 3 + ts/packages/memory/website/package.json | 3 + .../src/buildTopicGraphWithGraphology.ts | 68 ++ .../website/src/graph/graphStateManager.ts | 132 ++++ .../website/src/graph/incrementalUpdater.ts | 459 ++++++++++++++ .../website/src/graph/metricsCalculator.ts | 181 ++++++ .../website/src/graph/topicGraphBuilder.ts | 382 ++++++++++++ .../memory/website/src/websiteCollection.ts | 589 ++++-------------- ts/pnpm-lock.yaml | 42 ++ 9 files changed, 1375 insertions(+), 484 deletions(-) create mode 100644 ts/packages/memory/website/src/buildTopicGraphWithGraphology.ts create mode 100644 ts/packages/memory/website/src/graph/graphStateManager.ts create mode 100644 ts/packages/memory/website/src/graph/incrementalUpdater.ts create mode 100644 ts/packages/memory/website/src/graph/metricsCalculator.ts create mode 100644 ts/packages/memory/website/src/graph/topicGraphBuilder.ts diff --git a/.gitignore b/.gitignore index 3fe2f10eb..9bd26ad85 100644 --- a/.gitignore +++ b/.gitignore @@ -446,3 +446,6 @@ android/samples/mobile/.idea/deploymentTargetSelector.xml # Mac OS files .DS_Store +/ts/.claude +/ts/packages/cli/.collection-backups +/ts/packages/cli/.import-states diff --git a/ts/packages/memory/website/package.json b/ts/packages/memory/website/package.json index 9086e04f9..b8679f2fe 100644 --- a/ts/packages/memory/website/package.json +++ b/ts/packages/memory/website/package.json @@ -36,6 +36,9 @@ "debug": "^4.4.0", "dompurify": "^3.2.6", "get-folder-size": "^5.0.0", + "graphology": "^0.25.4", + "graphology-communities-louvain": "^2.0.1", + "graphology-metrics": "^2.1.0", "jsdom": "^26.1.0", "knowledge-processor": "workspace:*", "knowpro": "workspace:*", diff --git a/ts/packages/memory/website/src/buildTopicGraphWithGraphology.ts 
b/ts/packages/memory/website/src/buildTopicGraphWithGraphology.ts new file mode 100644 index 000000000..31cda217c --- /dev/null +++ b/ts/packages/memory/website/src/buildTopicGraphWithGraphology.ts @@ -0,0 +1,68 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import registerDebug from "debug"; +import { TopicGraphBuilder, type CooccurrenceData } from "./graph/topicGraphBuilder.js"; +import { MetricsCalculator } from "./graph/metricsCalculator.js"; +import type { HierarchicalTopicRecord } from "./tables.js"; + +const debug = registerDebug("typeagent:website:buildTopicGraph"); + +export async function buildTopicGraphWithGraphology( + hierarchicalTopics: HierarchicalTopicRecord[], + cacheManager: any, + topicRelationshipsTable: any, + topicMetricsTable: any, +): Promise { + debug(`Building topic graph for ${hierarchicalTopics.length} hierarchical topics`); + + const cooccurrences = extractCooccurrencesFromCache(cacheManager); + debug(`Extracted ${cooccurrences.length} cooccurrences from cache`); + + const graphBuilder = new TopicGraphBuilder(); + const { flatGraph, hierarchicalGraph } = graphBuilder.buildFromTopicHierarchy( + hierarchicalTopics, + cooccurrences, + ); + + debug(`Graphs built: flat=${flatGraph.order} nodes, hierarchical=${hierarchicalGraph.order} nodes`); + + const relationships = graphBuilder.exportToTopicRelationships(); + debug(`Exporting ${relationships.length} topic relationships to database`); + + for (const rel of relationships) { + topicRelationshipsTable.upsertRelationship(rel); + } + + const metricsCalculator = new MetricsCalculator(); + const topicCounts = metricsCalculator.calculateTopicCounts( + hierarchicalTopics.map((t) => ({ + topicId: t.topicId, + url: t.url, + domain: t.domain, + })), + ); + + const { topicMetrics, communities } = metricsCalculator.calculateMetrics( + hierarchicalGraph, + topicCounts, + ); + + debug(`Calculated metrics for ${topicMetrics.size} topics, ${communities.size} 
communities`); + + for (const [, metrics] of topicMetrics) { + topicMetricsTable.upsertMetrics(metrics); + } + + debug(`Topic graph build complete`); +} + +function extractCooccurrencesFromCache(cacheManager: any): CooccurrenceData[] { + const cachedRelationships = cacheManager.getAllTopicRelationships(); + return cachedRelationships.map((rel: any) => ({ + fromTopic: rel.fromTopic, + toTopic: rel.toTopic, + count: rel.count, + urls: rel.sources || [], + })); +} diff --git a/ts/packages/memory/website/src/graph/graphStateManager.ts b/ts/packages/memory/website/src/graph/graphStateManager.ts new file mode 100644 index 000000000..950ffe831 --- /dev/null +++ b/ts/packages/memory/website/src/graph/graphStateManager.ts @@ -0,0 +1,132 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import { createRequire } from "module"; +import registerDebug from "debug"; +import { TopicGraphBuilder, type CooccurrenceData } from "./topicGraphBuilder.js"; +import { IncrementalGraphUpdater, type WebpageKnowledge } from "./incrementalUpdater.js"; +import { MetricsCalculator } from "./metricsCalculator.js"; +import type { HierarchicalTopicRecord } from "../tables.js"; + +const require = createRequire(import.meta.url); +const Graph = require("graphology"); + +const debug = registerDebug("typeagent:website:graph:state"); + +type Graph = any; + +export class GraphStateManager { + private flatGraph: Graph | null = null; + private hierarchicalGraph: Graph | null = null; + private incrementalUpdater: IncrementalGraphUpdater | null = null; + + public async ensureGraphsInitialized( + hierarchicalTopics: HierarchicalTopicRecord[], + cooccurrences: CooccurrenceData[], + ): Promise { + if (this.flatGraph && this.hierarchicalGraph) { + debug("Graphs already initialized, skipping rebuild"); + return; + } + + debug(`Initializing graphs with ${hierarchicalTopics.length} topics, ${cooccurrences.length} cooccurrences`); + + const graphBuilder = new TopicGraphBuilder(); + 
const graphs = graphBuilder.buildFromTopicHierarchy( + hierarchicalTopics, + cooccurrences, + ); + + this.flatGraph = graphs.flatGraph; + this.hierarchicalGraph = graphs.hierarchicalGraph; + + this.incrementalUpdater = new IncrementalGraphUpdater( + this.flatGraph, + this.hierarchicalGraph, + ); + + debug(`Graphs initialized: flat=${this.flatGraph.order} nodes, hierarchical=${this.hierarchicalGraph.order} nodes`); + } + + public async addWebpage( + knowledge: WebpageKnowledge, + ): Promise<{ addedTopics: number; updatedTopics: number; addedRelationships: number; durationMs: number }> { + if (!this.incrementalUpdater) { + throw new Error("Graphs not initialized. Call ensureGraphsInitialized() first."); + } + + const result = await this.incrementalUpdater.addWebpage(knowledge); + debug(`Added webpage in ${result.durationMs}ms: ${result.addedTopics} topics, ${result.addedRelationships} relationships`); + return result; + } + + public getGraphs(): { flatGraph: Graph | null; hierarchicalGraph: Graph | null } { + return { + flatGraph: this.flatGraph, + hierarchicalGraph: this.hierarchicalGraph, + }; + } + + public getMetrics(): Map | null { + if (!this.incrementalUpdater) { + return null; + } + return this.incrementalUpdater.getCachedMetrics(); + } + + public exportRelationships(): any[] { + if (!this.hierarchicalGraph) { + return []; + } + + const relationships: any[] = []; + const now = new Date().toISOString(); + + for (const edge of this.hierarchicalGraph.edges()) { + const attrs = this.hierarchicalGraph.getEdgeAttributes(edge); + if (attrs.type !== "cooccurrence") continue; + + const source = this.hierarchicalGraph.source(edge); + const target = this.hierarchicalGraph.target(edge); + const sourceName = this.hierarchicalGraph.getNodeAttribute(source, "topicName"); + const targetName = this.hierarchicalGraph.getNodeAttribute(target, "topicName"); + + relationships.push({ + fromTopic: source, + toTopic: target, + relationshipType: "cooccurrence", + strength: 
attrs.strength || 0, + metadata: JSON.stringify({ + fromTopicName: sourceName, + toTopicName: targetName, + }), + sourceUrls: JSON.stringify(attrs.urls || []), + cooccurrenceCount: attrs.count || 0, + updated: now, + }); + } + + return relationships; + } + + public async recomputeMetrics( + topicCounts?: Map, + ): Promise<{ topicMetrics: Map; communities: Map }> { + if (!this.hierarchicalGraph) { + throw new Error("Graphs not initialized. Call ensureGraphsInitialized() first."); + } + + const metricsCalculator = new MetricsCalculator(); + const result = metricsCalculator.calculateMetrics(this.hierarchicalGraph, topicCounts); + + debug(`Recomputed metrics for ${result.topicMetrics.size} topics, ${result.communities.size} communities`); + return result; + } + + public reset(): void { + this.flatGraph = null; + this.hierarchicalGraph = null; + this.incrementalUpdater = null; + debug("Graph state reset"); + } +} diff --git a/ts/packages/memory/website/src/graph/incrementalUpdater.ts b/ts/packages/memory/website/src/graph/incrementalUpdater.ts new file mode 100644 index 000000000..1531de37a --- /dev/null +++ b/ts/packages/memory/website/src/graph/incrementalUpdater.ts @@ -0,0 +1,459 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +import { createRequire } from "module"; +import registerDebug from "debug"; +import type { + HierarchicalTopicRecord, + TopicMetrics, +} from "../tables.js"; +import { MetricsCalculator } from "./metricsCalculator.js"; +import type { CooccurrenceData } from "./topicGraphBuilder.js"; + +const require = createRequire(import.meta.url); +const Graph = require("graphology"); + +const debug = registerDebug("typeagent:website:graph:incremental"); + +type Graph = any; + +export interface WebpageKnowledge { + url: string; + domain: string; + hierarchicalTopics: HierarchicalTopicRecord[]; + cooccurrences: CooccurrenceData[]; +} + +export interface UpdateResult { + addedTopics: number; + updatedTopics: number; + addedRelationships: number; + metricsRecomputed: boolean; + durationMs: number; +} + +export class IncrementalGraphUpdater { + private flatGraph: Graph; + private hierarchicalGraph: Graph; + private metricsCalculator: MetricsCalculator; + private cachedMetrics: Map | null = null; + private changedTopics: Set = new Set(); + + constructor(flatGraph: Graph, hierarchicalGraph: Graph) { + this.flatGraph = flatGraph; + this.hierarchicalGraph = hierarchicalGraph; + this.metricsCalculator = new MetricsCalculator(); + this.setupEventListeners(); + } + + public async addWebpage( + knowledge: WebpageKnowledge, + ): Promise { + const startTime = Date.now(); + debug( + `Adding webpage: ${knowledge.url}, ${knowledge.hierarchicalTopics.length} topics, ${knowledge.cooccurrences.length} cooccurrences`, + ); + + this.changedTopics.clear(); + + const addedTopics = this.updateFlatGraph(knowledge); + const updatedTopics = this.updateHierarchicalGraph(knowledge); + const addedRelationships = this.updateCooccurrences(knowledge); + + const metricsRecomputed = await this.recomputeMetrics({ + affectedOnly: true, + }); + + const durationMs = Date.now() - startTime; + debug(`Webpage added in ${durationMs}ms`); + + return { + addedTopics, + updatedTopics, + addedRelationships, + 
metricsRecomputed, + durationMs, + }; + } + + private updateFlatGraph(knowledge: WebpageKnowledge): number { + let addedTopics = 0; + + const leafTopics = knowledge.hierarchicalTopics.filter((topic) => { + const sourceTopicNames = topic.sourceTopicNames + ? JSON.parse(topic.sourceTopicNames) + : []; + return sourceTopicNames.length > 0; + }); + + for (const topic of leafTopics) { + if (!this.flatGraph.hasNode(topic.topicId)) { + this.flatGraph.addNode(topic.topicId, { + topicName: topic.topicName, + level: topic.level, + confidence: topic.confidence, + sourceTopicNames: topic.sourceTopicNames + ? JSON.parse(topic.sourceTopicNames) + : [], + isLeaf: true, + }); + addedTopics++; + this.changedTopics.add(topic.topicId); + } + } + + debug(`Flat graph: added ${addedTopics} new leaf topics`); + return addedTopics; + } + + private updateHierarchicalGraph(knowledge: WebpageKnowledge): number { + let updatedTopics = 0; + + for (const topic of knowledge.hierarchicalTopics) { + if (!this.hierarchicalGraph.hasNode(topic.topicId)) { + this.hierarchicalGraph.addNode(topic.topicId, { + topicName: topic.topicName, + level: topic.level, + confidence: topic.confidence, + sourceTopicNames: topic.sourceTopicNames + ? 
JSON.parse(topic.sourceTopicNames) + : [], + parentTopicId: topic.parentTopicId, + childIds: [], + }); + updatedTopics++; + this.changedTopics.add(topic.topicId); + + if (topic.parentTopicId) { + if (this.hierarchicalGraph.hasNode(topic.parentTopicId)) { + this.hierarchicalGraph.addDirectedEdge( + topic.parentTopicId, + topic.topicId, + { type: "parent-child" }, + ); + + const parentAttrs = + this.hierarchicalGraph.getNodeAttributes( + topic.parentTopicId, + ); + if (!parentAttrs.childIds.includes(topic.topicId)) { + parentAttrs.childIds.push(topic.topicId); + } + this.changedTopics.add(topic.parentTopicId); + } + } + } + } + + debug(`Hierarchical graph: updated ${updatedTopics} topics`); + return updatedTopics; + } + + private updateCooccurrences(knowledge: WebpageKnowledge): number { + let addedRelationships = 0; + + const leafTopics = knowledge.hierarchicalTopics.filter((topic) => { + const sourceTopicNames = topic.sourceTopicNames + ? JSON.parse(topic.sourceTopicNames) + : []; + return sourceTopicNames.length > 0; + }); + + for (const cooccur of knowledge.cooccurrences) { + const fromTopics = this.findTopicsBySourceName( + leafTopics, + cooccur.fromTopic, + ); + const toTopics = this.findTopicsBySourceName( + leafTopics, + cooccur.toTopic, + ); + + for (const fromTopic of fromTopics) { + for (const toTopic of toTopics) { + if (fromTopic.topicId === toTopic.topicId) continue; + + const edgeKey = this.getEdgeKey( + fromTopic.topicId, + toTopic.topicId, + ); + + if (this.flatGraph.hasEdge(edgeKey)) { + const current = this.flatGraph.getEdgeAttributes(edgeKey); + this.flatGraph.setEdgeAttribute( + edgeKey, + "count", + current.count + cooccur.count, + ); + this.flatGraph.setEdgeAttribute(edgeKey, "urls", [ + ...current.urls, + ...cooccur.urls, + ]); + const newStrength = this.calculateStrength( + current.count + cooccur.count, + ); + this.flatGraph.setEdgeAttribute( + edgeKey, + "strength", + newStrength, + ); + } else { + this.flatGraph.addEdge( + 
fromTopic.topicId, + toTopic.topicId, + { + count: cooccur.count, + urls: cooccur.urls, + strength: this.calculateStrength(cooccur.count), + }, + ); + addedRelationships++; + } + + this.changedTopics.add(fromTopic.topicId); + this.changedTopics.add(toTopic.topicId); + } + } + } + + this.propagateCooccurrencesToHierarchy(); + + debug(`Added/updated ${addedRelationships} cooccurrence relationships`); + return addedRelationships; + } + + private propagateCooccurrencesToHierarchy(): void { + const changedTopicsArray = Array.from(this.changedTopics); + + for (const topicId of changedTopicsArray) { + if (!this.hierarchicalGraph.hasNode(topicId)) continue; + + const attrs = this.hierarchicalGraph.getNodeAttributes(topicId); + + const existingCooccurrences = this.hierarchicalGraph + .edges(topicId) + .filter((edge: string) => { + const edgeAttrs = this.hierarchicalGraph.getEdgeAttributes( + edge, + ); + return edgeAttrs.type === "cooccurrence"; + }); + + for (const edge of existingCooccurrences) { + this.hierarchicalGraph.dropEdge(edge); + } + + if (this.flatGraph.hasNode(topicId)) { + for (const edge of this.flatGraph.edges(topicId)) { + const source = this.flatGraph.source(edge); + const target = this.flatGraph.target(edge); + const edgeAttrs = this.flatGraph.getEdgeAttributes(edge); + const otherNode = source === topicId ? 
target : source; + + if (!this.hierarchicalGraph.hasNode(otherNode)) continue; + + const hierarchicalEdgeKey = this.getEdgeKey( + topicId, + otherNode, + ); + if (!this.hierarchicalGraph.hasEdge(hierarchicalEdgeKey)) { + this.hierarchicalGraph.addEdge(topicId, otherNode, { + type: "cooccurrence", + count: edgeAttrs.count, + urls: edgeAttrs.urls, + strength: edgeAttrs.strength, + }); + } + } + } + + let currentParent = attrs.parentTopicId; + while (currentParent) { + this.changedTopics.add(currentParent); + const parentAttrs = + this.hierarchicalGraph.getNodeAttributes(currentParent); + this.aggregateCooccurrencesForNode(currentParent); + currentParent = parentAttrs.parentTopicId; + } + } + } + + private aggregateCooccurrencesForNode(topicId: string): void { + const attrs = this.hierarchicalGraph.getNodeAttributes(topicId); + if (attrs.childIds.length === 0) return; + + const existingCooccurrences = this.hierarchicalGraph + .edges(topicId) + .filter((edge: string) => { + const edgeAttrs = this.hierarchicalGraph.getEdgeAttributes(edge); + return edgeAttrs.type === "cooccurrence"; + }); + + for (const edge of existingCooccurrences) { + this.hierarchicalGraph.dropEdge(edge); + } + + const aggregatedCooccurrences = new Map< + string, + { count: number; urls: Set } + >(); + + for (const childId of attrs.childIds) { + for (const edge of this.hierarchicalGraph.edges(childId)) { + const edgeAttrs = this.hierarchicalGraph.getEdgeAttributes(edge); + if (edgeAttrs.type !== "cooccurrence") continue; + + const source = this.hierarchicalGraph.source(edge); + const target = this.hierarchicalGraph.target(edge); + const otherNode = source === childId ? 
target : source; + + if (attrs.childIds.includes(otherNode)) continue; + + if (!aggregatedCooccurrences.has(otherNode)) { + aggregatedCooccurrences.set(otherNode, { + count: 0, + urls: new Set(), + }); + } + const agg = aggregatedCooccurrences.get(otherNode)!; + agg.count += edgeAttrs.count || 0; + for (const url of edgeAttrs.urls || []) { + agg.urls.add(url); + } + } + } + + for (const [otherNode, agg] of aggregatedCooccurrences) { + const hierarchicalEdgeKey = this.getEdgeKey(topicId, otherNode); + if (!this.hierarchicalGraph.hasEdge(hierarchicalEdgeKey)) { + this.hierarchicalGraph.addEdge(topicId, otherNode, { + type: "cooccurrence", + count: agg.count, + urls: Array.from(agg.urls), + strength: this.calculateStrength(agg.count), + }); + } + } + } + + private async recomputeMetrics(options: { + affectedOnly?: boolean; + }): Promise { + if (options.affectedOnly && this.changedTopics.size > 0) { + debug( + `Recomputing metrics for ${this.changedTopics.size} affected topics`, + ); + + const affectedSubgraph = this.extractAffectedSubgraph(2); + + const { topicMetrics } = + this.metricsCalculator.calculateMetrics(affectedSubgraph); + + if (!this.cachedMetrics) { + this.cachedMetrics = new Map(); + } + + for (const [topicId, metrics] of topicMetrics) { + this.cachedMetrics.set(topicId, metrics); + } + + return true; + } + + return false; + } + + private extractAffectedSubgraph(hops: number): Graph { + const affectedNodes = new Set(); + + const queue: Array<{ node: string; depth: number }> = []; + for (const node of this.changedTopics) { + queue.push({ node, depth: 0 }); + affectedNodes.add(node); + } + + while (queue.length > 0) { + const { node, depth } = queue.shift()!; + if (depth >= hops) continue; + + for (const neighbor of this.hierarchicalGraph.neighbors(node)) { + if (!affectedNodes.has(neighbor)) { + affectedNodes.add(neighbor); + queue.push({ node: neighbor, depth: depth + 1 }); + } + } + } + + const subgraph = new Graph({ type: "directed" }); + for (const 
node of affectedNodes) { + if (this.hierarchicalGraph.hasNode(node)) { + subgraph.addNode( + node, + this.hierarchicalGraph.getNodeAttributes(node), + ); + } + } + + for (const edge of this.hierarchicalGraph.edges()) { + const source = this.hierarchicalGraph.source(edge); + const target = this.hierarchicalGraph.target(edge); + if (affectedNodes.has(source) && affectedNodes.has(target)) { + subgraph.addEdge( + source, + target, + this.hierarchicalGraph.getEdgeAttributes(edge), + ); + } + } + + debug( + `Extracted affected subgraph: ${subgraph.order} nodes, ${subgraph.size} edges`, + ); + return subgraph; + } + + private setupEventListeners(): void { + this.hierarchicalGraph.on("nodeAdded", () => { + this.cachedMetrics = null; + }); + + this.hierarchicalGraph.on("edgeAdded", () => { + this.cachedMetrics = null; + }); + + this.hierarchicalGraph.on("edgeDropped", () => { + this.cachedMetrics = null; + }); + } + + private findTopicsBySourceName( + topics: HierarchicalTopicRecord[], + sourceName: string, + ): HierarchicalTopicRecord[] { + return topics.filter((topic) => { + const sourceTopicNames = topic.sourceTopicNames + ? JSON.parse(topic.sourceTopicNames) + : []; + return sourceTopicNames.includes(sourceName); + }); + } + + private getEdgeKey(nodeA: string, nodeB: string): string { + return nodeA < nodeB ? 
`${nodeA}|${nodeB}` : `${nodeB}|${nodeA}`; + } + + private calculateStrength(count: number): number { + return Math.min(1.0, Math.log(count + 1) / Math.log(10)); + } + + public getCachedMetrics(): Map | null { + return this.cachedMetrics; + } + + public getGraphs(): { flatGraph: Graph; hierarchicalGraph: Graph } { + return { + flatGraph: this.flatGraph, + hierarchicalGraph: this.hierarchicalGraph, + }; + } +} diff --git a/ts/packages/memory/website/src/graph/metricsCalculator.ts b/ts/packages/memory/website/src/graph/metricsCalculator.ts new file mode 100644 index 000000000..7fdfae9d6 --- /dev/null +++ b/ts/packages/memory/website/src/graph/metricsCalculator.ts @@ -0,0 +1,181 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import { createRequire } from "module"; +import registerDebug from "debug"; +import type { TopicMetrics } from "../tables.js"; + +const require = createRequire(import.meta.url); +const Graph = require("graphology"); +const betweennessCentrality = require("graphology-metrics/centrality/betweenness"); +const { degreeCentrality } = require("graphology-metrics/centrality/degree"); +const louvain = require("graphology-communities-louvain"); + +const debug = registerDebug("typeagent:website:graph:metrics"); + +type Graph = any; + +export interface MetricsResult { + topicMetrics: Map; + communities: Map; +} + +export class MetricsCalculator { + public calculateMetrics( + hierarchicalGraph: Graph, + topicCounts?: Map, + ): MetricsResult { + debug(`Calculating metrics for ${hierarchicalGraph.order} topics`); + + const undirectedGraph = this.createUndirectedCooccurrenceGraph( + hierarchicalGraph, + ); + + debug("Running betweenness centrality..."); + const betweennessScores = betweennessCentrality(undirectedGraph); + + debug("Running degree centrality..."); + const degreeScores = degreeCentrality(undirectedGraph); + + debug("Running community detection (Louvain)..."); + const communities = new Map(); + 
louvain.assign(undirectedGraph); + for (const node of undirectedGraph.nodes()) { + const community = undirectedGraph.getNodeAttribute(node, "community"); + communities.set(node, community); + } + + debug("Computing topic metrics..."); + const topicMetrics = new Map(); + const now = new Date().toISOString(); + + for (const topicId of hierarchicalGraph.nodes()) { + const attrs = hierarchicalGraph.getNodeAttributes(topicId); + const counts = topicCounts?.get(topicId) || { + documentCount: 0, + domainCount: 0, + }; + + const cooccurrenceEdges = hierarchicalGraph + .edges(topicId) + .filter((edge: string) => { + const edgeAttrs = hierarchicalGraph.getEdgeAttributes(edge); + return edgeAttrs.type === "cooccurrence"; + }); + + const strongRelationships = cooccurrenceEdges.filter( + (edge: string) => { + const edgeAttrs = hierarchicalGraph.getEdgeAttributes(edge); + return (edgeAttrs.strength || 0) >= 0.7; + }, + ); + + const metrics: TopicMetrics = { + topicId, + topicName: attrs.topicName, + documentCount: counts.documentCount, + domainCount: counts.domainCount, + degreeCentrality: undirectedGraph.hasNode(topicId) + ? degreeScores[topicId] || 0 + : 0, + betweennessCentrality: undirectedGraph.hasNode(topicId) + ? 
betweennessScores[topicId] || 0 + : 0, + activityPeriod: 0, + avgConfidence: attrs.confidence || 0, + maxConfidence: attrs.confidence || 0, + totalRelationships: cooccurrenceEdges.length, + strongRelationships: strongRelationships.length, + entityCount: 0, + updated: now, + }; + + topicMetrics.set(topicId, metrics); + } + + debug( + `Calculated metrics for ${topicMetrics.size} topics, ${communities.size} community assignments`, + ); + + return { topicMetrics, communities }; + } + + private createUndirectedCooccurrenceGraph( + hierarchicalGraph: Graph, + ): Graph { + const undirectedGraph = new Graph({ type: "undirected" }); + + for (const node of hierarchicalGraph.nodes()) { + const attrs = hierarchicalGraph.getNodeAttributes(node); + undirectedGraph.addNode(node, { + topicName: attrs.topicName, + level: attrs.level, + }); + } + + for (const edge of hierarchicalGraph.edges()) { + const attrs = hierarchicalGraph.getEdgeAttributes(edge); + if (attrs.type !== "cooccurrence") continue; + + const source = hierarchicalGraph.source(edge); + const target = hierarchicalGraph.target(edge); + + if ( + !undirectedGraph.hasNode(source) || + !undirectedGraph.hasNode(target) + ) { + continue; + } + + if (!undirectedGraph.hasEdge(source, target)) { + undirectedGraph.addEdge(source, target, { + weight: attrs.strength || 0.5, + }); + } + } + + debug( + `Created undirected graph: ${undirectedGraph.order} nodes, ${undirectedGraph.size} edges`, + ); + + return undirectedGraph; + } + + public calculateTopicCounts( + hierarchicalTopics: Array<{ + topicId: string; + url: string; + domain: string; + }>, + ): Map { + const counts = new Map< + string, + { documents: Set; domains: Set } + >(); + + for (const topic of hierarchicalTopics) { + if (!counts.has(topic.topicId)) { + counts.set(topic.topicId, { + documents: new Set(), + domains: new Set(), + }); + } + const count = counts.get(topic.topicId)!; + count.documents.add(topic.url); + count.domains.add(topic.domain); + } + + const result = 
new Map< + string, + { documentCount: number; domainCount: number } + >(); + for (const [topicId, count] of counts) { + result.set(topicId, { + documentCount: count.documents.size, + domainCount: count.domains.size, + }); + } + + return result; + } +} diff --git a/ts/packages/memory/website/src/graph/topicGraphBuilder.ts b/ts/packages/memory/website/src/graph/topicGraphBuilder.ts new file mode 100644 index 000000000..dc22d4632 --- /dev/null +++ b/ts/packages/memory/website/src/graph/topicGraphBuilder.ts @@ -0,0 +1,382 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import { createRequire } from "module"; +import registerDebug from "debug"; +import type { HierarchicalTopicRecord, TopicRelationship } from "../tables.js"; + +const require = createRequire(import.meta.url); +const Graph = require("graphology"); + +const debug = registerDebug("typeagent:website:graph:builder"); + +type Graph = any; + +export interface CooccurrenceData { + fromTopic: string; + toTopic: string; + count: number; + urls: string[]; +} + +export interface TopicGraphs { + flatGraph: Graph; + hierarchicalGraph: Graph; +} + +export class TopicGraphBuilder { + private flatGraph: Graph; + private hierarchicalGraph: Graph; + + constructor() { + this.flatGraph = new Graph({ type: "undirected" }); + this.hierarchicalGraph = new Graph({ type: "directed" }); + } + + public buildFromTopicHierarchy( + hierarchicalTopics: HierarchicalTopicRecord[], + cooccurrences: CooccurrenceData[], + ): TopicGraphs { + debug( + `Building topic graphs: ${hierarchicalTopics.length} topics, ${cooccurrences.length} cooccurrences`, + ); + + this.buildFlatGraph(hierarchicalTopics, cooccurrences); + this.buildHierarchicalGraph(hierarchicalTopics); + this.aggregateCooccurrencesBottomUp(); + + debug( + `Graphs built: flat=${this.flatGraph.order} nodes, ${this.flatGraph.size} edges; hierarchical=${this.hierarchicalGraph.order} nodes, ${this.hierarchicalGraph.size} edges`, + ); + + return { + 
flatGraph: this.flatGraph, + hierarchicalGraph: this.hierarchicalGraph, + }; + } + + private buildFlatGraph( + hierarchicalTopics: HierarchicalTopicRecord[], + cooccurrences: CooccurrenceData[], + ): void { + debug("Building flat graph with ground truth cooccurrences"); + + const leafTopics = hierarchicalTopics.filter((topic) => { + const sourceTopicNames = topic.sourceTopicNames + ? JSON.parse(topic.sourceTopicNames) + : []; + return sourceTopicNames.length > 0; + }); + + for (const topic of leafTopics) { + if (!this.flatGraph.hasNode(topic.topicId)) { + this.flatGraph.addNode(topic.topicId, { + topicName: topic.topicName, + level: topic.level, + confidence: topic.confidence, + sourceTopicNames: topic.sourceTopicNames + ? JSON.parse(topic.sourceTopicNames) + : [], + isLeaf: true, + }); + } + } + + for (const cooccur of cooccurrences) { + const fromTopics = this.findTopicsBySourceName( + leafTopics, + cooccur.fromTopic, + ); + const toTopics = this.findTopicsBySourceName( + leafTopics, + cooccur.toTopic, + ); + + for (const fromTopic of fromTopics) { + for (const toTopic of toTopics) { + if (fromTopic.topicId === toTopic.topicId) continue; + + if (this.flatGraph.hasEdge(fromTopic.topicId, toTopic.topicId)) { + const current = this.flatGraph.getEdgeAttributes(fromTopic.topicId, toTopic.topicId); + this.flatGraph.setEdgeAttribute( + fromTopic.topicId, + toTopic.topicId, + "count", + current.count + cooccur.count, + ); + this.flatGraph.setEdgeAttribute( + fromTopic.topicId, + toTopic.topicId, + "urls", + [...current.urls, ...cooccur.urls], + ); + } else { + this.flatGraph.addEdge( + fromTopic.topicId, + toTopic.topicId, + { + count: cooccur.count, + urls: cooccur.urls, + strength: this.calculateStrength(cooccur.count), + }, + ); + } + } + } + } + + debug( + `Flat graph: ${this.flatGraph.order} nodes, ${this.flatGraph.size} edges`, + ); + } + + private buildHierarchicalGraph( + hierarchicalTopics: HierarchicalTopicRecord[], + ): void { + debug("Building hierarchical 
graph structure"); + + for (const topic of hierarchicalTopics) { + if (!this.hierarchicalGraph.hasNode(topic.topicId)) { + this.hierarchicalGraph.addNode(topic.topicId, { + topicName: topic.topicName, + level: topic.level, + confidence: topic.confidence, + sourceTopicNames: topic.sourceTopicNames + ? JSON.parse(topic.sourceTopicNames) + : [], + parentTopicId: topic.parentTopicId, + childIds: [], + }); + } + } + + for (const topic of hierarchicalTopics) { + if (topic.parentTopicId) { + if (this.hierarchicalGraph.hasNode(topic.parentTopicId)) { + this.hierarchicalGraph.addDirectedEdge( + topic.parentTopicId, + topic.topicId, + { type: "parent-child" }, + ); + + const parentAttrs = this.hierarchicalGraph.getNodeAttributes( + topic.parentTopicId, + ); + parentAttrs.childIds.push(topic.topicId); + } + } + } + + debug( + `Hierarchical graph: ${this.hierarchicalGraph.order} nodes, ${this.hierarchicalGraph.size} structural edges`, + ); + } + + private aggregateCooccurrencesBottomUp(): void { + debug("Aggregating cooccurrences bottom-up through hierarchy"); + + const nodesByLevel = new Map(); + for (const node of this.hierarchicalGraph.nodes()) { + const level = this.hierarchicalGraph.getNodeAttribute(node, "level"); + if (!nodesByLevel.has(level)) { + nodesByLevel.set(level, []); + } + nodesByLevel.get(level)!.push(node); + } + + const maxLevel = Math.max(...Array.from(nodesByLevel.keys())); + + for (let level = maxLevel; level >= 0; level--) { + const nodesAtLevel = nodesByLevel.get(level) || []; + + for (const topicId of nodesAtLevel) { + const attrs = this.hierarchicalGraph.getNodeAttributes(topicId); + + if (attrs.childIds.length === 0) { + if (this.flatGraph.hasNode(topicId)) { + for (const edge of this.flatGraph.edges(topicId)) { + const source = this.flatGraph.source(edge); + const target = this.flatGraph.target(edge); + const edgeAttrs = this.flatGraph.getEdgeAttributes(edge); + const otherNode = source === topicId ? 
target : source; + + if (!this.hierarchicalGraph.hasNode(otherNode)) { + continue; + } + + const hierarchicalEdgeKey = this.getEdgeKey( + topicId, + otherNode, + ); + if ( + !this.hierarchicalGraph.hasEdge(hierarchicalEdgeKey) + ) { + this.hierarchicalGraph.addEdge( + topicId, + otherNode, + { + type: "cooccurrence", + count: edgeAttrs.count, + urls: edgeAttrs.urls, + strength: edgeAttrs.strength, + }, + ); + } + } + } + } else { + const aggregatedCooccurrences = new Map< + string, + { + count: number; + urls: Set; + } + >(); + + for (const childId of attrs.childIds) { + for (const edge of this.hierarchicalGraph.edges(childId)) { + const edgeAttrs = + this.hierarchicalGraph.getEdgeAttributes(edge); + if (edgeAttrs.type !== "cooccurrence") continue; + + const source = this.hierarchicalGraph.source(edge); + const target = this.hierarchicalGraph.target(edge); + const otherNode = + source === childId ? target : source; + + if (attrs.childIds.includes(otherNode)) { + continue; + } + + const ancestorId = this.findCommonAncestor( + topicId, + otherNode, + ); + if (ancestorId && ancestorId !== topicId) { + continue; + } + + if (!aggregatedCooccurrences.has(otherNode)) { + aggregatedCooccurrences.set(otherNode, { + count: 0, + urls: new Set(), + }); + } + const agg = aggregatedCooccurrences.get(otherNode)!; + agg.count += edgeAttrs.count || 0; + for (const url of edgeAttrs.urls || []) { + agg.urls.add(url); + } + } + } + + for (const [otherNode, agg] of aggregatedCooccurrences) { + const hierarchicalEdgeKey = this.getEdgeKey( + topicId, + otherNode, + ); + if (!this.hierarchicalGraph.hasEdge(hierarchicalEdgeKey)) { + this.hierarchicalGraph.addEdge(topicId, otherNode, { + type: "cooccurrence", + count: agg.count, + urls: Array.from(agg.urls), + strength: this.calculateStrength(agg.count), + }); + } + } + } + } + } + + debug( + `Hierarchical graph after aggregation: ${this.hierarchicalGraph.order} nodes, ${this.hierarchicalGraph.size} total edges`, + ); + } + + private 
findTopicsBySourceName( + topics: HierarchicalTopicRecord[], + sourceName: string, + ): HierarchicalTopicRecord[] { + return topics.filter((topic) => { + const sourceTopicNames = topic.sourceTopicNames + ? JSON.parse(topic.sourceTopicNames) + : []; + return sourceTopicNames.includes(sourceName); + }); + } + + private findCommonAncestor(nodeA: string, nodeB: string): string | null { + const ancestorsA = new Set(); + let current: string | null = nodeA; + + while (current) { + ancestorsA.add(current); + const attrs: any = this.hierarchicalGraph.getNodeAttributes(current); + current = attrs.parentTopicId || null; + } + + current = nodeB; + while (current) { + if (ancestorsA.has(current)) { + return current; + } + const attrs: any = this.hierarchicalGraph.getNodeAttributes(current); + current = attrs.parentTopicId || null; + } + + return null; + } + + private getEdgeKey(nodeA: string, nodeB: string): string { + return nodeA < nodeB ? `${nodeA}|${nodeB}` : `${nodeB}|${nodeA}`; + } + + private calculateStrength(count: number): number { + return Math.min(1.0, Math.log(count + 1) / Math.log(10)); + } + + public exportToTopicRelationships(): TopicRelationship[] { + const relationships: TopicRelationship[] = []; + const now = new Date().toISOString(); + + for (const edge of this.hierarchicalGraph.edges()) { + const attrs = this.hierarchicalGraph.getEdgeAttributes(edge); + if (attrs.type !== "cooccurrence") continue; + + const source = this.hierarchicalGraph.source(edge); + const target = this.hierarchicalGraph.target(edge); + const sourceName = this.hierarchicalGraph.getNodeAttribute( + source, + "topicName", + ); + const targetName = this.hierarchicalGraph.getNodeAttribute( + target, + "topicName", + ); + + relationships.push({ + fromTopic: source, + toTopic: target, + relationshipType: "cooccurrence", + strength: attrs.strength || 0, + metadata: JSON.stringify({ + fromTopicName: sourceName, + toTopicName: targetName, + }), + sourceUrls: JSON.stringify(attrs.urls || []), + 
cooccurrenceCount: attrs.count || 0, + updated: now, + }); + } + + debug(`Exported ${relationships.length} topic relationships`); + return relationships; + } + + public getGraphs(): TopicGraphs { + return { + flatGraph: this.flatGraph, + hierarchicalGraph: this.hierarchicalGraph, + }; + } +} diff --git a/ts/packages/memory/website/src/websiteCollection.ts b/ts/packages/memory/website/src/websiteCollection.ts index 2a98f49e2..f53632294 100644 --- a/ts/packages/memory/website/src/websiteCollection.ts +++ b/ts/packages/memory/website/src/websiteCollection.ts @@ -129,6 +129,7 @@ export class WebsiteCollection private db: sqlite.Database | undefined = undefined; private dbPath: string = ""; + private graphStateManager: any = null; constructor( nameTag: string = "", @@ -1666,18 +1667,20 @@ export class WebsiteCollection `[Knowledge Graph] Built hierarchical topics in ${Date.now() - topicStartTime}ms`, ); - // Build topic relationships and metrics using batch operations - const topicRelationshipsStart = Date.now(); - await this.buildTopicRelationships(cacheManager); - debug( - `[Knowledge Graph] Completed topic relationships in ${Date.now() - topicRelationshipsStart}ms`, + // Build topic relationships and metrics using Graphology-based graph builder + const topicGraphStart = Date.now(); + const { buildTopicGraphWithGraphology } = await import( + "./buildTopicGraphWithGraphology.js" + ); + const allHierarchicalTopics = this.hierarchicalTopics?.getTopicHierarchy() || []; + await buildTopicGraphWithGraphology( + allHierarchicalTopics, + cacheManager, + this.topicRelationships, + this.topicMetrics, ); - - // Calculate topic metrics using batch operations - const topicMetricsStart = Date.now(); - await this.calculateTopicMetrics(cacheManager); debug( - `[Knowledge Graph] Completed topic metrics in ${Date.now() - topicMetricsStart}ms`, + `[Knowledge Graph] Completed topic graph build in ${Date.now() - topicGraphStart}ms`, ); const totalTime = Date.now() - startTime; @@ -1707,6 
+1710,98 @@ export class WebsiteCollection // Update hierarchical topics with new website topics await this.updateHierarchicalTopics(newWebsites); + + // Update topic graph incrementally + await this.updateTopicGraphIncremental(newWebsites); + } + + private async updateTopicGraphIncremental(newWebsites: Website[]): Promise { + debug(`[Knowledge Graph] Updating topic graph incrementally for ${newWebsites.length} websites`); + + if (!this.graphStateManager) { + const { GraphStateManager } = await import("./graph/graphStateManager.js"); + this.graphStateManager = new GraphStateManager(); + } + + const allHierarchicalTopics = this.hierarchicalTopics?.getTopicHierarchy() || []; + + const { GraphBuildingCacheManager } = await import( + "./utils/graphBuildingCacheManager.mjs" + ); + const cacheManager = new GraphBuildingCacheManager(); + const websites = this.getWebsites(); + await cacheManager.initializeCache(websites); + + const cooccurrences = cacheManager.getAllTopicRelationships().map((rel: any) => ({ + fromTopic: rel.fromTopic, + toTopic: rel.toTopic, + count: rel.count, + urls: rel.sources || [], + })); + + await this.graphStateManager.ensureGraphsInitialized( + allHierarchicalTopics, + cooccurrences, + ); + + for (const website of newWebsites) { + const knowledge = website.knowledge as any; + if (!knowledge?.topicHierarchy) continue; + + const topicMap = knowledge.topicHierarchy.topicMap instanceof Map + ? 
knowledge.topicHierarchy.topicMap + : new Map(Object.entries(knowledge.topicHierarchy.topicMap || {})); + + const hierarchicalTopics: any[] = []; + for (const [topicId, topic] of topicMap) { + hierarchicalTopics.push({ + url: website.metadata.url, + domain: website.metadata.domain, + topicId: topicId, + topicName: (topic as any).name, + level: (topic as any).level || 0, + parentTopicId: (topic as any).parentId, + confidence: (topic as any).confidence || 0.5, + sourceTopicNames: JSON.stringify((topic as any).sourceTopicNames || []), + extractionDate: new Date().toISOString(), + }); + } + + const websiteCooccurrences: any[] = []; + + const result = await this.graphStateManager.addWebpage({ + url: website.metadata.url, + domain: website.metadata.domain, + hierarchicalTopics, + cooccurrences: websiteCooccurrences, + }); + + debug( + `[Knowledge Graph] Added ${website.metadata.url}: ${result.addedTopics} topics, ${result.addedRelationships} relationships in ${result.durationMs}ms`, + ); + } + + const relationships = this.graphStateManager.exportRelationships(); + for (const rel of relationships) { + this.topicRelationships?.upsertRelationship(rel); + } + + const metricsCalculator = await import("./graph/metricsCalculator.js"); + const calc = new metricsCalculator.MetricsCalculator(); + const topicCounts = calc.calculateTopicCounts( + allHierarchicalTopics.map((t: any) => ({ + topicId: t.topicId, + url: t.url, + domain: t.domain, + })), + ); + + const { topicMetrics } = await this.graphStateManager.recomputeMetrics(topicCounts); + for (const [, metrics] of topicMetrics) { + this.topicMetrics?.upsertMetrics(metrics); + } + + debug(`[Knowledge Graph] Incremental update complete`); } /** @@ -3377,480 +3472,6 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation debug(`[Knowledge Graph] Finished storing ${storedCount} communities`); } - /** - * Build topic relationships using batch operations - */ - private async 
buildTopicRelationships(cacheManager: any): Promise { - debug( - `[Knowledge Graph] Building topic relationships using batch approach`, - ); - - // Get all topic relationships from cache manager (pre-computed co-occurrences) - const cachedRelationships = cacheManager.getAllTopicRelationships(); - debug( - `[Knowledge Graph] Found ${cachedRelationships.length} cached topic relationships`, - ); - - const now = new Date().toISOString(); - - for (const cachedRel of cachedRelationships) { - if (cachedRel.count < 2) continue; // Filter weak relationships - - const strength = Math.min(cachedRel.count / 10, 1.0); - - // Use hierarchical topics to get topic IDs - const fromTopicId = this.findTopicIdByNameInHierarchy( - cachedRel.fromTopic, - ); - const toTopicId = this.findTopicIdByNameInHierarchy( - cachedRel.toTopic, - ); - - if (fromTopicId && toTopicId) { - this.topicRelationships?.upsertRelationship({ - fromTopic: fromTopicId, - toTopic: toTopicId, - relationshipType: "co_occurs", - strength, - sourceUrls: JSON.stringify(cachedRel.sources), - cooccurrenceCount: cachedRel.count, - firstSeen: now, - lastSeen: now, - updated: now, - }); - } - } - - debug(`[Knowledge Graph] Completed topic relationships`); - } - - /** - * Calculate topic metrics using batch operations - */ - private async calculateTopicMetrics(cacheManager: any): Promise { - debug( - `[Knowledge Graph] Calculating topic metrics using batch approach`, - ); - - const allTopics = this.hierarchicalTopics?.getTopicHierarchy(); - if (!allTopics || allTopics.length === 0) return; - - const uniqueTopics = new Set(allTopics.map((t) => t.topicId)); - const topicIds = Array.from(uniqueTopics); - - // MAJOR OPTIMIZATION: Use batch queries instead of individual queries - // Get all relationships for all topics in one query - const startTime = performance.now(); - const allRelationships = - this.topicRelationships?.getRelationshipsForTopics?.(topicIds) || - []; - const relationshipQueryTime = performance.now() - 
startTime; - debug( - `[Knowledge Graph] Batch fetched ${allRelationships.length} topic relationships in ${relationshipQueryTime.toFixed(2)}ms`, - ); - - // Get all entity relations for all topics in one query - const entityStartTime = performance.now(); - const allEntityRelations = - this.topicEntityRelations?.getEntitiesForTopics?.(topicIds) || []; - const entityQueryTime = performance.now() - entityStartTime; - debug( - `[Knowledge Graph] Batch fetched ${allEntityRelations.length} topic-entity relations in ${entityQueryTime.toFixed(2)}ms`, - ); - - // Group results by topic ID for efficient lookup - const relationshipsByTopic = new Map(); - const entityRelationsByTopic = new Map(); - - for (const rel of allRelationships) { - if (!relationshipsByTopic.has(rel.fromTopic)) { - relationshipsByTopic.set(rel.fromTopic, []); - } - if (!relationshipsByTopic.has(rel.toTopic)) { - relationshipsByTopic.set(rel.toTopic, []); - } - relationshipsByTopic.get(rel.fromTopic)!.push(rel); - relationshipsByTopic.get(rel.toTopic)!.push(rel); - } - - for (const rel of allEntityRelations) { - if (!entityRelationsByTopic.has(rel.topicId)) { - entityRelationsByTopic.set(rel.topicId, []); - } - entityRelationsByTopic.get(rel.topicId)!.push(rel); - } - - // Calculate co-occurrence relationships - debug(`[Knowledge Graph] Calculating co-occurrence relationships`); - const cooccurrenceRels = this.calculateCooccurrenceRelationships( - topicIds, - cacheManager, - ); - debug( - `[Knowledge Graph] Found ${cooccurrenceRels.length} co-occurrence relationships`, - ); - - // Calculate entity-mediated relationships - debug(`[Knowledge Graph] Calculating entity-mediated relationships`); - const entityRels = await this.calculateEntityMediatedRelationships( - topicIds, - entityRelationsByTopic, - ); - debug( - `[Knowledge Graph] Found ${entityRels.length} entity-mediated relationships`, - ); - - // Store all relationships - for (const rel of [...cooccurrenceRels, ...entityRels]) { - 
this.topicRelationships?.upsertRelationship(rel); - } - - // Calculate metrics for each topic using pre-fetched data - for (const topicId of uniqueTopics) { - const topic = this.hierarchicalTopics?.getTopicById(topicId); - if (!topic) continue; - - // Get data from cache manager - const documentsWithTopic = cacheManager.getWebsitesForTopic( - topic.topicName, - ); - const domains = new Set( - documentsWithTopic.map((url: string) => { - // Extract domain from URL or use a default approach - try { - return new URL(url).hostname; - } catch { - return url.split("/")[0]; - } - }), - ).size; - - // Use pre-fetched data instead of individual queries - const relationships = relationshipsByTopic.get(topicId) || []; - const strongRelationships = relationships.filter( - (r) => r.strength > 0.7, - ).length; - const entityRelations = entityRelationsByTopic.get(topicId) || []; - - const topEntities = entityRelations - .sort((a, b) => b.relevance - a.relevance) - .slice(0, 10) - .map((r) => r.entityName); - - const metricsData = { - topicId, - topicName: topic.topicName, - documentCount: documentsWithTopic.length, - domainCount: domains, - degreeCentrality: relationships.length, - betweennessCentrality: 0, // Could be calculated using graph algorithms if needed - activityPeriod: 0, // Simplified for now - avgConfidence: topic.confidence, - maxConfidence: topic.confidence, - totalRelationships: relationships.length, - strongRelationships, - entityCount: entityRelations.length, - topEntities: JSON.stringify(topEntities), - updated: new Date().toISOString(), - }; - - this.topicMetrics?.upsertMetrics(metricsData); - } - - debug( - `[Knowledge Graph] Completed topic metrics calculation for ${topicIds.length} topics`, - ); - } - - /** - * Helper method to find topic ID by name in hierarchical topics - */ - private findTopicIdByNameInHierarchy(topicName: string): string | null { - const allTopics = this.hierarchicalTopics?.getTopicHierarchy() || []; - const topic = allTopics.find((t) => 
t.topicName === topicName); - return topic?.topicId || null; - } - - /** - * Calculate co-occurrence relationships using bottom-up hierarchical aggregation - * - * Algorithm: - * 1. Leaf topics: Get co-occurrences from GraphBuildingCache using sourceTopicNames - * 2. Parent topics: Aggregate from direct children (not from leaves) - * 3. Use intermediate cache to avoid re-computation - */ - private calculateCooccurrenceRelationships( - topicIds: string[], - cacheManager: any, - ): any[] { - const relationships: any[] = []; - - // Working cache: hierarchicalTopicId → (otherTopicId → {count, strength, sources}) - const hierarchicalCooccurrenceCache = new Map>(); - - // Group topics by level for bottom-up processing - const topicsByLevel = new Map(); - for (const topicId of topicIds) { - const topic = this.hierarchicalTopics?.getTopicById(topicId); - if (!topic) continue; - - if (!topicsByLevel.has(topic.level)) { - topicsByLevel.set(topic.level, []); - } - topicsByLevel.get(topic.level)!.push(topic); - } - - // Process level by level, starting from leaves (highest level number) - const levels = Array.from(topicsByLevel.keys()).sort((a, b) => b - a); - - for (const level of levels) { - const topicsAtLevel = topicsByLevel.get(level)!; - - for (const topic of topicsAtLevel) { - const topicCooccurrences = new Map(); - - if (topic.childIds && topic.childIds.length > 0) { - // Parent topic: aggregate from direct children - for (const childId of topic.childIds) { - const childCooccurrences = hierarchicalCooccurrenceCache.get(childId); - if (!childCooccurrences) continue; - - for (const [otherTopicId, cooccurData] of childCooccurrences) { - if (!topicCooccurrences.has(otherTopicId)) { - topicCooccurrences.set(otherTopicId, { - count: 0, - sources: new Set(), - combinations: 0, - }); - } - const existing = topicCooccurrences.get(otherTopicId)!; - existing.count += cooccurData.count; - cooccurData.sources?.forEach((src: string) => existing.sources.add(src)); - 
existing.combinations += 1; - } - } - } else { - // Leaf topic: get from GraphBuildingCache using sourceTopicNames - const sourceNames = topic.sourceTopicNames || [topic.topicName]; - - for (const sourceName of sourceNames) { - // Get all co-occurrences for this source topic name - const allCooccurrences = cacheManager.getAllTopicRelationships(); - - for (const cooccur of allCooccurrences) { - if (cooccur.fromTopic !== sourceName && cooccur.toTopic !== sourceName) { - continue; - } - - // Find the other topic in the co-occurrence - const otherTopicName = cooccur.fromTopic === sourceName - ? cooccur.toTopic - : cooccur.fromTopic; - - // Map knowledge topic name to hierarchical topic ID - const otherHierarchicalTopic = this.findHierarchicalTopicBySourceName( - otherTopicName, - topicIds, - ); - - if (!otherHierarchicalTopic) continue; - - if (!topicCooccurrences.has(otherHierarchicalTopic.topicId)) { - topicCooccurrences.set(otherHierarchicalTopic.topicId, { - count: 0, - sources: new Set(), - combinations: 0, - }); - } - const existing = topicCooccurrences.get(otherHierarchicalTopic.topicId)!; - existing.count += cooccur.count; - cooccur.sources?.forEach((src: string) => existing.sources.add(src)); - existing.combinations += 1; - } - } - } - - // Store in cache for parent nodes to use - hierarchicalCooccurrenceCache.set(topic.topicId, topicCooccurrences); - } - } - - // Convert cache to relationships - for (const [fromTopicId, cooccurrences] of hierarchicalCooccurrenceCache) { - const fromTopic = this.hierarchicalTopics?.getTopicById(fromTopicId); - if (!fromTopic) continue; - - for (const [toTopicId, cooccurData] of cooccurrences) { - if (fromTopicId >= toTopicId) continue; // Avoid duplicates - - const toTopic = this.hierarchicalTopics?.getTopicById(toTopicId); - if (!toTopic) continue; - - // Calculate aggregate strength - const avgCount = cooccurData.count / (cooccurData.combinations || 1); - const sourceArray = Array.from(cooccurData.sources); - - // Normalize 
by document coverage - const strength = Math.min( - avgCount / Math.min( - sourceArray.length || 1, - 10, // cap for reasonable normalization - ), - 1.0 - ); - - if (strength < 0.1) continue; // Filter weak relationships - - relationships.push({ - fromTopic: fromTopicId, - toTopic: toTopicId, - relationshipType: "CO_OCCURS", - strength, - metadata: JSON.stringify({ - cooccurrenceCount: cooccurData.count, - commonDocuments: sourceArray.length, - aggregatedFrom: cooccurData.combinations, - }), - sourceUrls: JSON.stringify(sourceArray.slice(0, 10)), - cooccurrenceCount: cooccurData.count, - firstSeen: fromTopic.extractionDate || new Date().toISOString(), - lastSeen: toTopic.extractionDate || new Date().toISOString(), - updated: new Date().toISOString(), - }); - } - } - - return relationships; - } - - /** - * Find hierarchical topic that has the given source topic name - */ - private findHierarchicalTopicBySourceName( - sourceName: string, - topicIds: string[], - ): any | null { - for (const topicId of topicIds) { - const topic = this.hierarchicalTopics?.getTopicById(topicId); - if (!topic) continue; - - const sourceNames = topic.sourceTopicNames || [topic.topicName]; - if (sourceNames.includes(sourceName)) { - return topic; - } - } - return null; - } - - /** - * Calculate entity-mediated relationships using bottom-up hierarchical aggregation - * - * Algorithm: - * 1. Leaf topics: Get entities from entityRelationsByTopic - * 2. Parent topics: Aggregate entities from direct children (union of entity sets) - * 3. 
Calculate pairwise entity overlap using aggregated entity sets - */ - private async calculateEntityMediatedRelationships( - topicIds: string[], - entityRelationsByTopic: Map, - ): Promise { - const relationships: any[] = []; - - // Working cache: hierarchicalTopicId → Set - const hierarchicalEntityCache = new Map>(); - - // Group topics by level for bottom-up processing - const topicsByLevel = new Map(); - for (const topicId of topicIds) { - const topic = this.hierarchicalTopics?.getTopicById(topicId); - if (!topic) continue; - - if (!topicsByLevel.has(topic.level)) { - topicsByLevel.set(topic.level, []); - } - topicsByLevel.get(topic.level)!.push(topic); - } - - // Process level by level, starting from leaves (highest level number) - const levels = Array.from(topicsByLevel.keys()).sort((a, b) => b - a); - - for (const level of levels) { - const topicsAtLevel = topicsByLevel.get(level)!; - - for (const topic of topicsAtLevel) { - const topicEntities = new Set(); - - if (topic.childIds && topic.childIds.length > 0) { - // Parent topic: aggregate entities from direct children (union) - for (const childId of topic.childIds) { - const childEntities = hierarchicalEntityCache.get(childId); - if (!childEntities) continue; - - for (const entity of childEntities) { - topicEntities.add(entity); - } - } - } else { - // Leaf topic: get entities from entityRelationsByTopic - const entities = entityRelationsByTopic.get(topic.topicId) || []; - for (const entity of entities) { - topicEntities.add(entity.entityName); - } - } - - // Store in cache for parent nodes to use - hierarchicalEntityCache.set(topic.topicId, topicEntities); - } - } - - // Calculate pairwise entity overlap using aggregated entity sets - for (let i = 0; i < topicIds.length; i++) { - for (let j = i + 1; j < topicIds.length; j++) { - const topicA = topicIds[i]; - const topicB = topicIds[j]; - - const entitiesA = hierarchicalEntityCache.get(topicA) || new Set(); - const entitiesB = 
hierarchicalEntityCache.get(topicB) || new Set(); - - if (entitiesA.size === 0 || entitiesB.size === 0) continue; - - // Calculate shared entities - const shared = Array.from(entitiesA).filter((e) => - entitiesB.has(e), - ); - - if (shared.length === 0) continue; - - // Calculate strength as Jaccard similarity (intersection / union) - const unionSize = new Set([...entitiesA, ...entitiesB]).size; - const strength = shared.length / unionSize; - - // Only create relationship if strength is significant - if (strength < 0.1) continue; - - relationships.push({ - fromTopic: topicA, - toTopic: topicB, - relationshipType: "RELATED_VIA_ENTITY", - strength, - metadata: JSON.stringify({ - sharedEntities: shared.slice(0, 10), - sharedEntityCount: shared.length, - entityOverlapRatio: strength, - totalEntitiesA: entitiesA.size, - totalEntitiesB: entitiesB.size, - }), - updated: new Date().toISOString(), - }); - } - } - - return relationships; - } - - /** - * Calculate sibling relationships from hierarchical structure - */ private calculateSiblingRelationships(topicMap: Map): any[] { const relationships: any[] = []; const parentToChildren = new Map(); diff --git a/ts/pnpm-lock.yaml b/ts/pnpm-lock.yaml index f6bc35faf..1ad25a8d4 100644 --- a/ts/pnpm-lock.yaml +++ b/ts/pnpm-lock.yaml @@ -3263,6 +3263,15 @@ importers: get-folder-size: specifier: ^5.0.0 version: 5.0.0 + graphology: + specifier: ^0.25.4 + version: 0.25.4(graphology-types@0.24.8) + graphology-communities-louvain: + specifier: ^2.0.1 + version: 2.0.2(graphology-types@0.24.8) + graphology-metrics: + specifier: ^2.1.0 + version: 2.4.0(graphology-types@0.24.8) jsdom: specifier: ^26.1.0 version: 26.1.0 @@ -6741,6 +6750,9 @@ packages: '@xtuc/long@4.2.2': resolution: {integrity: sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==} + '@yomguithereal/helpers@1.1.1': + resolution: {integrity: 
sha512-UYvAq/XCA7xoh1juWDYsq3W0WywOB+pz8cgVnE1b45ZfdMhBvHDrgmSFG3jXeZSr2tMTYLGHFHON+ekG05Jebg==} + abab@2.0.6: resolution: {integrity: sha512-j2afSsaIENvHZN2B8GOpF566vZ5WVk5opAiMTvWgaQT8DkbOqsTfvNAvHoRGU2zzP8cPoqys+xHTRDWW8L+/BA==} deprecated: Use your platform's native atob() and btoa() methods instead @@ -7696,6 +7708,7 @@ packages: crx@5.0.1: resolution: {integrity: sha512-n/PzBx/fR1+xZCiJBats9y5zw/a+YBcoJ0ABnUaY56xb1RpXuFhsiCMpNY6WjVtylLzhUUXSWsbitesVg7v2vg==} engines: {node: '>=10'} + deprecated: Package no longer supported. Contact Support at https://www.npmjs.com/support for more info. hasBin: true css-select@4.3.0: @@ -8900,6 +8913,16 @@ packages: peerDependencies: graphology-types: '>=0.19.0' + graphology-metrics@2.4.0: + resolution: {integrity: sha512-7WOfOP+mFLCaTJx55Qg4eY+211vr1/b3D/R3biz3SXGhAaCVcWYkfabnmO4O4WBNWANEHtVnFrGgJ0kj6MM6xw==} + peerDependencies: + graphology-types: '>=0.20.0' + + graphology-shortest-path@2.1.0: + resolution: {integrity: sha512-KbT9CTkP/u72vGEJzyRr24xFC7usI9Es3LMmCPHGwQ1KTsoZjxwA9lMKxfU0syvT/w+7fZUdB/Hu2wWYcJBm6Q==} + peerDependencies: + graphology-types: '>=0.20.0' + graphology-types@0.24.8: resolution: {integrity: sha512-hDRKYXa8TsoZHjgEaysSRyPdT6uB78Ci8WnjgbStlQysz7xR52PInxNsmnB7IBOM1BhikxkNyCVEFgmPKnpx3Q==} @@ -17583,6 +17606,8 @@ snapshots: '@xtuc/long@4.2.2': {} + '@yomguithereal/helpers@1.1.1': {} + abab@2.0.6: {} abbrev@1.1.1: {} @@ -20260,6 +20285,23 @@ snapshots: graphology-utils: 2.5.2(graphology-types@0.24.8) pandemonium: 2.4.1 + graphology-metrics@2.4.0(graphology-types@0.24.8): + dependencies: + graphology-indices: 0.17.0(graphology-types@0.24.8) + graphology-shortest-path: 2.1.0(graphology-types@0.24.8) + graphology-types: 0.24.8 + graphology-utils: 2.5.2(graphology-types@0.24.8) + mnemonist: 0.39.8 + pandemonium: 2.4.1 + + graphology-shortest-path@2.1.0(graphology-types@0.24.8): + dependencies: + '@yomguithereal/helpers': 1.1.1 + graphology-indices: 0.17.0(graphology-types@0.24.8) + graphology-types: 0.24.8 + 
graphology-utils: 2.5.2(graphology-types@0.24.8) + mnemonist: 0.39.8 + graphology-types@0.24.8: {} graphology-utils@2.5.2(graphology-types@0.24.8): From 7b9d8c18e051c42fd8bf928a885a78c867a5c38c Mon Sep 17 00:00:00 2001 From: Hillary Mutisya <150286414+hillary-mutisya@users.noreply.github.com> Date: Fri, 31 Oct 2025 06:04:09 -0700 Subject: [PATCH 04/10] Update client styling to only use webgl optimized styles --- .../extension/views/entityGraphVisualizer.ts | 36 +++++++------------ 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/ts/packages/agents/browser/src/extension/views/entityGraphVisualizer.ts b/ts/packages/agents/browser/src/extension/views/entityGraphVisualizer.ts index 35e3d0fd6..f8b593b7a 100644 --- a/ts/packages/agents/browser/src/extension/views/entityGraphVisualizer.ts +++ b/ts/packages/agents/browser/src/extension/views/entityGraphVisualizer.ts @@ -1303,11 +1303,9 @@ export class EntityGraphVisualizer { style: { "line-color": "#4A90E2", width: "mapData(strength, 0, 1, 2, 5)", - "line-opacity": 1, - "target-arrow-color": "#4A90E2", - "target-arrow-shape": "triangle", - "curve-style": "bezier", - "line-style": "solid", + "line-opacity": 1.0, + "curve-style": "haystack", + "haystack-radius": 0.5, }, }, { @@ -1315,11 +1313,9 @@ export class EntityGraphVisualizer { style: { "line-color": "#7ED321", width: "mapData(strength, 0, 1, 2, 4)", - "line-opacity": 0.8, - "target-arrow-color": "#7ED321", - "target-arrow-shape": "triangle", - "curve-style": "bezier", - "line-style": "dashed", + "line-opacity": 0.6, + "curve-style": "haystack", + "haystack-radius": 0.5, }, }, { @@ -1327,11 +1323,9 @@ export class EntityGraphVisualizer { style: { "line-color": "#BD10E0", width: "mapData(strength, 0, 1, 1, 3)", - "line-opacity": 0.6, - "target-arrow-color": "#BD10E0", - "target-arrow-shape": "triangle", - "curve-style": "bezier", - "line-style": "dotted", + "line-opacity": 0.4, + "curve-style": "haystack", + "haystack-radius": 0.5, }, }, { @@ -1340,10 
+1334,8 @@ export class EntityGraphVisualizer { "line-color": "#F5A623", width: "mapData(strength, 0, 1, 2, 4)", "line-opacity": 0.7, - "target-arrow-color": "#F5A623", - "target-arrow-shape": "triangle", - "curve-style": "bezier", - "line-style": "solid", + "curve-style": "haystack", + "haystack-radius": 0.5, }, }, { @@ -1352,10 +1344,8 @@ export class EntityGraphVisualizer { "line-color": "#FF6B9D", width: "mapData(strength, 0, 1, 1, 3)", "line-opacity": 0.5, - "target-arrow-color": "#FF6B9D", - "target-arrow-shape": "triangle", - "curve-style": "bezier", - "line-style": "dashed", + "curve-style": "haystack", + "haystack-radius": 0.5, }, }, // Fallback edge styles by strength (fixed selectors) From 69b2ba9060bf3cd58f04d78136dc4370565dae08 Mon Sep 17 00:00:00 2001 From: Hillary Mutisya <150286414+hillary-mutisya@users.noreply.github.com> Date: Fri, 31 Oct 2025 06:24:08 -0700 Subject: [PATCH 05/10] Remove UI action added for debugging --- .../src/agent/browserActionHandler.mts | 1 - .../agent/knowledge/actions/graphActions.mts | 32 ------------------- .../actions/knowledgeActionRouter.mts | 3 -- .../extension/views/extensionServiceBase.ts | 6 ---- .../src/extension/views/topicGraphView.html | 9 +----- .../src/extension/views/topicGraphView.ts | 21 ------------ 6 files changed, 1 insertion(+), 71 deletions(-) diff --git a/ts/packages/agents/browser/src/agent/browserActionHandler.mts b/ts/packages/agents/browser/src/agent/browserActionHandler.mts index d38b44ba6..7770511ca 100644 --- a/ts/packages/agents/browser/src/agent/browserActionHandler.mts +++ b/ts/packages/agents/browser/src/agent/browserActionHandler.mts @@ -654,7 +654,6 @@ async function processBrowserAgentMessage( case "getTopicViewportNeighborhood": case "getTopicMetrics": case "getTopicTimelines": - case "invalidateTopicCacheAction": case "getViewportBasedNeighborhood": case "testMergeTopicHierarchies": case "mergeTopicHierarchies": diff --git 
a/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts b/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts index 9fa006b95..973b1d339 100644 --- a/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts +++ b/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts @@ -1962,38 +1962,6 @@ export function invalidateTopicCache(websiteCollection: any): void { invalidateAllGraphologyCaches(); } -/** - * Action to invalidate topic cache from UI - */ -export async function invalidateTopicCacheAction( - parameters: {}, - context: SessionContext, -): Promise<{ success: boolean; message: string }> { - try { - const websiteCollection = context.agentContext.websiteCollection; - - if (!websiteCollection) { - return { - success: false, - message: "Website collection not available", - }; - } - - invalidateTopicCache(websiteCollection); - - return { - success: true, - message: "Topic cache cleared successfully. Reload the page to regenerate the graph.", - }; - } catch (error) { - console.error("Error invalidating topic cache:", error); - return { - success: false, - message: error instanceof Error ? 
error.message : "Unknown error", - }; - } -} - // Ensure topic graph data is cached for fast access async function ensureTopicGraphCache(websiteCollection: any): Promise { diff --git a/ts/packages/agents/browser/src/agent/knowledge/actions/knowledgeActionRouter.mts b/ts/packages/agents/browser/src/agent/knowledge/actions/knowledgeActionRouter.mts index 8d1338522..2e4c443ed 100644 --- a/ts/packages/agents/browser/src/agent/knowledge/actions/knowledgeActionRouter.mts +++ b/ts/packages/agents/browser/src/agent/knowledge/actions/knowledgeActionRouter.mts @@ -43,7 +43,6 @@ import { getUrlContentBreakdown, getTopicTimelines, discoverRelatedKnowledge, - invalidateTopicCacheAction, } from "./graphActions.mjs"; import { checkAIModelStatus, @@ -122,8 +121,6 @@ export async function handleKnowledgeAction( return await getUrlContentBreakdown(parameters, context); case "getTopicTimelines": return await getTopicTimelines(parameters, context); - case "invalidateTopicCacheAction": - return await invalidateTopicCacheAction(parameters, context); // Query Actions case "getRecentKnowledgeItems": diff --git a/ts/packages/agents/browser/src/extension/views/extensionServiceBase.ts b/ts/packages/agents/browser/src/extension/views/extensionServiceBase.ts index 180c4491c..a3d1b6918 100644 --- a/ts/packages/agents/browser/src/extension/views/extensionServiceBase.ts +++ b/ts/packages/agents/browser/src/extension/views/extensionServiceBase.ts @@ -620,12 +620,6 @@ export abstract class ExtensionServiceBase { }); } - async invalidateTopicCache(): Promise { - return this.sendMessage({ - type: "invalidateTopicCacheAction", - parameters: {}, - }); - } async getTopicTimelines(parameters: { topicNames: string[]; diff --git a/ts/packages/agents/browser/src/extension/views/topicGraphView.html b/ts/packages/agents/browser/src/extension/views/topicGraphView.html index 82acf9b4c..a4985e176 100644 --- a/ts/packages/agents/browser/src/extension/views/topicGraphView.html +++ 
b/ts/packages/agents/browser/src/extension/views/topicGraphView.html @@ -152,14 +152,7 @@
Error Loading Topic Graph
> - -
+
{ - this.invalidateCache(); - }); - document .getElementById("prototypeMode") ?.addEventListener("change", (e) => { @@ -1055,21 +1049,6 @@ class TopicGraphView { this.showNotification("Cytoscape JSON exported successfully"); } - private async invalidateCache(): Promise { - try { - const result = await this.extensionService.invalidateTopicCache(); - - if (result && result.success) { - this.showNotification(result.message); - } else { - this.showNotification(result?.message || "Failed to invalidate cache"); - } - } catch (error) { - console.error("Error invalidating cache:", error); - this.showNotification("Error invalidating cache"); - } - } - private togglePrototypeMode(enabled: boolean): void { this.state.prototypeMode = enabled; console.log(`[TopicGraphView] Prototype mode: ${enabled ? "ENABLED" : "DISABLED"}`); From 509b9b2ad649d987d8f7df56c278ddd44d5a18ab Mon Sep 17 00:00:00 2001 From: Hillary Mutisya <150286414+hillary-mutisya@users.noreply.github.com> Date: Fri, 31 Oct 2025 06:43:26 -0700 Subject: [PATCH 06/10] Lint fixes --- .../agent/knowledge/actions/graphActions.mts | 11 +- .../agent/knowledge/utils/graphologyCache.mts | 9 +- .../utils/graphologyLayoutEngine.mts | 16 ++- .../src/extension/views/entityGraphView.ts | 3 +- .../extension/views/extensionServiceBase.ts | 1 - .../src/extension/views/topicGraphView.html | 11 +- .../src/extension/views/topicGraphView.ts | 18 ++- .../extension/views/topicGraphVisualizer.ts | 24 ++-- .../src/conversation/hierarchicalTopics.ts | 21 ++-- .../src/buildTopicGraphWithGraphology.ts | 23 ++-- .../website/src/graph/graphStateManager.ts | 71 ++++++++--- .../website/src/graph/incrementalUpdater.ts | 19 ++- .../website/src/graph/metricsCalculator.ts | 19 +-- .../website/src/graph/topicGraphBuilder.ts | 48 +++++-- .../memory/website/src/websiteCollection.ts | 118 ++++++++++++------ 15 files changed, 278 insertions(+), 134 deletions(-) diff --git a/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts 
b/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts index 973b1d339..1468abc38 100644 --- a/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts +++ b/ts/packages/agents/browser/src/agent/knowledge/actions/graphActions.mts @@ -1464,7 +1464,9 @@ export async function getGlobalImportanceLayer( let cachedGraph = getGraphologyCache(cacheKey); if (!cachedGraph) { - debug("[Graphology] Building layout for entity importance layer..."); + debug( + "[Graphology] Building layout for entity importance layer...", + ); const layoutStart = performance.now(); const graphNodes: GraphNode[] = optimizedEntities.map( @@ -1520,7 +1522,8 @@ export async function getGlobalImportanceLayer( // Enrich entities with graphology colors and sizes const enrichedEntities = optimizedEntities.map((entity: any) => { const graphElement = cachedGraph!.cytoscapeElements.find( - (el: any) => el.data?.id === entity.id || el.data?.label === entity.name, + (el: any) => + el.data?.id === entity.id || el.data?.label === entity.name, ); if (graphElement?.data) { return { @@ -1964,7 +1967,6 @@ export function invalidateTopicCache(websiteCollection: any): void { // Ensure topic graph data is cached for fast access async function ensureTopicGraphCache(websiteCollection: any): Promise { - const cache = getTopicGraphCache(websiteCollection); // Cache never expires - only invalidated on graph rebuild or knowledge import @@ -2033,7 +2035,8 @@ async function ensureTopicGraphCache(websiteCollection: any): Promise { } for (const topic of topics) { if (topic.parentTopicId) { - const currentCount = childCountMap.get(topic.parentTopicId) || 0; + const currentCount = + childCountMap.get(topic.parentTopicId) || 0; childCountMap.set(topic.parentTopicId, currentCount + 1); } } diff --git a/ts/packages/agents/browser/src/agent/knowledge/utils/graphologyCache.mts b/ts/packages/agents/browser/src/agent/knowledge/utils/graphologyCache.mts index b55355428..1054bd2a3 100644 --- 
a/ts/packages/agents/browser/src/agent/knowledge/utils/graphologyCache.mts +++ b/ts/packages/agents/browser/src/agent/knowledge/utils/graphologyCache.mts @@ -48,9 +48,7 @@ class GraphologyCacheManager { lastAccessed: Date.now(), }); - debug( - `Cache size: ${this.caches.size}/${this.maxCacheSize} entries`, - ); + debug(`Cache size: ${this.caches.size}/${this.maxCacheSize} entries`); } getCacheEntry(key: string): GraphologyCache | null { @@ -142,10 +140,7 @@ export function getGraphologyCache(key: string): GraphologyCache | null { return globalCacheManager.getCacheEntry(key); } -export function setGraphologyCache( - key: string, - cache: GraphologyCache, -): void { +export function setGraphologyCache(key: string, cache: GraphologyCache): void { globalCacheManager.setCacheEntry(key, cache); } diff --git a/ts/packages/agents/browser/src/agent/knowledge/utils/graphologyLayoutEngine.mts b/ts/packages/agents/browser/src/agent/knowledge/utils/graphologyLayoutEngine.mts index e369c2204..60ac6d61e 100644 --- a/ts/packages/agents/browser/src/agent/knowledge/utils/graphologyLayoutEngine.mts +++ b/ts/packages/agents/browser/src/agent/knowledge/utils/graphologyLayoutEngine.mts @@ -285,7 +285,11 @@ function applyMultiPhaseLayout( const source = graph.source(edge); const target = graph.target(edge); if (subgraph.hasNode(source) && subgraph.hasNode(target)) { - subgraph.addEdge(source, target, graph.getEdgeAttributes(edge)); + subgraph.addEdge( + source, + target, + graph.getEdgeAttributes(edge), + ); } } @@ -313,12 +317,14 @@ function applyMultiPhaseLayout( const centroidX = nodes.reduce( - (sum, n) => sum + (graph.getNodeAttribute(n, "x") as number), + (sum, n) => + sum + (graph.getNodeAttribute(n, "x") as number), 0, ) / nodes.length; const centroidY = nodes.reduce( - (sum, n) => sum + (graph.getNodeAttribute(n, "y") as number), + (sum, n) => + sum + (graph.getNodeAttribute(n, "y") as number), 0, ) / nodes.length; @@ -403,8 +409,8 @@ export function convertToCytoscapeElements( 
for (const node of graph.nodes()) { const attr = graph.getNodeAttributes(node); - const x = ((attr.x - minX) * scaleX) + targetMin; - const y = ((attr.y - minY) * scaleY) + targetMin; + const x = (attr.x - minX) * scaleX + targetMin; + const y = (attr.y - minY) * scaleY + targetMin; elements.push({ data: { diff --git a/ts/packages/agents/browser/src/extension/views/entityGraphView.ts b/ts/packages/agents/browser/src/extension/views/entityGraphView.ts index 5ded342de..1c4415b6c 100644 --- a/ts/packages/agents/browser/src/extension/views/entityGraphView.ts +++ b/ts/packages/agents/browser/src/extension/views/entityGraphView.ts @@ -1417,7 +1417,8 @@ class EntityGraphView { } // Check if graphology layout is available - const hasGraphologyLayout = importanceData.metadata?.graphologyLayout; + const hasGraphologyLayout = + importanceData.metadata?.graphologyLayout; // Transform data to expected format for visualizer const transformedData: any = { diff --git a/ts/packages/agents/browser/src/extension/views/extensionServiceBase.ts b/ts/packages/agents/browser/src/extension/views/extensionServiceBase.ts index a3d1b6918..999ed335f 100644 --- a/ts/packages/agents/browser/src/extension/views/extensionServiceBase.ts +++ b/ts/packages/agents/browser/src/extension/views/extensionServiceBase.ts @@ -620,7 +620,6 @@ export abstract class ExtensionServiceBase { }); } - async getTopicTimelines(parameters: { topicNames: string[]; maxTimelineEntries?: number; diff --git a/ts/packages/agents/browser/src/extension/views/topicGraphView.html b/ts/packages/agents/browser/src/extension/views/topicGraphView.html index a4985e176..f9f8a33d8 100644 --- a/ts/packages/agents/browser/src/extension/views/topicGraphView.html +++ b/ts/packages/agents/browser/src/extension/views/topicGraphView.html @@ -152,14 +152,21 @@
Error Loading Topic Graph
> -
+
+
diff --git a/ts/packages/agents/browser/src/extension/views/topicGraphView.ts b/ts/packages/agents/browser/src/extension/views/topicGraphView.ts index 759966e7c..be0bd9737 100644 --- a/ts/packages/agents/browser/src/extension/views/topicGraphView.ts +++ b/ts/packages/agents/browser/src/extension/views/topicGraphView.ts @@ -1033,11 +1033,17 @@ class TopicGraphView { private exportGraphologyJson(): void { if (!this.lastLoadedData || !this.lastLoadedData.presetLayout) { - this.showNotification("No graphology layout data available to export"); + this.showNotification( + "No graphology layout data available to export", + ); return; } - const jsonData = JSON.stringify(this.lastLoadedData.presetLayout.elements, null, 2); + const jsonData = JSON.stringify( + this.lastLoadedData.presetLayout.elements, + null, + 2, + ); const blob = new Blob([jsonData], { type: "application/json" }); const url = URL.createObjectURL(blob); const link = document.createElement("a"); @@ -1051,7 +1057,9 @@ class TopicGraphView { private togglePrototypeMode(enabled: boolean): void { this.state.prototypeMode = enabled; - console.log(`[TopicGraphView] Prototype mode: ${enabled ? "ENABLED" : "DISABLED"}`); + console.log( + `[TopicGraphView] Prototype mode: ${enabled ? "ENABLED" : "DISABLED"}`, + ); if (!this.lastLoadedData) { this.showNotification("No data available. Load a graph first."); @@ -1059,7 +1067,9 @@ class TopicGraphView { } this.visualizer?.setPrototypeMode(enabled); - this.showNotification(enabled ? "Prototype mode enabled" : "Prototype mode disabled"); + this.showNotification( + enabled ? 
"Prototype mode enabled" : "Prototype mode disabled", + ); } private toggleSidebar(): void { diff --git a/ts/packages/agents/browser/src/extension/views/topicGraphVisualizer.ts b/ts/packages/agents/browser/src/extension/views/topicGraphVisualizer.ts index 0f53f69b8..80a1c26b1 100644 --- a/ts/packages/agents/browser/src/extension/views/topicGraphVisualizer.ts +++ b/ts/packages/agents/browser/src/extension/views/topicGraphVisualizer.ts @@ -1561,9 +1561,7 @@ export class TopicGraphVisualizer { ); } else { layoutConfig = this.getLayoutOptions(); - console.log( - `[TopicGraphVisualizer] Computing CoSE layout...`, - ); + console.log(`[TopicGraphVisualizer] Computing CoSE layout...`); } const layout = instance.layout(layoutConfig); @@ -1843,14 +1841,18 @@ export class TopicGraphVisualizer { */ public setPrototypeMode(enabled: boolean): void { if (!this.cy) { - console.warn("[TopicGraphVisualizer] No Cytoscape instance available"); + console.warn( + "[TopicGraphVisualizer] No Cytoscape instance available", + ); return; } this.prototypeModeEnabled = enabled; if (enabled) { - console.log("[TopicGraphVisualizer] Enabling prototype mode - disabling LoD, showing all elements"); + console.log( + "[TopicGraphVisualizer] Enabling prototype mode - disabling LoD, showing all elements", + ); this.cy.batch(() => { this.cy.nodes().forEach((node: any) => { @@ -1869,14 +1871,20 @@ export class TopicGraphVisualizer { }); }); - console.log(`[TopicGraphVisualizer] Prototype mode enabled - ${this.cy.nodes().length} nodes, ${this.cy.edges().length} edges visible`); + console.log( + `[TopicGraphVisualizer] Prototype mode enabled - ${this.cy.nodes().length} nodes, ${this.cy.edges().length} edges visible`, + ); } else { - console.log("[TopicGraphVisualizer] Disabling prototype mode - re-enabling LoD"); + console.log( + "[TopicGraphVisualizer] Disabling prototype mode - re-enabling LoD", + ); const currentZoom = this.cy.zoom(); this.applyLevelOfDetail(currentZoom); - 
console.log(`[TopicGraphVisualizer] Prototype mode disabled - LoD re-applied at zoom ${currentZoom.toFixed(2)}x`); + console.log( + `[TopicGraphVisualizer] Prototype mode disabled - LoD re-applied at zoom ${currentZoom.toFixed(2)}x`, + ); } } diff --git a/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopics.ts b/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopics.ts index 259fb87e0..c56ba51d2 100644 --- a/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopics.ts +++ b/ts/packages/knowledgeProcessor/src/conversation/hierarchicalTopics.ts @@ -362,10 +362,8 @@ JSON OUTPUT:`; rootData.grandchildren?.[childName] || []; for (const grandchildName of grandchildrenForThisChild) { // Check if grandchild topic name matches any knowledge topics - const grandchildSourceNames = findMatchingKnowledgeTopics( - grandchildName, - topics, - ); + const grandchildSourceNames = + findMatchingKnowledgeTopics(grandchildName, topics); const grandchildTopic: HierarchicalTopic = { id: generateTopicId(grandchildName, 2), @@ -522,13 +520,16 @@ function enrichHierarchy( // Check if this hierarchical topic matches any fragment topics for (const fragmentTopic of extraction.topics) { const normalizedTopicName = topic.name.toLowerCase().trim(); - const normalizedFragmentTopic = fragmentTopic.toLowerCase().trim(); + const normalizedFragmentTopic = fragmentTopic + .toLowerCase() + .trim(); if (normalizedTopicName === normalizedFragmentTopic) { // Add ordinal - const ordinal = typeof extraction.fragmentId === 'number' - ? extraction.fragmentId - : parseInt(extraction.fragmentId, 10); + const ordinal = + typeof extraction.fragmentId === "number" + ? 
extraction.fragmentId + : parseInt(extraction.fragmentId, 10); if (!isNaN(ordinal)) { topic.sourceRefOrdinals.push(ordinal); } @@ -627,7 +628,9 @@ function findMatchingKnowledgeTopics( knowledgeTopics: string[], ): string[] { const matches: string[] = []; - const normalizedHierarchicalName = hierarchicalTopicName.toLowerCase().trim(); + const normalizedHierarchicalName = hierarchicalTopicName + .toLowerCase() + .trim(); for (const knowledgeTopic of knowledgeTopics) { const normalizedKnowledgeName = knowledgeTopic.toLowerCase().trim(); diff --git a/ts/packages/memory/website/src/buildTopicGraphWithGraphology.ts b/ts/packages/memory/website/src/buildTopicGraphWithGraphology.ts index 31cda217c..da43db817 100644 --- a/ts/packages/memory/website/src/buildTopicGraphWithGraphology.ts +++ b/ts/packages/memory/website/src/buildTopicGraphWithGraphology.ts @@ -2,7 +2,10 @@ // Licensed under the MIT License. import registerDebug from "debug"; -import { TopicGraphBuilder, type CooccurrenceData } from "./graph/topicGraphBuilder.js"; +import { + TopicGraphBuilder, + type CooccurrenceData, +} from "./graph/topicGraphBuilder.js"; import { MetricsCalculator } from "./graph/metricsCalculator.js"; import type { HierarchicalTopicRecord } from "./tables.js"; @@ -14,18 +17,20 @@ export async function buildTopicGraphWithGraphology( topicRelationshipsTable: any, topicMetricsTable: any, ): Promise { - debug(`Building topic graph for ${hierarchicalTopics.length} hierarchical topics`); + debug( + `Building topic graph for ${hierarchicalTopics.length} hierarchical topics`, + ); const cooccurrences = extractCooccurrencesFromCache(cacheManager); debug(`Extracted ${cooccurrences.length} cooccurrences from cache`); const graphBuilder = new TopicGraphBuilder(); - const { flatGraph, hierarchicalGraph } = graphBuilder.buildFromTopicHierarchy( - hierarchicalTopics, - cooccurrences, - ); + const { flatGraph, hierarchicalGraph } = + graphBuilder.buildFromTopicHierarchy(hierarchicalTopics, 
cooccurrences); - debug(`Graphs built: flat=${flatGraph.order} nodes, hierarchical=${hierarchicalGraph.order} nodes`); + debug( + `Graphs built: flat=${flatGraph.order} nodes, hierarchical=${hierarchicalGraph.order} nodes`, + ); const relationships = graphBuilder.exportToTopicRelationships(); debug(`Exporting ${relationships.length} topic relationships to database`); @@ -48,7 +53,9 @@ export async function buildTopicGraphWithGraphology( topicCounts, ); - debug(`Calculated metrics for ${topicMetrics.size} topics, ${communities.size} communities`); + debug( + `Calculated metrics for ${topicMetrics.size} topics, ${communities.size} communities`, + ); for (const [, metrics] of topicMetrics) { topicMetricsTable.upsertMetrics(metrics); diff --git a/ts/packages/memory/website/src/graph/graphStateManager.ts b/ts/packages/memory/website/src/graph/graphStateManager.ts index 950ffe831..e81bc6d0c 100644 --- a/ts/packages/memory/website/src/graph/graphStateManager.ts +++ b/ts/packages/memory/website/src/graph/graphStateManager.ts @@ -3,8 +3,14 @@ import { createRequire } from "module"; import registerDebug from "debug"; -import { TopicGraphBuilder, type CooccurrenceData } from "./topicGraphBuilder.js"; -import { IncrementalGraphUpdater, type WebpageKnowledge } from "./incrementalUpdater.js"; +import { + TopicGraphBuilder, + type CooccurrenceData, +} from "./topicGraphBuilder.js"; +import { + IncrementalGraphUpdater, + type WebpageKnowledge, +} from "./incrementalUpdater.js"; import { MetricsCalculator } from "./metricsCalculator.js"; import type { HierarchicalTopicRecord } from "../tables.js"; @@ -29,7 +35,9 @@ export class GraphStateManager { return; } - debug(`Initializing graphs with ${hierarchicalTopics.length} topics, ${cooccurrences.length} cooccurrences`); + debug( + `Initializing graphs with ${hierarchicalTopics.length} topics, ${cooccurrences.length} cooccurrences`, + ); const graphBuilder = new TopicGraphBuilder(); const graphs = graphBuilder.buildFromTopicHierarchy( 
@@ -45,22 +53,36 @@ export class GraphStateManager { this.hierarchicalGraph, ); - debug(`Graphs initialized: flat=${this.flatGraph.order} nodes, hierarchical=${this.hierarchicalGraph.order} nodes`); + debug( + `Graphs initialized: flat=${this.flatGraph.order} nodes, hierarchical=${this.hierarchicalGraph.order} nodes`, + ); } public async addWebpage( knowledge: WebpageKnowledge, - ): Promise<{ addedTopics: number; updatedTopics: number; addedRelationships: number; durationMs: number }> { + ): Promise<{ + addedTopics: number; + updatedTopics: number; + addedRelationships: number; + durationMs: number; + }> { if (!this.incrementalUpdater) { - throw new Error("Graphs not initialized. Call ensureGraphsInitialized() first."); + throw new Error( + "Graphs not initialized. Call ensureGraphsInitialized() first.", + ); } const result = await this.incrementalUpdater.addWebpage(knowledge); - debug(`Added webpage in ${result.durationMs}ms: ${result.addedTopics} topics, ${result.addedRelationships} relationships`); + debug( + `Added webpage in ${result.durationMs}ms: ${result.addedTopics} topics, ${result.addedRelationships} relationships`, + ); return result; } - public getGraphs(): { flatGraph: Graph | null; hierarchicalGraph: Graph | null } { + public getGraphs(): { + flatGraph: Graph | null; + hierarchicalGraph: Graph | null; + } { return { flatGraph: this.flatGraph, hierarchicalGraph: this.hierarchicalGraph, @@ -88,8 +110,14 @@ export class GraphStateManager { const source = this.hierarchicalGraph.source(edge); const target = this.hierarchicalGraph.target(edge); - const sourceName = this.hierarchicalGraph.getNodeAttribute(source, "topicName"); - const targetName = this.hierarchicalGraph.getNodeAttribute(target, "topicName"); + const sourceName = this.hierarchicalGraph.getNodeAttribute( + source, + "topicName", + ); + const targetName = this.hierarchicalGraph.getNodeAttribute( + target, + "topicName", + ); relationships.push({ fromTopic: source, @@ -110,16 +138,29 @@ export 
class GraphStateManager { } public async recomputeMetrics( - topicCounts?: Map, - ): Promise<{ topicMetrics: Map; communities: Map }> { + topicCounts?: Map< + string, + { documentCount: number; domainCount: number } + >, + ): Promise<{ + topicMetrics: Map; + communities: Map; + }> { if (!this.hierarchicalGraph) { - throw new Error("Graphs not initialized. Call ensureGraphsInitialized() first."); + throw new Error( + "Graphs not initialized. Call ensureGraphsInitialized() first.", + ); } const metricsCalculator = new MetricsCalculator(); - const result = metricsCalculator.calculateMetrics(this.hierarchicalGraph, topicCounts); + const result = metricsCalculator.calculateMetrics( + this.hierarchicalGraph, + topicCounts, + ); - debug(`Recomputed metrics for ${result.topicMetrics.size} topics, ${result.communities.size} communities`); + debug( + `Recomputed metrics for ${result.topicMetrics.size} topics, ${result.communities.size} communities`, + ); return result; } diff --git a/ts/packages/memory/website/src/graph/incrementalUpdater.ts b/ts/packages/memory/website/src/graph/incrementalUpdater.ts index 1531de37a..e23f27deb 100644 --- a/ts/packages/memory/website/src/graph/incrementalUpdater.ts +++ b/ts/packages/memory/website/src/graph/incrementalUpdater.ts @@ -3,10 +3,7 @@ import { createRequire } from "module"; import registerDebug from "debug"; -import type { - HierarchicalTopicRecord, - TopicMetrics, -} from "../tables.js"; +import type { HierarchicalTopicRecord, TopicMetrics } from "../tables.js"; import { MetricsCalculator } from "./metricsCalculator.js"; import type { CooccurrenceData } from "./topicGraphBuilder.js"; @@ -179,7 +176,8 @@ export class IncrementalGraphUpdater { ); if (this.flatGraph.hasEdge(edgeKey)) { - const current = this.flatGraph.getEdgeAttributes(edgeKey); + const current = + this.flatGraph.getEdgeAttributes(edgeKey); this.flatGraph.setEdgeAttribute( edgeKey, "count", @@ -233,9 +231,8 @@ export class IncrementalGraphUpdater { const 
existingCooccurrences = this.hierarchicalGraph .edges(topicId) .filter((edge: string) => { - const edgeAttrs = this.hierarchicalGraph.getEdgeAttributes( - edge, - ); + const edgeAttrs = + this.hierarchicalGraph.getEdgeAttributes(edge); return edgeAttrs.type === "cooccurrence"; }); @@ -285,7 +282,8 @@ export class IncrementalGraphUpdater { const existingCooccurrences = this.hierarchicalGraph .edges(topicId) .filter((edge: string) => { - const edgeAttrs = this.hierarchicalGraph.getEdgeAttributes(edge); + const edgeAttrs = + this.hierarchicalGraph.getEdgeAttributes(edge); return edgeAttrs.type === "cooccurrence"; }); @@ -300,7 +298,8 @@ export class IncrementalGraphUpdater { for (const childId of attrs.childIds) { for (const edge of this.hierarchicalGraph.edges(childId)) { - const edgeAttrs = this.hierarchicalGraph.getEdgeAttributes(edge); + const edgeAttrs = + this.hierarchicalGraph.getEdgeAttributes(edge); if (edgeAttrs.type !== "cooccurrence") continue; const source = this.hierarchicalGraph.source(edge); diff --git a/ts/packages/memory/website/src/graph/metricsCalculator.ts b/ts/packages/memory/website/src/graph/metricsCalculator.ts index 7fdfae9d6..96e4261ed 100644 --- a/ts/packages/memory/website/src/graph/metricsCalculator.ts +++ b/ts/packages/memory/website/src/graph/metricsCalculator.ts @@ -23,13 +23,15 @@ export interface MetricsResult { export class MetricsCalculator { public calculateMetrics( hierarchicalGraph: Graph, - topicCounts?: Map, + topicCounts?: Map< + string, + { documentCount: number; domainCount: number } + >, ): MetricsResult { debug(`Calculating metrics for ${hierarchicalGraph.order} topics`); - const undirectedGraph = this.createUndirectedCooccurrenceGraph( - hierarchicalGraph, - ); + const undirectedGraph = + this.createUndirectedCooccurrenceGraph(hierarchicalGraph); debug("Running betweenness centrality..."); const betweennessScores = betweennessCentrality(undirectedGraph); @@ -41,7 +43,10 @@ export class MetricsCalculator { const 
communities = new Map(); louvain.assign(undirectedGraph); for (const node of undirectedGraph.nodes()) { - const community = undirectedGraph.getNodeAttribute(node, "community"); + const community = undirectedGraph.getNodeAttribute( + node, + "community", + ); communities.set(node, community); } @@ -100,9 +105,7 @@ export class MetricsCalculator { return { topicMetrics, communities }; } - private createUndirectedCooccurrenceGraph( - hierarchicalGraph: Graph, - ): Graph { + private createUndirectedCooccurrenceGraph(hierarchicalGraph: Graph): Graph { const undirectedGraph = new Graph({ type: "undirected" }); for (const node of hierarchicalGraph.nodes()) { diff --git a/ts/packages/memory/website/src/graph/topicGraphBuilder.ts b/ts/packages/memory/website/src/graph/topicGraphBuilder.ts index dc22d4632..397815f1a 100644 --- a/ts/packages/memory/website/src/graph/topicGraphBuilder.ts +++ b/ts/packages/memory/website/src/graph/topicGraphBuilder.ts @@ -96,8 +96,16 @@ export class TopicGraphBuilder { for (const toTopic of toTopics) { if (fromTopic.topicId === toTopic.topicId) continue; - if (this.flatGraph.hasEdge(fromTopic.topicId, toTopic.topicId)) { - const current = this.flatGraph.getEdgeAttributes(fromTopic.topicId, toTopic.topicId); + if ( + this.flatGraph.hasEdge( + fromTopic.topicId, + toTopic.topicId, + ) + ) { + const current = this.flatGraph.getEdgeAttributes( + fromTopic.topicId, + toTopic.topicId, + ); this.flatGraph.setEdgeAttribute( fromTopic.topicId, toTopic.topicId, @@ -159,9 +167,10 @@ export class TopicGraphBuilder { { type: "parent-child" }, ); - const parentAttrs = this.hierarchicalGraph.getNodeAttributes( - topic.parentTopicId, - ); + const parentAttrs = + this.hierarchicalGraph.getNodeAttributes( + topic.parentTopicId, + ); parentAttrs.childIds.push(topic.topicId); } } @@ -177,7 +186,10 @@ export class TopicGraphBuilder { const nodesByLevel = new Map(); for (const node of this.hierarchicalGraph.nodes()) { - const level = 
this.hierarchicalGraph.getNodeAttribute(node, "level"); + const level = this.hierarchicalGraph.getNodeAttribute( + node, + "level", + ); if (!nodesByLevel.has(level)) { nodesByLevel.set(level, []); } @@ -197,8 +209,10 @@ export class TopicGraphBuilder { for (const edge of this.flatGraph.edges(topicId)) { const source = this.flatGraph.source(edge); const target = this.flatGraph.target(edge); - const edgeAttrs = this.flatGraph.getEdgeAttributes(edge); - const otherNode = source === topicId ? target : source; + const edgeAttrs = + this.flatGraph.getEdgeAttributes(edge); + const otherNode = + source === topicId ? target : source; if (!this.hierarchicalGraph.hasNode(otherNode)) { continue; @@ -209,7 +223,9 @@ export class TopicGraphBuilder { otherNode, ); if ( - !this.hierarchicalGraph.hasEdge(hierarchicalEdgeKey) + !this.hierarchicalGraph.hasEdge( + hierarchicalEdgeKey, + ) ) { this.hierarchicalGraph.addEdge( topicId, @@ -234,7 +250,9 @@ export class TopicGraphBuilder { >(); for (const childId of attrs.childIds) { - for (const edge of this.hierarchicalGraph.edges(childId)) { + for (const edge of this.hierarchicalGraph.edges( + childId, + )) { const edgeAttrs = this.hierarchicalGraph.getEdgeAttributes(edge); if (edgeAttrs.type !== "cooccurrence") continue; @@ -275,7 +293,9 @@ export class TopicGraphBuilder { topicId, otherNode, ); - if (!this.hierarchicalGraph.hasEdge(hierarchicalEdgeKey)) { + if ( + !this.hierarchicalGraph.hasEdge(hierarchicalEdgeKey) + ) { this.hierarchicalGraph.addEdge(topicId, otherNode, { type: "cooccurrence", count: agg.count, @@ -311,7 +331,8 @@ export class TopicGraphBuilder { while (current) { ancestorsA.add(current); - const attrs: any = this.hierarchicalGraph.getNodeAttributes(current); + const attrs: any = + this.hierarchicalGraph.getNodeAttributes(current); current = attrs.parentTopicId || null; } @@ -320,7 +341,8 @@ export class TopicGraphBuilder { if (ancestorsA.has(current)) { return current; } - const attrs: any = 
this.hierarchicalGraph.getNodeAttributes(current); + const attrs: any = + this.hierarchicalGraph.getNodeAttributes(current); current = attrs.parentTopicId || null; } diff --git a/ts/packages/memory/website/src/websiteCollection.ts b/ts/packages/memory/website/src/websiteCollection.ts index f53632294..3debedab3 100644 --- a/ts/packages/memory/website/src/websiteCollection.ts +++ b/ts/packages/memory/website/src/websiteCollection.ts @@ -1672,7 +1672,8 @@ export class WebsiteCollection const { buildTopicGraphWithGraphology } = await import( "./buildTopicGraphWithGraphology.js" ); - const allHierarchicalTopics = this.hierarchicalTopics?.getTopicHierarchy() || []; + const allHierarchicalTopics = + this.hierarchicalTopics?.getTopicHierarchy() || []; await buildTopicGraphWithGraphology( allHierarchicalTopics, cacheManager, @@ -1715,16 +1716,23 @@ export class WebsiteCollection await this.updateTopicGraphIncremental(newWebsites); } - private async updateTopicGraphIncremental(newWebsites: Website[]): Promise { - debug(`[Knowledge Graph] Updating topic graph incrementally for ${newWebsites.length} websites`); + private async updateTopicGraphIncremental( + newWebsites: Website[], + ): Promise { + debug( + `[Knowledge Graph] Updating topic graph incrementally for ${newWebsites.length} websites`, + ); if (!this.graphStateManager) { - const { GraphStateManager } = await import("./graph/graphStateManager.js"); + const { GraphStateManager } = await import( + "./graph/graphStateManager.js" + ); this.graphStateManager = new GraphStateManager(); } - const allHierarchicalTopics = this.hierarchicalTopics?.getTopicHierarchy() || []; - + const allHierarchicalTopics = + this.hierarchicalTopics?.getTopicHierarchy() || []; + const { GraphBuildingCacheManager } = await import( "./utils/graphBuildingCacheManager.mjs" ); @@ -1732,12 +1740,14 @@ export class WebsiteCollection const websites = this.getWebsites(); await cacheManager.initializeCache(websites); - const cooccurrences = 
cacheManager.getAllTopicRelationships().map((rel: any) => ({ - fromTopic: rel.fromTopic, - toTopic: rel.toTopic, - count: rel.count, - urls: rel.sources || [], - })); + const cooccurrences = cacheManager + .getAllTopicRelationships() + .map((rel: any) => ({ + fromTopic: rel.fromTopic, + toTopic: rel.toTopic, + count: rel.count, + urls: rel.sources || [], + })); await this.graphStateManager.ensureGraphsInitialized( allHierarchicalTopics, @@ -1748,9 +1758,14 @@ export class WebsiteCollection const knowledge = website.knowledge as any; if (!knowledge?.topicHierarchy) continue; - const topicMap = knowledge.topicHierarchy.topicMap instanceof Map - ? knowledge.topicHierarchy.topicMap - : new Map(Object.entries(knowledge.topicHierarchy.topicMap || {})); + const topicMap = + knowledge.topicHierarchy.topicMap instanceof Map + ? knowledge.topicHierarchy.topicMap + : new Map( + Object.entries( + knowledge.topicHierarchy.topicMap || {}, + ), + ); const hierarchicalTopics: any[] = []; for (const [topicId, topic] of topicMap) { @@ -1762,7 +1777,9 @@ export class WebsiteCollection level: (topic as any).level || 0, parentTopicId: (topic as any).parentId, confidence: (topic as any).confidence || 0.5, - sourceTopicNames: JSON.stringify((topic as any).sourceTopicNames || []), + sourceTopicNames: JSON.stringify( + (topic as any).sourceTopicNames || [], + ), extractionDate: new Date().toISOString(), }); } @@ -1796,7 +1813,8 @@ export class WebsiteCollection })), ); - const { topicMetrics } = await this.graphStateManager.recomputeMetrics(topicCounts); + const { topicMetrics } = + await this.graphStateManager.recomputeMetrics(topicCounts); for (const [, metrics] of topicMetrics) { this.topicMetrics?.upsertMetrics(metrics); } @@ -1985,12 +2003,16 @@ export class WebsiteCollection const websitesToProcess = urlLimit ? 
websites.slice(0, urlLimit) : websites; - debug(`[Knowledge Graph] Processing ${websitesToProcess.length} websites for hierarchies`); + debug( + `[Knowledge Graph] Processing ${websitesToProcess.length} websites for hierarchies`, + ); const websitesWithHierarchies = websitesToProcess.filter( (w) => (w.knowledge as any)?.topicHierarchy, ); - debug(`[Knowledge Graph] Found ${websitesWithHierarchies.length} websites with existing hierarchies`); + debug( + `[Knowledge Graph] Found ${websitesWithHierarchies.length} websites with existing hierarchies`, + ); if (websitesWithHierarchies.length > 0) { // Clear existing hierarchical topics before rebuilding @@ -1999,19 +2021,27 @@ export class WebsiteCollection "DELETE FROM hierarchicalTopics", ); clearStmt.run(); - debug(`[Knowledge Graph] Cleared existing hierarchical topics`); + debug( + `[Knowledge Graph] Cleared existing hierarchical topics`, + ); } // Use existing rich hierarchies from websites - debug(`[Knowledge Graph] Using rich hierarchies from ${websitesWithHierarchies.length} websites`); + debug( + `[Knowledge Graph] Using rich hierarchies from ${websitesWithHierarchies.length} websites`, + ); await this.updateHierarchicalTopics(websitesWithHierarchies); return; } // No existing hierarchies, fall back to building from flat topics - debug(`[Knowledge Graph] No websites with hierarchies, extracting flat topics...`); + debug( + `[Knowledge Graph] No websites with hierarchies, extracting flat topics...`, + ); const flatTopics = await this.extractFlatTopics(urlLimit); - debug(`[Knowledge Graph] Extracted ${flatTopics.length} flat topics`); + debug( + `[Knowledge Graph] Extracted ${flatTopics.length} flat topics`, + ); if (flatTopics.length === 0) { return; @@ -2033,7 +2063,9 @@ export class WebsiteCollection let topicExtractor: any; try { // Try to create AI model for topic merging - debug(`[Knowledge Graph] Creating AI model for topic extraction...`); + debug( + `[Knowledge Graph] Creating AI model for topic 
extraction...`, + ); const apiSettings = ai.openai.azureApiSettingsFromEnv( ai.openai.ModelType.Chat, undefined, @@ -2048,14 +2080,18 @@ export class WebsiteCollection `[Knowledge Graph] AI model not available for topic merging: ${error}`, ); // Fall back to simple hierarchical grouping - debug(`[Knowledge Graph] Using simple hierarchical grouping for ${flatTopics.length} topics`); + debug( + `[Knowledge Graph] Using simple hierarchical grouping for ${flatTopics.length} topics`, + ); await this.buildSimpleTopicHierarchy(flatTopics); debug(`[Knowledge Graph] Simple hierarchy built`); return; } // Use AI to merge topics into higher-level topics - debug(`[Knowledge Graph] Merging ${flatTopics.length} topics into hierarchy...`); + debug( + `[Knowledge Graph] Merging ${flatTopics.length} topics into hierarchy...`, + ); const mergeResult = await topicExtractor.mergeTopics( flatTopics, undefined, // No past topics for initial build @@ -2063,7 +2099,9 @@ export class WebsiteCollection ); if (mergeResult && mergeResult.status === "Success") { - debug(`[Knowledge Graph] Topic merge successful: ${mergeResult.topic}`); + debug( + `[Knowledge Graph] Topic merge successful: ${mergeResult.topic}`, + ); // Store the merged topic as root const rootTopicId = this.generateTopicId(mergeResult.topic, 0); debug(`[Knowledge Graph] Storing root topic: ${rootTopicId}`); @@ -2080,11 +2118,10 @@ export class WebsiteCollection ); // Organize flat topics under the root - debug(`[Knowledge Graph] Organizing ${flatTopics.length} topics under root`); - await this.organizeTopicsUnderRoot( - flatTopics, - rootTopicId, + debug( + `[Knowledge Graph] Organizing ${flatTopics.length} topics under root`, ); + await this.organizeTopicsUnderRoot(flatTopics, rootTopicId); debug(`[Knowledge Graph] Topics organized successfully`); } else { // Fall back to simple hierarchy if merging fails @@ -2117,7 +2154,10 @@ export class WebsiteCollection ); let globalHierarchy: any | undefined; - const websiteUrlMap = new 
Map(); + const websiteUrlMap = new Map< + string, + { url: string; domain: string } + >(); for (const website of newWebsites) { const docHierarchy = (website.knowledge as any)?.topicHierarchy as @@ -2146,7 +2186,10 @@ export class WebsiteCollection const websiteDomain = website.metadata.domain || "unknown"; for (const [topicId] of topicMap) { if (!websiteUrlMap.has(topicId)) { - websiteUrlMap.set(topicId, { url: websiteUrl, domain: websiteDomain }); + websiteUrlMap.set(topicId, { + url: websiteUrl, + domain: websiteDomain, + }); } } @@ -2181,8 +2224,8 @@ export class WebsiteCollection } catch (error) { debug( `[Knowledge Graph] Error updating hierarchical topics: ${error}`, - // Note: Full document provenance is available via semanticRefIndex lookup - // Each topic has semanticRefs with range.start.messageOrdinal pointing to source documents + // Note: Full document provenance is available via semanticRefIndex lookup + // Each topic has semanticRefs with range.start.messageOrdinal pointing to source documents ); } } @@ -2230,7 +2273,6 @@ export class WebsiteCollection ...(topic.sourceTopicNames || []), ]), ]; - } } } @@ -3062,9 +3104,7 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation /** * Build a simple topic hierarchy when AI is not available */ - private async buildSimpleTopicHierarchy( - topics: string[], - ): Promise { + private async buildSimpleTopicHierarchy(topics: string[]): Promise { debug( `[Knowledge Graph] Building simple topic hierarchy for ${topics.length} topics`, ); From 2816742fa749eeac179f9df2b279f1e1c6128570 Mon Sep 17 00:00:00 2001 From: Hillary Mutisya <150286414+hillary-mutisya@users.noreply.github.com> Date: Fri, 31 Oct 2025 06:52:34 -0700 Subject: [PATCH 07/10] Update graphStateManager.ts --- ts/packages/memory/website/src/graph/graphStateManager.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ts/packages/memory/website/src/graph/graphStateManager.ts 
b/ts/packages/memory/website/src/graph/graphStateManager.ts index e81bc6d0c..5ffcb8e47 100644 --- a/ts/packages/memory/website/src/graph/graphStateManager.ts +++ b/ts/packages/memory/website/src/graph/graphStateManager.ts @@ -58,9 +58,7 @@ export class GraphStateManager { ); } - public async addWebpage( - knowledge: WebpageKnowledge, - ): Promise<{ + public async addWebpage(knowledge: WebpageKnowledge): Promise<{ addedTopics: number; updatedTopics: number; addedRelationships: number; From bbc986daecaf5606e40fd2175ec6edb0160b80c3 Mon Sep 17 00:00:00 2001 From: Hillary Mutisya <150286414+hillary-mutisya@users.noreply.github.com> Date: Fri, 31 Oct 2025 14:37:44 -0700 Subject: [PATCH 08/10] Only recompute communities if we've added significant new entities --- .../memory/website/src/websiteCollection.ts | 42 ++++++++++++++++--- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/ts/packages/memory/website/src/websiteCollection.ts b/ts/packages/memory/website/src/websiteCollection.ts index f53632294..85ac82503 100644 --- a/ts/packages/memory/website/src/websiteCollection.ts +++ b/ts/packages/memory/website/src/websiteCollection.ts @@ -130,6 +130,7 @@ export class WebsiteCollection private db: sqlite.Database | undefined = undefined; private dbPath: string = ""; private graphStateManager: any = null; + private cachedEntityCount: number = 0; constructor( nameTag: string = "", @@ -1634,14 +1635,19 @@ export class WebsiteCollection ); // Extract entities from cache (much faster than iterating websites) + const entityExtractionStart = Date.now(); const entities = cacheManager.getAllEntities(); debug( - `[Knowledge Graph] Extracted ${entities.length} unique entities in ${Date.now() - startTime}ms`, + `[Knowledge Graph] Extracted ${entities.length} unique entities in ${Date.now() - entityExtractionStart}ms`, ); + // Initialize cached entity count for incremental updates + this.cachedEntityCount = entities.length; + // Store entities in knowledge entities table + 
const storeStart = Date.now(); await this.storeEntitiesInDatabase(cacheManager, websitesToProcess); - debug(`[Knowledge Graph] Stored entities in database`); + debug(`[Knowledge Graph] Stored entities in database in ${Date.now() - storeStart}ms`); // Build relationships between entities using cache-based approach const relationshipStartTime = Date.now(); @@ -2055,12 +2061,16 @@ export class WebsiteCollection } // Use AI to merge topics into higher-level topics - debug(`[Knowledge Graph] Merging ${flatTopics.length} topics into hierarchy...`); + const mergeStart = Date.now(); + const topicListSize = JSON.stringify(flatTopics).length; + debug(`[Knowledge Graph] Merging ${flatTopics.length} topics into hierarchy (prompt size: ${topicListSize} chars)...`); + const mergeResult = await topicExtractor.mergeTopics( flatTopics, undefined, // No past topics for initial build "comprehensive, hierarchical", ); + debug(`[Knowledge Graph] Topic merge completed in ${Date.now() - mergeStart}ms`); if (mergeResult && mergeResult.status === "Success") { debug(`[Knowledge Graph] Topic merge successful: ${mergeResult.topic}`); @@ -2879,18 +2889,40 @@ Determine the appropriate relationship action based on the PairwiseTopicRelation const startTime = Date.now(); try { + // Extract entities from NEW websites only + const extractionStart = Date.now(); const newEntities = await this.extractEntitiesFromWebsites(newWebsites); + debug( + `[Knowledge Graph] Entity extraction completed in ${Date.now() - extractionStart}ms (${newEntities.length} new entities)`, + ); if (newEntities.length > 0) { + const relationshipsStart = Date.now(); await this.updateRelationships(newEntities); + debug( + `[Knowledge Graph] Relationships updated in ${Date.now() - relationshipsStart}ms`, + ); } + const hierarchyStart = Date.now(); await this.updateHierarchicalTopics(newWebsites); + debug( + `[Knowledge Graph] Hierarchical topics updated in ${Date.now() - hierarchyStart}ms`, + ); + + // ✅ FIX: Track entity count 
instead of re-extracting from all websites + // Only recompute communities if we've added significant new entities + const previousEntityCount = this.cachedEntityCount || 0; + const currentEntityCount = previousEntityCount + newEntities.length; + this.cachedEntityCount = currentEntityCount; - const totalEntityCount = (await this.extractEntities()).length; - if (this.shouldRecomputeCommunities(totalEntityCount)) { + if (this.shouldRecomputeCommunities(currentEntityCount)) { + const communityStart = Date.now(); await this.recomputeCommunities(); + debug( + `[Knowledge Graph] Communities recomputed in ${Date.now() - communityStart}ms`, + ); } debug( From 9b38f65445f235d63c70b105f895b3fe59b4df50 Mon Sep 17 00:00:00 2001 From: Hillary Mutisya <150286414+hillary-mutisya@users.noreply.github.com> Date: Fri, 31 Oct 2025 15:03:48 -0700 Subject: [PATCH 09/10] Lint fixes --- ts/packages/memory/website/src/websiteCollection.ts | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ts/packages/memory/website/src/websiteCollection.ts b/ts/packages/memory/website/src/websiteCollection.ts index d5311d350..4e2ed3c29 100644 --- a/ts/packages/memory/website/src/websiteCollection.ts +++ b/ts/packages/memory/website/src/websiteCollection.ts @@ -1647,7 +1647,9 @@ export class WebsiteCollection // Store entities in knowledge entities table const storeStart = Date.now(); await this.storeEntitiesInDatabase(cacheManager, websitesToProcess); - debug(`[Knowledge Graph] Stored entities in database in ${Date.now() - storeStart}ms`); + debug( + `[Knowledge Graph] Stored entities in database in ${Date.now() - storeStart}ms`, + ); // Build relationships between entities using cache-based approach const relationshipStartTime = Date.now(); @@ -2097,14 +2099,18 @@ export class WebsiteCollection // Use AI to merge topics into higher-level topics const mergeStart = Date.now(); const topicListSize = JSON.stringify(flatTopics).length; - debug(`[Knowledge Graph] Merging 
${flatTopics.length} topics into hierarchy (prompt size: ${topicListSize} chars)...`); + debug( + `[Knowledge Graph] Merging ${flatTopics.length} topics into hierarchy (prompt size: ${topicListSize} chars)...`, + ); const mergeResult = await topicExtractor.mergeTopics( flatTopics, undefined, // No past topics for initial build "comprehensive, hierarchical", ); - debug(`[Knowledge Graph] Topic merge completed in ${Date.now() - mergeStart}ms`); + debug( + `[Knowledge Graph] Topic merge completed in ${Date.now() - mergeStart}ms`, + ); if (mergeResult && mergeResult.status === "Success") { debug( From c059128cdf1a527b9c13408484abd38eac3c0c79 Mon Sep 17 00:00:00 2001 From: Hillary Mutisya <150286414+hillary-mutisya@users.noreply.github.com> Date: Sat, 1 Nov 2025 00:12:41 -0700 Subject: [PATCH 10/10] Update pnpm-lock.yaml --- ts/pnpm-lock.yaml | 159 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 2 deletions(-) diff --git a/ts/pnpm-lock.yaml b/ts/pnpm-lock.yaml index 572f53d93..b384d590d 100644 --- a/ts/pnpm-lock.yaml +++ b/ts/pnpm-lock.yaml @@ -139,7 +139,7 @@ importers: version: 8.18.1 jest: specifier: ^29.7.0 - version: 29.7.0(@types/node@22.15.18)(ts-node@10.9.2(@types/node@22.15.18)(typescript@5.4.5)) + version: 29.7.0(@types/node@20.19.23)(ts-node@10.9.2(@types/node@20.19.23)(typescript@5.4.5)) prettier: specifier: ^3.5.3 version: 3.5.3 @@ -513,7 +513,7 @@ importers: version: 16.5.0 jest: specifier: ^29.7.0 - version: 29.7.0(@types/node@20.19.23)(ts-node@10.9.2(@types/node@20.19.23)(typescript@5.4.5)) + version: 29.7.0(@types/node@22.15.18)(ts-node@10.9.2(@types/node@22.15.18)(typescript@5.4.5)) prettier: specifier: ^3.5.3 version: 3.5.3 @@ -1194,6 +1194,21 @@ importers: express-rate-limit: specifier: ^7.5.0 version: 7.5.0(express@4.21.2) + graphology: + specifier: ^0.25.4 + version: 0.25.4(graphology-types@0.24.8) + graphology-communities-louvain: + specifier: ^2.0.1 + version: 2.0.2(graphology-types@0.24.8) + graphology-layout: + 
specifier: ^0.6.1 + version: 0.6.1(graphology-types@0.24.8) + graphology-layout-forceatlas2: + specifier: ^0.10.1 + version: 0.10.1(graphology-types@0.24.8) + graphology-layout-noverlap: + specifier: ^0.4.1 + version: 0.4.2(graphology-types@0.24.8) html-to-text: specifier: ^9.0.5 version: 9.0.5 @@ -3382,6 +3397,15 @@ importers: get-folder-size: specifier: ^5.0.0 version: 5.0.0 + graphology: + specifier: ^0.25.4 + version: 0.25.4(graphology-types@0.24.8) + graphology-communities-louvain: + specifier: ^2.0.1 + version: 2.0.2(graphology-types@0.24.8) + graphology-metrics: + specifier: ^2.1.0 + version: 2.4.0(graphology-types@0.24.8) jsdom: specifier: ^26.1.0 version: 26.1.0 @@ -6857,6 +6881,9 @@ packages: '@xtuc/long@4.2.2': resolution: {integrity: sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==} + '@yomguithereal/helpers@1.1.1': + resolution: {integrity: sha512-UYvAq/XCA7xoh1juWDYsq3W0WywOB+pz8cgVnE1b45ZfdMhBvHDrgmSFG3jXeZSr2tMTYLGHFHON+ekG05Jebg==} + abab@2.0.6: resolution: {integrity: sha512-j2afSsaIENvHZN2B8GOpF566vZ5WVk5opAiMTvWgaQT8DkbOqsTfvNAvHoRGU2zzP8cPoqys+xHTRDWW8L+/BA==} deprecated: Use your platform's native atob() and btoa() methods instead @@ -8991,6 +9018,54 @@ packages: graphlib@2.1.8: resolution: {integrity: sha512-jcLLfkpoVGmH7/InMC/1hIvOPSUh38oJtGhvrOFGzioE1DZ+0YW16RgmOJhHiuWTvGiJQ9Z1Ik43JvkRPRvE+A==} + graphology-communities-louvain@2.0.2: + resolution: {integrity: sha512-zt+2hHVPYxjEquyecxWXoUoIuN/UvYzsvI7boDdMNz0rRvpESQ7+e+Ejv6wK7AThycbZXuQ6DkG8NPMCq6XwoA==} + peerDependencies: + graphology-types: '>=0.19.0' + + graphology-indices@0.17.0: + resolution: {integrity: sha512-A7RXuKQvdqSWOpn7ZVQo4S33O0vCfPBnUSf7FwE0zNCasqwZVUaCXePuWo5HBpWw68KJcwObZDHpFk6HKH6MYQ==} + peerDependencies: + graphology-types: '>=0.20.0' + + graphology-layout-forceatlas2@0.10.1: + resolution: {integrity: sha512-ogzBeF1FvWzjkikrIFwxhlZXvD2+wlY54lqhsrWprcdPjopM2J9HoMweUmIgwaTvY4bUYVimpSsOdvDv1gPRFQ==} + peerDependencies: + 
graphology-types: '>=0.19.0' + + graphology-layout-noverlap@0.4.2: + resolution: {integrity: sha512-13WwZSx96zim6l1dfZONcqLh3oqyRcjIBsqz2c2iJ3ohgs3605IDWjldH41Gnhh462xGB1j6VGmuGhZ2FKISXA==} + peerDependencies: + graphology-types: '>=0.19.0' + + graphology-layout@0.6.1: + resolution: {integrity: sha512-m9aMvbd0uDPffUCFPng5ibRkb2pmfNvdKjQWeZrf71RS1aOoat5874+DcyNfMeCT4aQguKC7Lj9eCbqZj/h8Ag==} + peerDependencies: + graphology-types: '>=0.19.0' + + graphology-metrics@2.4.0: + resolution: {integrity: sha512-7WOfOP+mFLCaTJx55Qg4eY+211vr1/b3D/R3biz3SXGhAaCVcWYkfabnmO4O4WBNWANEHtVnFrGgJ0kj6MM6xw==} + peerDependencies: + graphology-types: '>=0.20.0' + + graphology-shortest-path@2.1.0: + resolution: {integrity: sha512-KbT9CTkP/u72vGEJzyRr24xFC7usI9Es3LMmCPHGwQ1KTsoZjxwA9lMKxfU0syvT/w+7fZUdB/Hu2wWYcJBm6Q==} + peerDependencies: + graphology-types: '>=0.20.0' + + graphology-types@0.24.8: + resolution: {integrity: sha512-hDRKYXa8TsoZHjgEaysSRyPdT6uB78Ci8WnjgbStlQysz7xR52PInxNsmnB7IBOM1BhikxkNyCVEFgmPKnpx3Q==} + + graphology-utils@2.5.2: + resolution: {integrity: sha512-ckHg8MXrXJkOARk56ZaSCM1g1Wihe2d6iTmz1enGOz4W/l831MBCKSayeFQfowgF8wd+PQ4rlch/56Vs/VZLDQ==} + peerDependencies: + graphology-types: '>=0.23.0' + + graphology@0.25.4: + resolution: {integrity: sha512-33g0Ol9nkWdD6ulw687viS8YJQBxqG5LWII6FI6nul0pq6iM2t5EKquOTFDbyTblRB3O9I+7KX4xI8u5ffekAQ==} + peerDependencies: + graphology-types: '>=0.24.0' + hachure-fill@0.5.2: resolution: {integrity: sha512-3GKBOn+m2LX9iq+JC1064cSFprJY4jL1jCXTcpnfER5HYE2l/4EfWSGzkPa/ZDBmYI0ZOEj5VHV/eKnPGkHuOg==} @@ -10540,6 +10615,9 @@ packages: mlly@1.7.4: resolution: {integrity: sha512-qmdSIPC4bDJXgZTCR7XosJiNKySV7O215tsPtDN9iEO/7q/76b/ijtgRu/+epFXSJhijtTCCGp3DWS549P3xKw==} + mnemonist@0.39.8: + resolution: {integrity: sha512-vyWo2K3fjrUw8YeeZ1zF0fy6Mu59RHokURlld8ymdUPjMlD9EC9ov1/YPqTgqRvUN9nTr3Gqfz29LYAmu0PHPQ==} + mocha@10.8.2: resolution: {integrity: sha512-VZlYo/WE8t1tstuRmqgeyBgCbJc/lEdopaa+axcKzTBJ+UIdlAB9XnmvTCAH4pwR4ElNInaedhEBmZD8iCSVEg==} 
engines: {node: '>= 14.0.0'} @@ -10766,6 +10844,9 @@ packages: resolution: {integrity: sha512-nK28WOo+QIjBkDduTINE4JkF/UJJKyf2EJxvJKfblDpyg0Q+pkOHNTL0Qwy6NP6FhE/EnzV73BxxqcJaXY9anw==} engines: {node: '>= 0.4'} + obliterator@2.0.5: + resolution: {integrity: sha512-42CPE9AhahZRsMNslczq0ctAEtqk8Eka26QofnqC346BZdHDySk3LWka23LI7ULIw11NmltpiLagIq8gBozxTw==} + obuf@1.1.2: resolution: {integrity: sha512-PX1wu0AmAdPqOL1mWhqmlOd8kOIZQwGZw6rh7uby9fTc5lhaOWFLX3I6R1hrF9k3zUY40e6igsLGkDXK92LJNg==} @@ -10896,6 +10977,9 @@ packages: pako@1.0.11: resolution: {integrity: sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==} + pandemonium@2.4.1: + resolution: {integrity: sha512-wRqjisUyiUfXowgm7MFH2rwJzKIr20rca5FsHXCMNm1W5YPP1hCtrZfgmQ62kP7OZ7Xt+cR858aB28lu5NX55g==} + param-case@3.0.4: resolution: {integrity: sha512-RXlj7zCYokReqWpOPH9oYivUzLYZ5vAPIfEmCTNViosC78F8F0H9y7T7gG2M39ymgutxF5gcFEsyZQSph9Bp3A==} @@ -17648,6 +17732,8 @@ snapshots: '@xtuc/long@4.2.2': {} + '@yomguithereal/helpers@1.1.1': {} + abab@2.0.6: {} abbrev@1.1.1: {} @@ -20295,6 +20381,65 @@ snapshots: dependencies: lodash: 4.17.21 + graphology-communities-louvain@2.0.2(graphology-types@0.24.8): + dependencies: + graphology-indices: 0.17.0(graphology-types@0.24.8) + graphology-types: 0.24.8 + graphology-utils: 2.5.2(graphology-types@0.24.8) + mnemonist: 0.39.8 + pandemonium: 2.4.1 + + graphology-indices@0.17.0(graphology-types@0.24.8): + dependencies: + graphology-types: 0.24.8 + graphology-utils: 2.5.2(graphology-types@0.24.8) + mnemonist: 0.39.8 + + graphology-layout-forceatlas2@0.10.1(graphology-types@0.24.8): + dependencies: + graphology-types: 0.24.8 + graphology-utils: 2.5.2(graphology-types@0.24.8) + + graphology-layout-noverlap@0.4.2(graphology-types@0.24.8): + dependencies: + graphology-types: 0.24.8 + graphology-utils: 2.5.2(graphology-types@0.24.8) + + graphology-layout@0.6.1(graphology-types@0.24.8): + dependencies: + graphology-types: 0.24.8 + graphology-utils: 
2.5.2(graphology-types@0.24.8) + pandemonium: 2.4.1 + + graphology-metrics@2.4.0(graphology-types@0.24.8): + dependencies: + graphology-indices: 0.17.0(graphology-types@0.24.8) + graphology-shortest-path: 2.1.0(graphology-types@0.24.8) + graphology-types: 0.24.8 + graphology-utils: 2.5.2(graphology-types@0.24.8) + mnemonist: 0.39.8 + pandemonium: 2.4.1 + + graphology-shortest-path@2.1.0(graphology-types@0.24.8): + dependencies: + '@yomguithereal/helpers': 1.1.1 + graphology-indices: 0.17.0(graphology-types@0.24.8) + graphology-types: 0.24.8 + graphology-utils: 2.5.2(graphology-types@0.24.8) + mnemonist: 0.39.8 + + graphology-types@0.24.8: {} + + graphology-utils@2.5.2(graphology-types@0.24.8): + dependencies: + graphology-types: 0.24.8 + + graphology@0.25.4(graphology-types@0.24.8): + dependencies: + events: 3.3.0 + graphology-types: 0.24.8 + obliterator: 2.0.5 + hachure-fill@0.5.2: {} handle-thing@2.0.1: {} @@ -22408,6 +22553,10 @@ snapshots: pkg-types: 1.3.1 ufo: 1.6.1 + mnemonist@0.39.8: + dependencies: + obliterator: 2.0.5 + mocha@10.8.2: dependencies: ansi-colors: 4.1.3 @@ -22600,6 +22749,8 @@ snapshots: has-symbols: 1.1.0 object-keys: 1.1.1 + obliterator@2.0.5: {} + obuf@1.1.2: {} on-finished@2.4.1: @@ -22769,6 +22920,10 @@ snapshots: pako@1.0.11: {} + pandemonium@2.4.1: + dependencies: + mnemonist: 0.39.8 + param-case@3.0.4: dependencies: dot-case: 3.0.4