diff --git a/.github/workflows/lint_test_compile.yml b/.github/workflows/lint_test_compile.yml index 53a34b09..3db7e5e8 100644 --- a/.github/workflows/lint_test_compile.yml +++ b/.github/workflows/lint_test_compile.yml @@ -14,7 +14,7 @@ jobs: - name: Set up Deno uses: denoland/setup-deno@v2 with: - deno-version: "2.x" + deno-version: "2.4.0" - name: Install dependencies run: | @@ -35,7 +35,7 @@ jobs: - name: Set up Node.js uses: denoland/setup-deno@v2 with: - deno-version: "2.x" + deno-version: "2.4.0" - name: Install dependencies run: | @@ -53,7 +53,7 @@ jobs: - name: Set up Node.js uses: denoland/setup-deno@v2 with: - deno-version: "2.x" + deno-version: "2.4.0" - name: Install dependencies run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2a9508e7..2756d4d2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -16,7 +16,7 @@ jobs: - name: Set up Deno uses: denoland/setup-deno@v2 with: - deno-version: "2.3.5" + deno-version: "2.4.0" - name: Get new version and bump deno.json id: get_version diff --git a/deno.json b/deno.json index 76395ecb..674249e3 100644 --- a/deno.json +++ b/deno.json @@ -1,11 +1,17 @@ { - "version": "1.0.13", + "version": "1.0.14", "name": "@napi/cli", "exports": "./src/index.ts", "nodeModulesDir": "auto", "lock": false, "imports": { "@inquirer/prompts": "npm:@inquirer/prompts@^7.5.3", + "@langchain/anthropic": "npm:@langchain/anthropic@^0.3.23", + "@langchain/core": "npm:@langchain/core@^0.3.61", + "@langchain/google-genai": "npm:@langchain/google-genai@^0.2.14", + "@langchain/google-vertexai": "npm:@langchain/google-vertexai@^0.2.14", + "@langchain/langgraph": "npm:@langchain/langgraph@^0.3.5", + "@langchain/openai": "npm:@langchain/openai@^0.5.15", "@oak/oak": "jsr:@oak/oak@^17.1.4", "@std/expect": "jsr:@std/expect@^1.0.16", "@std/path": "jsr:@std/path@^1.0.9", diff --git a/examples/csharp/EndpointExample/Properties/launchSettings.json 
b/examples/csharp/EndpointExample/Properties/launchSettings.json index b7d23a26..7b989c87 100644 --- a/examples/csharp/EndpointExample/Properties/launchSettings.json +++ b/examples/csharp/EndpointExample/Properties/launchSettings.json @@ -1,4 +1,4 @@ -{ +{ "$schema": "https://json.schemastore.org/launchsettings.json", "profiles": { "http": { diff --git a/examples/java/websocket/pom.xml b/examples/java/websocket/pom.xml index b1818be0..01673008 100644 --- a/examples/java/websocket/pom.xml +++ b/examples/java/websocket/pom.xml @@ -1,47 +1,51 @@ - - 4.0.0 - websocket - - - org.springframework.boot - spring-boot-starter-parent - 1.0.2.RELEASE - - napi - Spring Boot WebSocket Sample - Spring Boot WebSocket Sample - 1.0-SNAPSHOT - http://projects.spring.io/spring-boot/ - - Pivotal Software, Inc. - http://www.spring.io - - - ${basedir}/../.. - 1.7 - - - - org.springframework.boot - spring-boot-starter-websocket - - - org.springframework.boot - spring-boot-starter-actuator - - - org.springframework.boot - spring-boot-starter-test - test - - - - - - org.springframework.boot - spring-boot-maven-plugin - - - + + 4.0.0 + websocket + + + org.springframework.boot + spring-boot-starter-parent + 1.0.2.RELEASE + + napi + Spring Boot WebSocket Sample + Spring Boot WebSocket Sample + 1.0-SNAPSHOT + http://projects.spring.io/spring-boot/ + + Pivotal Software, Inc. + http://www.spring.io + + + ${basedir}/../.. 
+ 1.7 + + + + org.springframework.boot + spring-boot-starter-websocket + + + org.springframework.boot + spring-boot-starter-actuator + + + org.springframework.boot + spring-boot-starter-test + test + + + + + + org.springframework.boot + spring-boot-maven-plugin + + + diff --git a/src/cli/handlers/init/index.ts b/src/cli/handlers/init/index.ts index 3de22a88..5844e1ef 100644 --- a/src/cli/handlers/init/index.ts +++ b/src/cli/handlers/init/index.ts @@ -20,6 +20,11 @@ import { import { ApiService } from "../../../apiService/index.ts"; import type { globalConfigSchema } from "../../middlewares/globalConfig.ts"; import { isAuthenticatedMiddleware } from "../../middlewares/isAuthenticated.ts"; +import { + ANTHROPIC_PROVIDER, + GOOGLE_PROVIDER, + OPENAI_PROVIDER, +} from "../../../manifest/dependencyManifest/labeling/model.ts"; function builder( yargs: Arguments & { @@ -1031,6 +1036,61 @@ export async function generateConfig( // Show final file selection to the user showFinalFileSelection(workDir, includePatterns, excludePatterns); + // Labeling configuration + console.info("\nšŸ·ļø LABELING CONFIGURATION"); + console.info( + "Labeling helps categorize and organize your code dependencies using AI models.", + ); + + const enableLabeling = await confirm({ + message: "Would you like to enable AI-powered labeling?", + default: false, + }); + + let labelingConfig: + | z.infer["labeling"] + | undefined = undefined; + + if (enableLabeling) { + console.info("\nšŸ¤– AI MODEL SELECTION"); + console.info( + "Choose an AI provider for labeling your dependencies:", + ); + + const modelProvider = await select({ + message: "Select AI model provider:", + choices: [ + { name: "OpenAI (GPT-4o-mini)", value: OPENAI_PROVIDER }, + { name: "Google (Gemini 2.5 Flash)", value: GOOGLE_PROVIDER }, + { name: "Anthropic (Claude 3.5 Sonnet)", value: ANTHROPIC_PROVIDER }, + ], + }) as + | typeof OPENAI_PROVIDER + | typeof GOOGLE_PROVIDER + | typeof ANTHROPIC_PROVIDER; + + const maxConcurrency = 
await input({ + message: "Enter maximum concurrent requests (leave empty for unlimited):", + validate: (value) => { + if (!value.trim()) return true; // Allow empty for unlimited + const num = parseInt(value); + if (isNaN(num) || num <= 0) { + return "Please enter a positive number or leave empty for unlimited"; + } + return true; + }, + }); + + labelingConfig = { + modelProvider, + maxConcurrency: maxConcurrency.trim() + ? parseInt(maxConcurrency) + : undefined, + }; + + console.info("āœ… Labeling configuration added"); + } + // Build the config object const config: z.infer = { language: language, @@ -1052,5 +1112,10 @@ export async function generateConfig( config.c = cConfig; } + // Add labeling config if it exists + if (labelingConfig) { + config.labeling = labelingConfig; + } + return config; } diff --git a/src/cli/handlers/manifest/generate.ts b/src/cli/handlers/manifest/generate.ts index 152478a0..a754e6b7 100644 --- a/src/cli/handlers/manifest/generate.ts +++ b/src/cli/handlers/manifest/generate.ts @@ -45,6 +45,9 @@ function builder( } return value; }, + }).option("labelingApiKey", { + type: "string", + description: "The API key to use for the labeling", }); } @@ -128,6 +131,7 @@ async function handler( branch?: string; commitSha?: string; commitShaDate?: string; + labelingApiKey?: string; }, ) { const napiConfig = argv.napiConfig as z.infer; @@ -217,7 +221,12 @@ async function handler( console.info(`šŸ“Š Processing ${files.size} files...`); - const dependencyManifest = generateDependencyManifest(files, napiConfig); + const dependencyManifest = await generateDependencyManifest( + files, + napiConfig, + globalConfig, + argv.labelingApiKey, + ); // Upload manifest to API instead of writing to disk const apiService = new ApiService( diff --git a/src/cli/handlers/set/apiKey.ts b/src/cli/handlers/set/apiKey.ts new file mode 100644 index 00000000..26cb3128 --- /dev/null +++ b/src/cli/handlers/set/apiKey.ts @@ -0,0 +1,53 @@ +import type { Arguments } from 
"yargs-types"; +import type { z } from "zod"; +import { + type globalConfigSchema, + setConfig, +} from "../../middlewares/globalConfig.ts"; +import { + ANTHROPIC_PROVIDER, + GOOGLE_PROVIDER, + type ModelProvider, + OPENAI_PROVIDER, +} from "../../../manifest/dependencyManifest/labeling/model.ts"; +import { input, select } from "@inquirer/prompts"; + +async function handler( + argv: Arguments & { + globalConfig: z.infer; + }, +) { + const globalConfig = argv.globalConfig as z.infer; + + const provider = await select({ + message: "Select a provider", + choices: [ + { name: "Google", value: GOOGLE_PROVIDER }, + { name: "OpenAI", value: OPENAI_PROVIDER }, + { name: "Anthropic", value: ANTHROPIC_PROVIDER }, + ], + }) as ModelProvider; + + const apiKey = await input({ + message: "Enter the API key", + validate: (value) => { + if (value.length === 0) { + return "API key cannot be empty"; + } + return true; + }, + }); + + const labeling = globalConfig.labeling || { apiKeys: {} }; + labeling.apiKeys[provider] = apiKey; + setConfig({ ...globalConfig, labeling }); + + console.info("API key set successfully"); +} + +export default { + command: "apiKey", + describe: "set an API key for a model provider in your global config", + builder: () => {}, + handler, +}; diff --git a/src/cli/handlers/set/index.ts b/src/cli/handlers/set/index.ts new file mode 100644 index 00000000..1c80007e --- /dev/null +++ b/src/cli/handlers/set/index.ts @@ -0,0 +1,21 @@ +import apiKeyHandler from "./apiKey.ts"; +import type { Arguments } from "yargs-types"; +import type { globalConfigSchema } from "../../middlewares/globalConfig.ts"; +import type { z } from "zod"; + +function builder( + yargs: Arguments & { + globalConfig: z.infer; + }, +) { + return yargs + .command(apiKeyHandler) + .demandCommand(1, "You need to specify a valid command"); +} + +export default { + command: "set", + describe: "set a value in the global config", + builder, + handler: () => {}, +}; diff --git a/src/cli/index.ts 
b/src/cli/index.ts index c55c3220..d8dfbfae 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -5,6 +5,7 @@ import { } from "./middlewares/checkVersion.ts"; import loginCommand from "./handlers/login/index.ts"; import initCommand from "./handlers/init/index.ts"; +import setCommand from "./handlers/set/index.ts"; import manifestCommand from "./handlers/manifest/index.ts"; import extractCommand from "./handlers/extract/index.ts"; import { globalConfigMiddleware } from "./middlewares/globalConfig.ts"; @@ -27,6 +28,7 @@ export function initCli() { .middleware(globalConfigMiddleware) .command(loginCommand) .command(initCommand) + .command(setCommand) .command(manifestCommand) .command(extractCommand) .demandCommand(1, "You need to specify a command") diff --git a/src/cli/middlewares/globalConfig.ts b/src/cli/middlewares/globalConfig.ts index 89803248..f8a64c9d 100644 --- a/src/cli/middlewares/globalConfig.ts +++ b/src/cli/middlewares/globalConfig.ts @@ -6,6 +6,13 @@ export const globalConfigSchema = z.object({ jwt: z.string().optional(), token: z.string().optional(), apiHost: z.string(), + labeling: z.object({ + apiKeys: z.object({ + google: z.string().optional(), + openai: z.string().optional(), + anthropic: z.string().optional(), + }), + }).optional(), }); export const defaultApiHost = "https://api.nanoapi.io"; diff --git a/src/cli/middlewares/napiConfig.ts b/src/cli/middlewares/napiConfig.ts index 40a0b052..7192139c 100644 --- a/src/cli/middlewares/napiConfig.ts +++ b/src/cli/middlewares/napiConfig.ts @@ -10,6 +10,11 @@ import { javaLanguage, pythonLanguage, } from "../../helpers/treeSitter/parsers.ts"; +import { + ANTHROPIC_PROVIDER, + GOOGLE_PROVIDER, + OPENAI_PROVIDER, +} from "../../manifest/dependencyManifest/labeling/model.ts"; const pythonVersions = Object.keys(pythonStdlibList); @@ -38,6 +43,14 @@ export const localConfigSchema = z.object({ exclude: z.array(z.string()).optional(), }), outDir: z.string(), + labeling: z.object({ + modelProvider: z.enum([ + 
GOOGLE_PROVIDER, + OPENAI_PROVIDER, + ANTHROPIC_PROVIDER, + ]), + maxConcurrency: z.number().optional(), + }).optional(), }); const napiConfigFileName = ".napirc"; diff --git a/src/languagePlugins/python/dependencyResolver/index.ts b/src/languagePlugins/python/dependencyResolver/index.ts index 2bdd0083..1195a27c 100644 --- a/src/languagePlugins/python/dependencyResolver/index.ts +++ b/src/languagePlugins/python/dependencyResolver/index.ts @@ -126,6 +126,18 @@ export class PythonDependencyResolver { const symbolDependencies: SymbolDependency = { id: symbol.id, type: symbol.type, + positions: symbol.nodes.map((node) => ({ + start: { + index: node.startIndex, + row: node.startPosition.row, + column: node.startPosition.column, + }, + end: { + index: node.endIndex, + row: node.endPosition.row, + column: node.endPosition.column, + }, + })), metrics: { characterCount: complexityMetrics.characterCount, codeCharacterCount: complexityMetrics.codeCharacterCount, diff --git a/src/languagePlugins/python/dependencyResolver/types.ts b/src/languagePlugins/python/dependencyResolver/types.ts index 4bfe1fa6..e3204022 100644 --- a/src/languagePlugins/python/dependencyResolver/types.ts +++ b/src/languagePlugins/python/dependencyResolver/types.ts @@ -27,6 +27,19 @@ export interface SymbolDependency { id: string; /** Symbol type (class, function, variable) */ type: PythonSymbolType; + /** Positions of the symbol in the file */ + positions: { + start: { + index: number; + row: number; + column: number; + }; + end: { + index: number; + row: number; + column: number; + }; + }[]; /** Size metrics for the symbol */ metrics: { /** Total character count in the symbol */ diff --git a/src/manifest/dependencyManifest/c/index.ts b/src/manifest/dependencyManifest/c/index.ts index 00a6d1a2..ece62f71 100644 --- a/src/manifest/dependencyManifest/c/index.ts +++ b/src/manifest/dependencyManifest/c/index.ts @@ -65,6 +65,19 @@ export function generateCDependencyManifest( symbols[symName] = { id: symName, 
type: symType as SymbolType, + positions: [{ + start: { + index: symbol.node.startIndex, + row: symbol.node.startPosition.row, + column: symbol.node.startPosition.column, + }, + end: { + index: symbol.node.endIndex, + row: symbol.node.endPosition.row, + column: symbol.node.endPosition.column, + }, + }], + description: "", metrics: { [metricCharacterCount]: metrics.characterCount, [metricCodeCharacterCount]: metrics.codeCharacterCount, diff --git a/src/manifest/dependencyManifest/csharp/index.ts b/src/manifest/dependencyManifest/csharp/index.ts index 01c446cf..8f2f65c1 100644 --- a/src/manifest/dependencyManifest/csharp/index.ts +++ b/src/manifest/dependencyManifest/csharp/index.ts @@ -69,6 +69,19 @@ export function generateCSharpDependencyManifest( symbols[symbolName] = { id: symbolName, type: symbol.type as SymbolType, + positions: [{ + start: { + index: symbol.node.startIndex, + row: symbol.node.startPosition.row, + column: symbol.node.startPosition.column, + }, + end: { + index: symbol.node.endIndex, + row: symbol.node.endPosition.row, + column: symbol.node.endPosition.column, + }, + }], + description: "", metrics: { [metricCharacterCount]: symbol.characterCount, [metricCodeCharacterCount]: metrics.codeCharacterCount, diff --git a/src/manifest/dependencyManifest/index.ts b/src/manifest/dependencyManifest/index.ts index aef3f273..c2adc0ea 100644 --- a/src/manifest/dependencyManifest/index.ts +++ b/src/manifest/dependencyManifest/index.ts @@ -11,6 +11,13 @@ import { pythonLanguage, } from "../../helpers/treeSitter/parsers.ts"; import { generateJavaDependencyManifest } from "./java/index.ts"; +import { generateSymbolDescriptions } from "./labeling/index.ts"; +import type { globalConfigSchema } from "../../cli/middlewares/globalConfig.ts"; +import { + ANTHROPIC_PROVIDER, + GOOGLE_PROVIDER, + OPENAI_PROVIDER, +} from "./labeling/model.ts"; const handlerMap: Record< string, @@ -34,10 +41,12 @@ export class UnsupportedLanguageError extends Error { } } -export function 
generateDependencyManifest( +export async function generateDependencyManifest( files: Map, napiConfig: z.infer, -): DependencyManifest { + globalConfig: z.infer, + labelingApiKey: string | undefined, +): Promise { const languageName = napiConfig.language; const handler = handlerMap[languageName]; @@ -71,5 +80,39 @@ export function generateDependencyManifest( sortedDepMap[key].symbols = sortedSymbolsMap; } - return sortedDepMap; + if (napiConfig.labeling) { + let apiKey: string | undefined; + if (labelingApiKey) { + apiKey = labelingApiKey; + } else { + if (napiConfig.labeling.modelProvider === GOOGLE_PROVIDER) { + apiKey = globalConfig.labeling?.apiKeys.google; + } + if (napiConfig.labeling.modelProvider === OPENAI_PROVIDER) { + apiKey = globalConfig.labeling?.apiKeys.openai; + } + if (napiConfig.labeling.modelProvider === ANTHROPIC_PROVIDER) { + apiKey = globalConfig.labeling?.apiKeys.anthropic; + } + } + + if (!apiKey) { + console.warn( + "No API key found for the selected model provider. 
Please run `napi set apiKey` to set an API key.", + ); + return sortedDepMap; + } + + const labeledDependencyManifest = await generateSymbolDescriptions( + files, + sortedDepMap, + apiKey, + napiConfig.labeling.modelProvider, + napiConfig.labeling.maxConcurrency, + ); + + return labeledDependencyManifest; + } else { + return sortedDepMap; + } } diff --git a/src/manifest/dependencyManifest/java/index.ts b/src/manifest/dependencyManifest/java/index.ts index 0d3b498d..e2c6ef7f 100644 --- a/src/manifest/dependencyManifest/java/index.ts +++ b/src/manifest/dependencyManifest/java/index.ts @@ -38,6 +38,19 @@ export function generateJavaDependencyManifest( symbols[symName] = { id: symName, type: symType as SymbolType, + positions: [{ + start: { + index: symbol.node.startIndex, + row: symbol.node.startPosition.row, + column: symbol.node.startPosition.column, + }, + end: { + index: symbol.node.endIndex, + row: symbol.node.endPosition.row, + column: symbol.node.endPosition.column, + }, + }], + description: "", metrics: { [metricCharacterCount]: metrics.characterCount, [metricCodeCharacterCount]: metrics.codeCharacterCount, diff --git a/src/manifest/dependencyManifest/labeling/graph.ts b/src/manifest/dependencyManifest/labeling/graph.ts new file mode 100644 index 00000000..580439ef --- /dev/null +++ b/src/manifest/dependencyManifest/labeling/graph.ts @@ -0,0 +1,326 @@ +import { Annotation, StateGraph } from "@langchain/langgraph"; +import type { DependencyManifest } from "../types.ts"; +import type { GroupLayer, SymbolRef } from "./types.ts"; +import { symbolRefToKey } from "./grouping.ts"; +import { + type AIMessage, + HumanMessage, + SystemMessage, +} from "@langchain/core/messages"; +import { z } from "zod"; +import type { BaseChatModel } from "@langchain/core/language_models/chat_models"; + +function getSymbolDependencyContextMessages( + state: typeof workflowState.State, + symbolRef: SymbolRef, +) { + const symbolDependencyManifest = + 
state.dependencyManifest[symbolRef.fileId].symbols[symbolRef.symbolId]; + + const messages: HumanMessage[] = []; + + for ( + const fileDependency of Object.values( + symbolDependencyManifest.dependencies, + ) + ) { + // First check if external + if (fileDependency.isExternal) { + const symbols = Object.values(fileDependency.symbols); + if (symbols.length === 0) { + messages.push( + new HumanMessage( + `External dependency from the following source: ${fileDependency.id}`, + ), + ); + } else { + messages.push( + new HumanMessage( + `External dependency from the following source: ${fileDependency.id} +with the following symbols: ${symbols.join(", ")}`, + ), + ); + } + continue; + } + + const filedependencyManifest = state.dependencyManifest[fileDependency.id]; + for (const symbol of Object.values(fileDependency.symbols)) { + const symbolDependencyManifest = + state.dependencyManifest[fileDependency.id].symbols[symbol]; + // Then check if we have the symbol in the labelManifest + if (filedependencyManifest) { + const symbolDependencyManifest = filedependencyManifest.symbols[symbol]; + if (symbolDependencyManifest.description) { + messages.push( + new HumanMessage( + `Dependency from the following source: ${fileDependency.id} with the following symbol: ${symbolDependencyManifest.id} (${symbolDependencyManifest.type}) +Here is a brief description of this symbol: ${symbolDependencyManifest.description}`, + ), + ); + continue; + } + } + + // Last, check if we have the symbol in the notYetProcessedSymbolDescriptionMap + const key = symbolRefToKey({ + fileId: fileDependency.id, + symbolId: symbol, + }); + const description = state.notYetProcessedSymbolDescriptionMap.get(key); + if (description) { + messages.push( + new HumanMessage( + `Dependency from the following source: ${fileDependency.id} with the following symbol: ${symbolDependencyManifest.id} (${symbolDependencyManifest.type}) +Here is a brief description of this symbol: ${description}`, + ), + ); + } + } + } + + if 
(messages.length >= 1) { + messages.unshift( + new HumanMessage( + "Next messages is a list of dependencies to the symbol you need to process as well as some information about each of them:", + ), + ); + } + + return messages; +} + +function generateContentsMessages( + state: typeof workflowState.State, + symbolRef: SymbolRef, +) { + const file = state.files.get(symbolRef.fileId); + if (!file) { + throw new Error(`File not found: ${symbolRef.fileId}`); + } + + const symbolManifest = + state.dependencyManifest[symbolRef.fileId].symbols[symbolRef.symbolId]; + + const contents: string[] = []; + for (const position of symbolManifest.positions) { + const lines = file.content.split("\n"); + + const symbolLines: string[] = []; + // get content between startLine and endLine + for (let i = position.start.row; i <= position.end.row; i++) { + symbolLines.push(lines[i]); + } + + const content = symbolLines.join("\n"); + + contents.push(content); + } + + const messages: HumanMessage[] = []; + + messages.push( + new HumanMessage( + `Here is the symbolRef of the symbol that you need to process: { fileId: ${symbolRef.fileId}, symbolId: ${symbolRef.symbolId} }`, + ), + ); + + if (contents.length === 0) { + messages.push( + new HumanMessage( + "The symbol that you need to process has no content.", + ), + ); + } else { + messages.push( + new HumanMessage( + `The symbol that you need to process has content. 
Here is the content (${contents.length} parts):`, + ), + ); + for (const [index, content] of contents.entries()) { + messages.push( + new HumanMessage( + `Part ${index + 1} of ${contents.length}: ${content}`, + ), + ); + } + } + + return messages; +} + +const workflowState = Annotation.Root({ + files: Annotation>, + dependencyManifest: Annotation, + groupLayer: Annotation, + notYetProcessedSymbolDescriptionMap: Annotation>, + model: Annotation, + results: Annotation<{ symbolRef: SymbolRef; description: string }[]>, +}); + +export function createGroupSymbolLabelingWorkflow( + files: Map, + dependencyManifest: DependencyManifest, + groupLayer: GroupLayer, + model: BaseChatModel, +) { + function initNode(_state: typeof workflowState.State) { + return { + files, + dependencyManifest, + groupLayer, + notYetProcessedSymbolDescriptionMap: new Map(), + model, + results: [], + }; + } + + async function generateDescriptionsForYetToBeProcessedSymbols( + state: typeof workflowState.State, + ) { + if (state.groupLayer.symbolRefsToProcess.length === 0) { + // no symbol yet to process + // we have all the context needed in the labelManifest + return state; + } + + const messagesBatch: AIMessage[][] = []; + + for (const symbolRef of state.groupLayer.symbolRefsToProcess) { + const messages = generateDescriptionMessagesForSymbol( + state, + symbolRef, + ); + messagesBatch.push(messages); + } + + const schema = z.object({ + symbolRef: z.object({ + fileId: z.string().describe("The id of the file."), + symbolId: z.string().describe("The id of the symbol."), + }), + description: z.string().max(500).describe( + "A short description of what the symbol is doing.", + ), + }); + + const results = await (model as BaseChatModel).withStructuredOutput(schema) + .batch(messagesBatch) as z.infer[]; + + for (const result of results) { + const key = symbolRefToKey(result.symbolRef); + const description = result.description; + state.notYetProcessedSymbolDescriptionMap.set(key, description); + } + + 
return state; + } + + function generateDescriptionMessagesForSymbol( + state: typeof workflowState.State, + symbolRef: SymbolRef, + ) { + const messages: AIMessage[] = []; + messages.push( + new SystemMessage( + "You are a helpful assistant that generates a short description of what the symbol (class, function, etc.) is doing.", + ), + ); + + const symbolDependencyMessages = getSymbolDependencyContextMessages( + state, + symbolRef, + ); + for (const message of symbolDependencyMessages) { + messages.push(message); + } + + const symbolContentsMessages = generateContentsMessages( + state, + symbolRef, + ); + for (const message of symbolContentsMessages) { + messages.push(message); + } + + return messages; + } + + async function generateLabels( + state: typeof workflowState.State, + ) { + const messagesBatch: AIMessage[][] = []; + + for (const symbolRef of state.groupLayer.symbolRefsToProcess) { + const messages = generateMessagesForLabelingSymbol(state, symbolRef); + messagesBatch.push(messages); + } + + const schema = z.object({ + symbolRef: z.object({ + fileId: z.string().describe("The id of the file."), + symbolId: z.string().describe("The id of the symbol."), + }), + description: z.string().describe( + "A business focused description of what the symbol is doing (max 500 char).", + ), + }); + + const results = await (model as BaseChatModel).withStructuredOutput(schema) + .batch(messagesBatch) as z.infer[]; + + for (const result of results) { + state.results.push(result); + } + + return state; + } + + function generateMessagesForLabelingSymbol( + state: typeof workflowState.State, + symbolRef: SymbolRef, + ) { + const messages: AIMessage[] = []; + messages.push( + new SystemMessage( + "You are a helpful assistant that generates labels for a symbol (class, function, etc.).", + ), + ); + + const symbolDependencyMessages = getSymbolDependencyContextMessages( + state, + symbolRef, + ); + for (const message of symbolDependencyMessages) { + messages.push(message); + } + + 
const symbolContentsMessages = generateContentsMessages( + state, + symbolRef, + ); + for (const message of symbolContentsMessages) { + messages.push(message); + } + + return messages; + } + + const workflow = new StateGraph(workflowState).addNode("init", initNode) + // nodes + .addNode( + "generateDescriptionsForYetToBeProcessedSymbols", + generateDescriptionsForYetToBeProcessedSymbols, + ) + .addNode("generateLabels", generateLabels) + // edges + .addEdge("__start__", "init") + .addEdge("init", "generateDescriptionsForYetToBeProcessedSymbols") + .addEdge( + "generateDescriptionsForYetToBeProcessedSymbols", + "generateLabels", + ) + .addEdge("generateLabels", "__end__"); + + return workflow.compile(); +} diff --git a/src/manifest/dependencyManifest/labeling/grouping.ts b/src/manifest/dependencyManifest/labeling/grouping.ts new file mode 100644 index 00000000..53898640 --- /dev/null +++ b/src/manifest/dependencyManifest/labeling/grouping.ts @@ -0,0 +1,484 @@ +import type { DependencyManifest } from "../types.ts"; +import type { GroupLayer, SymbolRef } from "./types.ts"; + +// ============================================================================= +// CONSTANTS AND CONFIGURATION +// ============================================================================= + +/** + * The separator used to join the fileId and symbolId in the key. + * This is a URL-safe separator that won't conflict with file paths. + */ +const JOINT_SYMBOL_SEPARATOR = "::"; + +// ============================================================================= +// SYMBOL KEY MANAGEMENT UTILITIES +// ============================================================================= + +/** + * Converts a SymbolRef into a unique string key for efficient lookups. + * Uses URL encoding to handle special characters in file paths and symbol names. 
+ * + * @param ref - The symbol reference to convert + * @returns A unique string key for the symbol + */ +export function symbolRefToKey(ref: SymbolRef): string { + const urlEncodedFileId = encodeURIComponent(ref.fileId); + const urlEncodedSymbolId = encodeURIComponent(ref.symbolId); + return `${urlEncodedFileId}${JOINT_SYMBOL_SEPARATOR}${urlEncodedSymbolId}`; +} + +/** + * Converts a symbol key back into a SymbolRef. + * This is the inverse operation of symbolRefToKey. + * + * @param key - The string key to convert back + * @returns The original SymbolRef + */ +export function keyToSymbolRef(key: string): SymbolRef { + const [urlEncodedFileId, urlEncodedSymbolId] = key.split( + JOINT_SYMBOL_SEPARATOR, + ); + const fileId = decodeURIComponent(urlEncodedFileId); + const symbolId = decodeURIComponent(urlEncodedSymbolId); + return { fileId, symbolId }; +} + +// ============================================================================= +// DEPENDENCY ANALYSIS FUNCTIONS +// ============================================================================= + +/** + * Gets all unprocessed internal dependencies for a given symbol. 
+ * This function filters out: + * - External dependencies (outside the codebase) + * - Self-dependencies (symbol depending on itself) + * - Already processed dependencies + * + * @param symbolRef - The symbol to analyze + * @param manifest - The complete dependency manifest + * @param processedSymbols - Set of already processed symbol keys + * @returns Array of dependency symbol keys that haven't been processed yet + */ +function getUnprocessedDependencies( + symbolRef: SymbolRef, + manifest: DependencyManifest, + processedSymbols: Set, +): string[] { + const currentSymbolKey = symbolRefToKey(symbolRef); + const symbolManifest = manifest[symbolRef.fileId] + ?.symbols[symbolRef.symbolId]; + if (!symbolManifest) return []; + + const dependencies: string[] = []; + + // Iterate through all files this symbol depends on + for ( + const [depFileId, depInfo] of Object.entries(symbolManifest.dependencies) + ) { + // Skip external dependencies - we only care about internal code dependencies + if (depInfo.isExternal) continue; + + // Check each symbol within the dependency file + for (const depSymbolId of Object.keys(depInfo.symbols)) { + // Verify the dependency symbol actually exists in the manifest + if (manifest[depFileId]?.symbols[depSymbolId]) { + const depKey = symbolRefToKey({ + fileId: depFileId, + symbolId: depSymbolId, + }); + + // Skip self-dependencies and already processed symbols + if (currentSymbolKey !== depKey && !processedSymbols.has(depKey)) { + dependencies.push(depKey); + } + } + } + } + + return dependencies; +} + +// ============================================================================= +// STRONGLY CONNECTED COMPONENTS (SCC) ALGORITHM +// ============================================================================= + +/** + * Tarjan's strongly connected components algorithm. + * This finds groups of symbols that form dependency cycles. 
+ * + * A strongly connected component is a maximal set of vertices such that + * for every pair of vertices u and v, there is a directed path from u to v + * and a directed path from v to u. + * + * Time complexity: O(V + E) where V is vertices and E is edges + * + * https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm#The_algorithm_in_pseudocode + * + * @param graph - Adjacency list representation of the dependency graph + * @returns Array of sets, each containing symbol keys that form an SCC + */ +function stronglyConnectedComponents( + graph: Map>, +): Array> { + // Tarjan's algorithm state + const indices = new Map(); // Discovery time of each vertex + const lowlinks = new Map(); // Lowest reachable discovery time + const onStack = new Set(); // Vertices currently on the stack + const stack: string[] = []; // Stack for the algorithm + const components: Array> = []; // Resulting SCCs + let index = 0; // Global discovery time counter + + /** + * Recursive function that performs the depth-first search for Tarjan's algorithm. + * This is the core of the SCC detection logic. 
+ */ + function strongConnect(v: string): void { + // Initialize the vertex + indices.set(v, index); + lowlinks.set(v, index); + index++; + stack.push(v); + onStack.add(v); + + // Check all neighbors (dependencies) + const neighbors = graph.get(v) || new Set(); + for (const w of neighbors) { + if (!indices.has(w)) { + // Neighbor w has not yet been visited; recurse on it + strongConnect(w); + lowlinks.set(v, Math.min(lowlinks.get(v)!, lowlinks.get(w)!)); + } else if (onStack.has(w)) { + // Neighbor w is in the stack and hence in the current SCC + lowlinks.set(v, Math.min(lowlinks.get(v)!, indices.get(w)!)); + } + } + + // If v is a root node, pop the stack and create an SCC + if (lowlinks.get(v) === indices.get(v)) { + const component = new Set(); + let w: string; + do { + w = stack.pop()!; + onStack.delete(w); + component.add(w); + } while (w !== v); + components.push(component); + } + } + + // Start DFS from each unvisited vertex + for (const v of graph.keys()) { + if (!indices.has(v)) { + strongConnect(v); + } + } + + return components; +} + +/** + * Builds a dependency graph from remaining symbols and finds SCCs. + * This creates the graph representation needed for SCC analysis. 
/**
 * Builds a dependency graph from remaining symbols and finds SCCs.
 * This creates the graph representation needed for SCC analysis.
 *
 * Every remaining symbol becomes a vertex. Its outgoing edges are those of
 * its unprocessed dependencies that are themselves still remaining, so the
 * analysis only sees cycles among unprocessed symbols.
 *
 * @param remainingSymbols - Map of symbol keys to SymbolRefs that haven't been processed
 * @param manifest - The complete dependency manifest
 * @param processedSymbols - Set of already processed symbol keys
 * @returns Array of SCCs as sets of symbol keys. Every remaining symbol
 *   appears in exactly one set; only sets with more than one key (or a
 *   self-dependent key) represent an actual cycle.
 */
function findStronglyConnectedComponents(
  remainingSymbols: Map<string, SymbolRef>,
  manifest: DependencyManifest,
  processedSymbols: Set<string>,
): Array<Set<string>> {
  const graph = new Map<string, Set<string>>();

  for (const [symbolKey, symbolRef] of remainingSymbols) {
    const edges = new Set<string>();
    const unprocessed = getUnprocessedDependencies(
      symbolRef,
      manifest,
      processedSymbols,
    );
    for (const dependencyKey of unprocessed) {
      // Drop edges that leave the set of remaining symbols
      if (remainingSymbols.has(dependencyKey)) {
        edges.add(dependencyKey);
      }
    }
    graph.set(symbolKey, edges);
  }

  return stronglyConnectedComponents(graph);
}

// =============================================================================
// CYCLE BREAKING STRATEGIES
// =============================================================================
/**
 * Selects the best symbol from a set based on dependency count.
 * "Best" means the symbol with the fewest unprocessed dependencies,
 * which makes it a good candidate for breaking cycles with minimal impact.
 *
 * Keys that are not present in `remainingSymbols` are ignored. On a tie the
 * first key in the iteration order of `symbolKeys` wins, because a later
 * symbol only replaces the current best when its count is strictly lower.
 *
 * @param symbolKeys - Set of symbol keys to choose from
 * @param remainingSymbols - Map of remaining symbols
 * @param manifest - The dependency manifest
 * @param processedSymbols - Set of processed symbols
 * @returns The best SymbolRef or null if none found
 */
function selectBestSymbol(
  symbolKeys: Set<string>,
  remainingSymbols: Map<string, SymbolRef>,
  manifest: DependencyManifest,
  processedSymbols: Set<string>,
): SymbolRef | null {
  let bestSymbol: SymbolRef | null = null;
  let fewestDependencies = Infinity;

  for (const symbolKey of symbolKeys) {
    const symbolRef = remainingSymbols.get(symbolKey);
    if (!symbolRef) continue;

    const dependencyCount = getUnprocessedDependencies(
      symbolRef,
      manifest,
      processedSymbols,
    ).length;

    if (dependencyCount < fewestDependencies) {
      fewestDependencies = dependencyCount;
      bestSymbol = symbolRef;
    }
  }

  return bestSymbol;
}
/**
 * Selects optimal symbols to break dependency cycles using SCC analysis.
 *
 * This function implements a two-phase strategy:
 * 1. Break major cycles by selecting one representative from each large SCC
 * 2. Add all remaining independent symbols that don't depend on selected ones
 *
 * The goal is to maximize the number of symbols that can be processed
 * while minimizing the total notYetProcessedDependencySymbolRefs across all layers.
 *
 * NOTE(review): the independence check in phase 2 is one-directional. A
 * candidate is rejected when it depends on an already selected symbol, but a
 * previously selected symbol may still depend on a candidate accepted later.
 * The returned `dependencies` can therefore contain symbols that are also in
 * `symbols`. Confirm whether callers rely on the two lists being disjoint.
 *
 * @param remainingSymbols - Map of unprocessed symbols
 * @param manifest - The dependency manifest
 * @param processedSymbols - Set of processed symbols
 * @returns Object containing selected symbols and their dependencies
 */
function selectCycleBreakers(
  remainingSymbols: Map<string, SymbolRef>,
  manifest: DependencyManifest,
  processedSymbols: Set<string>,
): { symbols: SymbolRef[]; dependencies: SymbolRef[] } {
  const sccs = findStronglyConnectedComponents(
    remainingSymbols,
    manifest,
    processedSymbols,
  );
  const selectedSymbols: SymbolRef[] = [];
  const selectedKeys = new Set<string>();

  // Phase 1: Break major cycles (large SCCs with 3+ symbols)
  // These represent significant circular dependencies that need to be broken
  for (const scc of sccs) {
    if (scc.size < 3) continue;

    // Select the symbol with minimum dependencies to minimize impact
    const bestSymbol = selectBestSymbol(
      scc,
      remainingSymbols,
      manifest,
      processedSymbols,
    );
    if (bestSymbol) {
      selectedSymbols.push(bestSymbol);
      selectedKeys.add(symbolRefToKey(bestSymbol));
    }
  }

  // Phase 2: Add all qualifying independent symbols (small SCCs)
  // Small SCCs (size < 3) are typically independent symbols or simple mutual dependencies
  type Candidate = {
    key: string;
    ref: SymbolRef;
    // Computed once per candidate; this function does not mutate its inputs.
    // NOTE(review): assumes getUnprocessedDependencies is a pure lookup.
    deps: ReturnType<typeof getUnprocessedDependencies>;
  };
  const candidates: Candidate[] = [];
  for (const scc of sccs) {
    if (scc.size >= 3) continue;
    for (const key of scc) {
      const ref = remainingSymbols.get(key);
      if (!ref) continue;
      candidates.push({
        key,
        ref,
        deps: getUnprocessedDependencies(ref, manifest, processedSymbols),
      });
    }
  }
  // Sort by dependency count (prefer fewer dependencies); the sort is stable
  candidates.sort((a, b) => a.deps.length - b.deps.length);

  // Greedily add independent symbols that don't depend on already selected ones
  for (const candidate of candidates) {
    // Only add if it doesn't depend on any already selected symbols
    // This ensures symbols in the same batch can be processed in parallel
    const dependsOnSelected = candidate.deps.some((depKey) =>
      selectedKeys.has(depKey)
    );

    if (!dependsOnSelected) {
      selectedSymbols.push(candidate.ref);
      selectedKeys.add(candidate.key);
    }
  }

  // Phase 3: Calculate all dependencies for the selected batch
  // These will become the notYetProcessedDependencySymbolRefs for this layer
  const allDependencies = new Set<string>();
  for (const symbolRef of selectedSymbols) {
    const deps = getUnprocessedDependencies(
      symbolRef,
      manifest,
      processedSymbols,
    );
    for (const dep of deps) {
      allDependencies.add(dep);
    }
  }

  return {
    symbols: selectedSymbols,
    dependencies: Array.from(allDependencies).map(keyToSymbolRef),
  };
}

// =============================================================================
// INDEPENDENT SYMBOL DETECTION
// =============================================================================
/**
 * Finds symbols that have no unprocessed dependencies.
 * These symbols can be processed immediately without waiting for other symbols.
 * This is the optimal case - no cycle breaking needed.
 *
 * The result keeps the iteration order of `remainingSymbols`.
 *
 * @param remainingSymbols - Map of unprocessed symbols
 * @param manifest - The dependency manifest
 * @param processedSymbols - Set of processed symbols
 * @returns Array of symbols that can be processed independently
 */
function findIndependentSymbols(
  remainingSymbols: Map<string, SymbolRef>,
  manifest: DependencyManifest,
  processedSymbols: Set<string>,
): SymbolRef[] {
  const independent: SymbolRef[] = [];
  for (const symbolRef of remainingSymbols.values()) {
    const pending = getUnprocessedDependencies(
      symbolRef,
      manifest,
      processedSymbols,
    );
    if (pending.length === 0) {
      independent.push(symbolRef);
    }
  }
  return independent;
}

// =============================================================================
// BATCH PROCESSING UTILITIES
// =============================================================================

/**
 * Marks a batch of symbols as processed by updating the tracking sets.
 * This is a utility function to keep the main algorithm clean.
 *
 * Both collections are mutated in place: each symbol's key is removed from
 * `remainingSymbols` and added to `processedSymbols`.
 *
 * @param symbols - Array of symbols to mark as processed
 * @param remainingSymbols - Map to remove symbols from
 * @param processedSymbols - Set to add symbols to
 */
function processBatch(
  symbols: SymbolRef[],
  remainingSymbols: Map<string, SymbolRef>,
  processedSymbols: Set<string>,
): void {
  for (const symbolRef of symbols) {
    const key = symbolRefToKey(symbolRef);
    remainingSymbols.delete(key);
    processedSymbols.add(key);
  }
}

// =============================================================================
// MAIN ALGORITHM
// =============================================================================

/**
 * Generates group layers for dependency-aware parallel processing.
 *
 * This is the main algorithm that creates a series of layers where:
 * - Symbols in each layer can be processed in parallel
 * - Symbols in layer N only depend on symbols from layers 0 to N-1
 * - The algorithm handles circular dependencies using SCC analysis
 *
 * Algorithm Overview:
 * 1. Start with all symbols as "remaining"
 * 2. While there are remaining symbols:
 *    a.
Find symbols with no dependencies → process them (optimal case) + * b. If no independent symbols exist → use SCC-based cycle breaking + * 3. Each iteration creates a new layer + * + * The algorithm prioritizes processing independent symbols first (no dependencies), + * and only resorts to cycle breaking when necessary. This minimizes the number + * of layers and maximizes parallelism. + * + * @param manifest - The complete dependency manifest for the codebase + * @returns Array of GroupLayers representing the processing order + */ +export function generateGroupLayers( + manifest: DependencyManifest, +): GroupLayer[] { + const groupLayers: GroupLayer[] = []; + const processedSymbols = new Set(); // Symbols we've already processed + const remainingSymbols = new Map(); // Symbols still to process + + // Initialize: all symbols start as "remaining" + for (const [fileId, fileManifest] of Object.entries(manifest)) { + for (const symbolId of Object.keys(fileManifest.symbols)) { + const ref = { fileId, symbolId }; + remainingSymbols.set(symbolRefToKey(ref), ref); + } + } + + let level = 0; + + // Main processing loop: continue until all symbols are processed + while (remainingSymbols.size > 0) { + // Strategy 1: Look for symbols with no dependencies (optimal case) + const independentSymbols = findIndependentSymbols( + remainingSymbols, + manifest, + processedSymbols, + ); + + if (independentSymbols.length > 0) { + // Found independent symbols - process them all in this layer + groupLayers.push({ + level, + symbolRefsToProcess: independentSymbols, + notYetProcessedDependencySymbolRefs: [], // No dependencies since they're independent + }); + processBatch(independentSymbols, remainingSymbols, processedSymbols); + } else { + // Strategy 2: No independent symbols - must break cycles using SCC analysis + const result = selectCycleBreakers( + remainingSymbols, + manifest, + processedSymbols, + ); + + groupLayers.push({ + level, + symbolRefsToProcess: result.symbols, + 
notYetProcessedDependencySymbolRefs: result.dependencies, + }); + processBatch(result.symbols, remainingSymbols, processedSymbols); + } + + level++; + } + + return groupLayers; +} diff --git a/src/manifest/dependencyManifest/labeling/index.ts b/src/manifest/dependencyManifest/labeling/index.ts new file mode 100644 index 00000000..6abe115f --- /dev/null +++ b/src/manifest/dependencyManifest/labeling/index.ts @@ -0,0 +1,45 @@ +import type { DependencyManifest } from "../types.ts"; +import { getModel, type ModelProvider } from "./model.ts"; +import { generateGroupLayers } from "./grouping.ts"; +import { createGroupSymbolLabelingWorkflow } from "./graph.ts"; + +export async function generateSymbolDescriptions( + files: Map, + dependencyManifest: DependencyManifest, + apiKey: string, + modelProvider: ModelProvider, + maxConcurrency?: number, +): Promise { + console.info("Generating descriptions for symbols..."); + + const groups = generateGroupLayers(dependencyManifest); + + console.info(`āœ… Successfully generated ${groups.length} independent groups`); + + const model = getModel(modelProvider, apiKey, maxConcurrency); + + console.info("Starting symbol labeling..."); + for (const [index, group] of groups.entries()) { + const workflow = createGroupSymbolLabelingWorkflow( + files, + dependencyManifest, + group, + model, + ); + + const state = await workflow.invoke({}); + for (const result of state.results) { + dependencyManifest[result.symbolRef.fileId].symbols[ + result.symbolRef.symbolId + ].description = result.description; + } + + console.info( + `āœ… Successfully processed group ${ + index + 1 + } of ${groups.length} groups. 
${group.symbolRefsToProcess.length} symbols processed`, + ); + } + + return dependencyManifest; +} diff --git a/src/manifest/dependencyManifest/labeling/model.ts b/src/manifest/dependencyManifest/labeling/model.ts new file mode 100644 index 00000000..c6f7c482 --- /dev/null +++ b/src/manifest/dependencyManifest/labeling/model.ts @@ -0,0 +1,46 @@ +import { ChatGoogleGenerativeAI } from "@langchain/google-genai"; +import type { BaseChatModel } from "@langchain/core/language_models/chat_models"; +import { ChatOpenAI } from "@langchain/openai"; +import { ChatAnthropic } from "@langchain/anthropic"; + +export const GOOGLE_PROVIDER = "google"; +export const OPENAI_PROVIDER = "openai"; +export const ANTHROPIC_PROVIDER = "anthropic"; + +export type ModelProvider = + | typeof GOOGLE_PROVIDER + | typeof OPENAI_PROVIDER + | typeof ANTHROPIC_PROVIDER; + +export function getModel( + provider: ModelProvider, + apiKey: string, + maxConcurrency?: number, +): BaseChatModel { + // // Later we will support multiple model and will get them from the config + if (provider === OPENAI_PROVIDER) { + return new ChatOpenAI({ + apiKey, + model: "o3-mini", + maxConcurrency: maxConcurrency ? maxConcurrency : Infinity, + }) as unknown as BaseChatModel; + } + + if (provider === GOOGLE_PROVIDER) { + return new ChatGoogleGenerativeAI({ + apiKey, + model: "gemini-2.5-flash-lite-preview-06-17", + maxConcurrency: maxConcurrency ? maxConcurrency : Infinity, + }) as BaseChatModel; + } + + if (provider === ANTHROPIC_PROVIDER) { + return new ChatAnthropic({ + apiKey, + model: "claude-3-5-sonnet-latest", + maxConcurrency: maxConcurrency ? 
maxConcurrency : Infinity, + }) as unknown as BaseChatModel; + } + + throw new Error(`Unsupported model provider: ${provider}`); +} diff --git a/src/manifest/dependencyManifest/labeling/types.ts b/src/manifest/dependencyManifest/labeling/types.ts new file mode 100644 index 00000000..06b29ccd --- /dev/null +++ b/src/manifest/dependencyManifest/labeling/types.ts @@ -0,0 +1,36 @@ +/** + * Reference to a specific symbol within a file. + * Used to uniquely identify symbols across the entire codebase. + */ +export type SymbolRef = { + /** The unique identifier of the file containing the symbol */ + fileId: string; + /** The unique identifier of the symbol within the file */ + symbolId: string; +}; + +/** + * Represents a layer of symbols that can be processed in parallel. + * The key insight is that symbols in layer N only depend on symbols from layers 0 to N-1. + * This allows for efficient parallel processing while respecting dependency order. + */ +export type GroupLayer = { + /** + * The dependency level (0 to n). + * Symbols at level n depend on symbols of groups from levels 0 through n-1. + */ + level: number; + /** + * The symbolRefs to process in this layer. + * These symbolRefs can be processed in parallel. They do not depend on each other. + */ + symbolRefsToProcess: SymbolRef[]; + /** + * Dependency symbolRefs that some symbolRefsToProcess depend on. + * These symbolRefs will be process in a later layer. + * We have no information about them. + * This should be as little as possible. Best effort is made to create the groups + * that have the least notYetProcessedDependencySymbolRefs as possible. Ideally none. 
+ */ + notYetProcessedDependencySymbolRefs: SymbolRef[]; +}; diff --git a/src/manifest/dependencyManifest/python/index.ts b/src/manifest/dependencyManifest/python/index.ts index 80aefe6d..39a58d00 100644 --- a/src/manifest/dependencyManifest/python/index.ts +++ b/src/manifest/dependencyManifest/python/index.ts @@ -253,6 +253,8 @@ export function generatePythonDependencyManifest( symbols[symbol.id] = { id: symbol.id, type: symbol.type, + positions: symbol.positions, + description: "", metrics: { [metricLinesCount]: symbol.metrics.linesCount, [metricCodeLineCount]: symbol.metrics.codeLineCount, diff --git a/src/manifest/dependencyManifest/types.ts b/src/manifest/dependencyManifest/types.ts index c5e15f10..9c13748c 100644 --- a/src/manifest/dependencyManifest/types.ts +++ b/src/manifest/dependencyManifest/types.ts @@ -107,6 +107,19 @@ export interface SymbolDependencyManifest { id: string; /** The type of this symbol: "class", "function", or "variable". */ type: SymbolType; + /** The start position of the symbol. */ + positions: { + start: { + index: number; + row: number; + column: number; + }; + end: { + index: number; + row: number; + column: number; + }; + }[]; /** Metrics for the symbol. */ metrics: { /** The number of lines in the symbol. */ @@ -124,6 +137,8 @@ export interface SymbolDependencyManifest { /** The cyclomatic complexity of the symbol. */ [metricCyclomaticComplexity]: number; }; + /** A short description of the symbol. */ + description: string; /** Other modules/files on which this symbol depends. * Keyed by the dependency's unique ID (often a file path). */ dependencies: Record;