From 029cad84224acadc56eb57a2f05b8989957f7e5d Mon Sep 17 00:00:00 2001 From: bkellam Date: Wed, 12 Nov 2025 18:58:25 -0800 Subject: [PATCH 1/5] 10x performance improvement by optimizing zod parsing --- packages/web/package.json | 1 + .../search/components/searchResultsPage.tsx | 2 +- packages/web/src/features/search/schemas.ts | 1 + packages/web/src/features/search/searchApi.ts | 92 +++++++++++++++---- yarn.lock | 10 ++ 5 files changed, 85 insertions(+), 21 deletions(-) diff --git a/packages/web/package.json b/packages/web/package.json index ba3c3dbc4..c829052f6 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -51,6 +51,7 @@ "@codemirror/search": "^6.5.6", "@codemirror/state": "^6.4.1", "@codemirror/view": "^6.33.0", + "@duplojs/zod-accelerator": "^2.6.2", "@floating-ui/react": "^0.27.2", "@hookform/resolvers": "^3.9.0", "@iconify/react": "^5.1.0", diff --git a/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx b/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx index 553ee1329..c500f5e3f 100644 --- a/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx +++ b/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx @@ -35,7 +35,7 @@ import { FilterPanel } from "./filterPanel"; import { useFilteredMatches } from "./filterPanel/useFilterMatches"; import { SearchResultsPanel } from "./searchResultsPanel"; -const DEFAULT_MAX_MATCH_COUNT = 5000; +const DEFAULT_MAX_MATCH_COUNT = 100_000; interface SearchResultsPageProps { searchQuery: string; diff --git a/packages/web/src/features/search/schemas.ts b/packages/web/src/features/search/schemas.ts index 50a4ee03b..711c810d9 100644 --- a/packages/web/src/features/search/schemas.ts +++ b/packages/web/src/features/search/schemas.ts @@ -141,6 +141,7 @@ export const searchResponseSchema = z.object({ repositoryInfo: z.array(repositoryInfoSchema), isBranchFilteringEnabled: z.boolean(), isSearchExhaustive: z.boolean(), + __debug_timings: 
z.record(z.string(), z.number()).optional(), }); export const fileSourceRequestSchema = z.object({ diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index 35df48486..bfbd42846 100644 --- a/packages/web/src/features/search/searchApi.ts +++ b/packages/web/src/features/search/searchApi.ts @@ -1,16 +1,21 @@ 'use server'; +import { sew } from "@/actions"; +import { withOptionalAuthV2 } from "@/withAuthV2"; +import { ZodAccelerator } from "@duplojs/zod-accelerator"; +import { PrismaClient, Repo } from "@sourcebot/db"; +import { base64Decode, createLogger } from "@sourcebot/shared"; +import { StatusCodes } from "http-status-codes"; +import z from "zod"; +import { ErrorCode } from "../../lib/errorCodes"; import { invalidZoektResponse, ServiceError } from "../../lib/serviceError"; -import { isServiceError } from "../../lib/utils"; +import { isServiceError, measure } from "../../lib/utils"; +import { SearchRequest, SearchResponse, SourceRange } from "./types"; import { zoektFetch } from "./zoektClient"; -import { ErrorCode } from "../../lib/errorCodes"; -import { StatusCodes } from "http-status-codes"; import { zoektSearchResponseSchema } from "./zoektSchema"; -import { SearchRequest, SearchResponse, SourceRange } from "./types"; -import { PrismaClient, Repo } from "@sourcebot/db"; -import { sew } from "@/actions"; -import { base64Decode } from "@sourcebot/shared"; -import { withOptionalAuthV2 } from "@/withAuthV2"; + +const acceleratedZoektSearchResponseSchema = ZodAccelerator.build(zoektSearchResponseSchema); +const logger = createLogger("searchApi"); // List of supported query prefixes in zoekt. 
// @see : https://github.com/sourcebot-dev/zoekt/blob/main/query/parse.go#L417 @@ -126,7 +131,7 @@ const getFileWebUrl = (template: string, branch: string, fileName: string): stri return encodeURI(url + optionalQueryParams); } -export const search = async ({ query, matches, contextLines, whole }: SearchRequest) => sew(() => +export const search = async ({ query, matches, contextLines, whole }: SearchRequest): Promise => sew(() => withOptionalAuthV2(async ({ org, prisma }) => { const transformedQuery = await transformZoektQuery(query, org.id, prisma); if (isServiceError(transformedQuery)) { @@ -200,20 +205,22 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ "X-Tenant-ID": org.id.toString() }; - const searchResponse = await zoektFetch({ - path: "/api/search", - body, - header, - method: "POST", - }); + const { data: searchResponse, durationMs: fetchDurationMs } = await measure( + () => zoektFetch({ + path: "/api/search", + body, + header, + method: "POST", + }), + "zoekt_fetch", + false + ); if (!searchResponse.ok) { return invalidZoektResponse(searchResponse); } - const searchBody = await searchResponse.json(); - - const parser = zoektSearchResponseSchema.transform(async ({ Result }) => { + const transformZoektSearchResponse = async ({ Result }: z.infer) => { // @note (2025-05-12): in zoekt, repositories are identified by the `RepositoryID` field // which corresponds to the `id` in the Repo table. 
In order to efficiently fetch repository // metadata when transforming (potentially thousands) of file matches, we aggregate a unique @@ -379,7 +386,52 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ flushReason: Result.FlushReason, } } satisfies SearchResponse; - }); + } + + const { data: rawZoektResponse, durationMs: parseJsonDurationMs } = await measure( + () => searchResponse.json(), + "parse_json", + false + ); - return parser.parseAsync(searchBody); + const { data: zoektResponse, durationMs: parseZoektResponseDurationMs } = await measure( + () => acceleratedZoektSearchResponseSchema.parseAsync(rawZoektResponse), + "parse_zoekt_response", + false + ); + + const { data: response, durationMs: transformZoektResponseDurationMs } = await measure( + () => transformZoektSearchResponse(zoektResponse), + "transform_zoekt_response", + false + ); + + const totalDurationMs = fetchDurationMs + parseJsonDurationMs + parseZoektResponseDurationMs + transformZoektResponseDurationMs; + + // Debug log: timing breakdown + const timings = [ + { name: "zoekt_fetch", duration: fetchDurationMs }, + { name: "parse_json", duration: parseJsonDurationMs }, + { name: "parse_zoekt_response", duration: parseZoektResponseDurationMs }, + { name: "transform_zoekt_response", duration: transformZoektResponseDurationMs }, + ]; + + logger.debug(`Search timing breakdown (query: "${query}"):`); + timings.forEach(({ name, duration }) => { + const percentage = ((duration / totalDurationMs) * 100).toFixed(1); + const durationStr = duration.toFixed(2).padStart(8); + const percentageStr = percentage.padStart(5); + logger.debug(` ${name.padEnd(25)} ${durationStr}ms (${percentageStr}%)`); + }); + logger.debug(` ${"TOTAL".padEnd(25)} ${totalDurationMs.toFixed(2).padStart(8)}ms (100.0%)`); + + return { + ...response, + __debug_timings: { + zoekt_fetch: fetchDurationMs, + parse_json: parseJsonDurationMs, + parse_zoekt_response: parseZoektResponseDurationMs, + 
transform_zoekt_response: transformZoektResponseDurationMs, + } + } satisfies SearchResponse; })); diff --git a/yarn.lock b/yarn.lock index f103fe788..26e73c0a6 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1630,6 +1630,15 @@ __metadata: languageName: node linkType: hard +"@duplojs/zod-accelerator@npm:^2.6.2": + version: 2.6.2 + resolution: "@duplojs/zod-accelerator@npm:2.6.2" + peerDependencies: + zod: ">=3.0.0 <4.0.0" + checksum: 10c0/9b8a1dd6cc7c79df16d6e82b34a3f9f76242f513bb272265adbf159f565785f7c4be14d9cf492053fbacd806d0865b20ce1e9adff28f92f7ff9730121688e2b5 + languageName: node + linkType: hard + "@emnapi/core@npm:^1.3.1": version: 1.3.1 resolution: "@emnapi/core@npm:1.3.1" @@ -8058,6 +8067,7 @@ __metadata: "@codemirror/search": "npm:^6.5.6" "@codemirror/state": "npm:^6.4.1" "@codemirror/view": "npm:^6.33.0" + "@duplojs/zod-accelerator": "npm:^2.6.2" "@eslint/eslintrc": "npm:^3" "@floating-ui/react": "npm:^0.27.2" "@hookform/resolvers": "npm:^3.9.0" From 60c2b93987b6a0b3030f24823b176b114bc5810e Mon Sep 17 00:00:00 2001 From: bkellam Date: Wed, 12 Nov 2025 20:14:42 -0800 Subject: [PATCH 2/5] remove zod parsing altogether --- packages/web/package.json | 1 - packages/web/src/features/search/searchApi.ts | 19 ++++++------------- .../web/src/features/search/zoektSchema.ts | 2 ++ yarn.lock | 10 ---------- 4 files changed, 8 insertions(+), 24 deletions(-) diff --git a/packages/web/package.json b/packages/web/package.json index c829052f6..ba3c3dbc4 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -51,7 +51,6 @@ "@codemirror/search": "^6.5.6", "@codemirror/state": "^6.4.1", "@codemirror/view": "^6.33.0", - "@duplojs/zod-accelerator": "^2.6.2", "@floating-ui/react": "^0.27.2", "@hookform/resolvers": "^3.9.0", "@iconify/react": "^5.1.0", diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index bfbd42846..d480c96ab 100644 --- a/packages/web/src/features/search/searchApi.ts +++ 
b/packages/web/src/features/search/searchApi.ts @@ -2,19 +2,16 @@ import { sew } from "@/actions"; import { withOptionalAuthV2 } from "@/withAuthV2"; -import { ZodAccelerator } from "@duplojs/zod-accelerator"; import { PrismaClient, Repo } from "@sourcebot/db"; import { base64Decode, createLogger } from "@sourcebot/shared"; import { StatusCodes } from "http-status-codes"; -import z from "zod"; import { ErrorCode } from "../../lib/errorCodes"; import { invalidZoektResponse, ServiceError } from "../../lib/serviceError"; import { isServiceError, measure } from "../../lib/utils"; import { SearchRequest, SearchResponse, SourceRange } from "./types"; import { zoektFetch } from "./zoektClient"; -import { zoektSearchResponseSchema } from "./zoektSchema"; +import { ZoektSearchResponse } from "./zoektSchema"; -const acceleratedZoektSearchResponseSchema = ZodAccelerator.build(zoektSearchResponseSchema); const logger = createLogger("searchApi"); // List of supported query prefixes in zoekt. @@ -220,7 +217,7 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ return invalidZoektResponse(searchResponse); } - const transformZoektSearchResponse = async ({ Result }: z.infer) => { + const transformZoektSearchResponse = async ({ Result }: ZoektSearchResponse) => { // @note (2025-05-12): in zoekt, repositories are identified by the `RepositoryID` field // which corresponds to the `id` in the Repo table. 
In order to efficiently fetch repository // metadata when transforming (potentially thousands) of file matches, we aggregate a unique @@ -394,11 +391,9 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ false ); - const { data: zoektResponse, durationMs: parseZoektResponseDurationMs } = await measure( - () => acceleratedZoektSearchResponseSchema.parseAsync(rawZoektResponse), - "parse_zoekt_response", - false - ); + // @note: We do not use zod parseAsync here since in cases where the + // response is large (> 40MB), there can be significant performance issues. + const zoektResponse = rawZoektResponse as ZoektSearchResponse; const { data: response, durationMs: transformZoektResponseDurationMs } = await measure( () => transformZoektSearchResponse(zoektResponse), @@ -406,13 +401,12 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ false ); - const totalDurationMs = fetchDurationMs + parseJsonDurationMs + parseZoektResponseDurationMs + transformZoektResponseDurationMs; + const totalDurationMs = fetchDurationMs + parseJsonDurationMs + transformZoektResponseDurationMs; // Debug log: timing breakdown const timings = [ { name: "zoekt_fetch", duration: fetchDurationMs }, { name: "parse_json", duration: parseJsonDurationMs }, - { name: "parse_zoekt_response", duration: parseZoektResponseDurationMs }, { name: "transform_zoekt_response", duration: transformZoektResponseDurationMs }, ]; @@ -430,7 +424,6 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ __debug_timings: { zoekt_fetch: fetchDurationMs, parse_json: parseJsonDurationMs, - parse_zoekt_response: parseZoektResponseDurationMs, transform_zoekt_response: transformZoektResponseDurationMs, } } satisfies SearchResponse; diff --git a/packages/web/src/features/search/zoektSchema.ts b/packages/web/src/features/search/zoektSchema.ts index 752d360cf..c4f37e38f 100644 --- a/packages/web/src/features/search/zoektSchema.ts +++ 
b/packages/web/src/features/search/zoektSchema.ts @@ -75,6 +75,8 @@ export const zoektSearchResponseSchema = z.object({ }), }); +export type ZoektSearchResponse = z.infer; + // @see : https://github.com/sourcebot-dev/zoekt/blob/3780e68cdb537d5a7ed2c84d9b3784f80c7c5d04/api.go#L728 const zoektRepoStatsSchema = z.object({ Repos: z.number(), diff --git a/yarn.lock b/yarn.lock index 26e73c0a6..f103fe788 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1630,15 +1630,6 @@ __metadata: languageName: node linkType: hard -"@duplojs/zod-accelerator@npm:^2.6.2": - version: 2.6.2 - resolution: "@duplojs/zod-accelerator@npm:2.6.2" - peerDependencies: - zod: ">=3.0.0 <4.0.0" - checksum: 10c0/9b8a1dd6cc7c79df16d6e82b34a3f9f76242f513bb272265adbf159f565785f7c4be14d9cf492053fbacd806d0865b20ce1e9adff28f92f7ff9730121688e2b5 - languageName: node - linkType: hard - "@emnapi/core@npm:^1.3.1": version: 1.3.1 resolution: "@emnapi/core@npm:1.3.1" @@ -8067,7 +8058,6 @@ __metadata: "@codemirror/search": "npm:^6.5.6" "@codemirror/state": "npm:^6.4.1" "@codemirror/view": "npm:^6.33.0" - "@duplojs/zod-accelerator": "npm:^2.6.2" "@eslint/eslintrc": "npm:^3" "@floating-ui/react": "npm:^0.27.2" "@hookform/resolvers": "npm:^3.9.0" From 3efe7bf256c2688ad2ed66bea728137d34acce2d Mon Sep 17 00:00:00 2001 From: bkellam Date: Wed, 12 Nov 2025 23:13:37 -0800 Subject: [PATCH 3/5] changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fce2fdf57..3dc75e06b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed +- Bumped the default requested search result count from 5k to 100k after optimization pass. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615) + ### Fixed - Fixed incorrect shutdown of PostHog SDK in the worker. [#609](https://github.com/sourcebot-dev/sourcebot/pull/609) - Fixed race condition in job schedulers. 
[#607](https://github.com/sourcebot-dev/sourcebot/pull/607) - Fixed connection sync jobs getting stuck in pending or in progress after restarting the worker. [#612](https://github.com/sourcebot-dev/sourcebot/pull/612) - Fixed issue where connections would always sync on startup, regardless if they changed or not. [#613](https://github.com/sourcebot-dev/sourcebot/pull/613) +- Fixed performance bottleneck in search api, resulting in a order of magnitutde performance improvement. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615) ### Added - Added force resync buttons for connections and repositories. [#610](https://github.com/sourcebot-dev/sourcebot/pull/610) From d4ae3ffd69504e6a122291bc8a40efd9c5e22ee3 Mon Sep 17 00:00:00 2001 From: bkellam Date: Wed, 12 Nov 2025 23:13:56 -0800 Subject: [PATCH 4/5] remove unecassary zod parsing client side --- packages/web/src/app/api/(client)/client.ts | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/packages/web/src/app/api/(client)/client.ts b/packages/web/src/app/api/(client)/client.ts index 3238c7c5e..7e5466d8c 100644 --- a/packages/web/src/app/api/(client)/client.ts +++ b/packages/web/src/app/api/(client)/client.ts @@ -1,6 +1,5 @@ 'use client'; -import { getVersionResponseSchema, getReposResponseSchema } from "@/lib/schemas"; import { ServiceError } from "@/lib/serviceError"; import { GetVersionResponse, GetReposResponse } from "@/lib/types"; import { isServiceError } from "@/lib/utils"; @@ -10,10 +9,6 @@ import { SearchRequest, SearchResponse, } from "@/features/search/types"; -import { - fileSourceResponseSchema, - searchResponseSchema, -} from "@/features/search/schemas"; export const search = async (body: SearchRequest, domain: string): Promise => { const result = await fetch("/api/search", { @@ -29,10 +24,10 @@ export const search = async (body: SearchRequest, domain: string): Promise => { +export const fetchFileSource = async (body: FileSourceRequest, domain: string): Promise 
=> { const result = await fetch("/api/source", { method: "POST", headers: { @@ -42,7 +37,7 @@ export const fetchFileSource = async (body: FileSourceRequest, domain: string): body: JSON.stringify(body), }).then(response => response.json()); - return fileSourceResponseSchema.parse(result); + return result as FileSourceResponse | ServiceError; } export const getRepos = async (): Promise => { @@ -53,7 +48,7 @@ export const getRepos = async (): Promise => { }, }).then(response => response.json()); - return getReposResponseSchema.parse(result); + return result as GetReposResponse | ServiceError; } export const getVersion = async (): Promise => { @@ -63,5 +58,5 @@ export const getVersion = async (): Promise => { "Content-Type": "application/json", }, }).then(response => response.json()); - return getVersionResponseSchema.parse(result); + return result as GetVersionResponse; } From e7c4730d5c3325e5f53138f6801b8e0e944661b2 Mon Sep 17 00:00:00 2001 From: bkellam Date: Wed, 12 Nov 2025 23:16:19 -0800 Subject: [PATCH 5/5] nit on changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dc75e06b..0ebd93190 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixed race condition in job schedulers. [#607](https://github.com/sourcebot-dev/sourcebot/pull/607) - Fixed connection sync jobs getting stuck in pending or in progress after restarting the worker. [#612](https://github.com/sourcebot-dev/sourcebot/pull/612) - Fixed issue where connections would always sync on startup, regardless if they changed or not. [#613](https://github.com/sourcebot-dev/sourcebot/pull/613) -- Fixed performance bottleneck in search api, resulting in a order of magnitutde performance improvement. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615) +- Fixed performance bottleneck in search api. 
Result is an order of magnitude improvement to average search time according to benchmarks. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615) ### Added - Added force resync buttons for connections and repositories. [#610](https://github.com/sourcebot-dev/sourcebot/pull/610)