diff --git a/CHANGELOG.md b/CHANGELOG.md index fce2fdf57..0ebd93190 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed +- Bumped the default requested search result count from 5k to 100k after optimization pass. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615) + ### Fixed - Fixed incorrect shutdown of PostHog SDK in the worker. [#609](https://github.com/sourcebot-dev/sourcebot/pull/609) - Fixed race condition in job schedulers. [#607](https://github.com/sourcebot-dev/sourcebot/pull/607) - Fixed connection sync jobs getting stuck in pending or in progress after restarting the worker. [#612](https://github.com/sourcebot-dev/sourcebot/pull/612) - Fixed issue where connections would always sync on startup, regardless if they changed or not. [#613](https://github.com/sourcebot-dev/sourcebot/pull/613) +- Fixed performance bottleneck in search api. Result is an order of magnitude improvement to average search time according to benchmarks. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615) ### Added - Added force resync buttons for connections and repositories. 
[#610](https://github.com/sourcebot-dev/sourcebot/pull/610) diff --git a/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx b/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx index 553ee1329..c500f5e3f 100644 --- a/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx +++ b/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx @@ -35,7 +35,7 @@ import { FilterPanel } from "./filterPanel"; import { useFilteredMatches } from "./filterPanel/useFilterMatches"; import { SearchResultsPanel } from "./searchResultsPanel"; -const DEFAULT_MAX_MATCH_COUNT = 5000; +const DEFAULT_MAX_MATCH_COUNT = 100_000; interface SearchResultsPageProps { searchQuery: string; diff --git a/packages/web/src/app/api/(client)/client.ts b/packages/web/src/app/api/(client)/client.ts index 3238c7c5e..7e5466d8c 100644 --- a/packages/web/src/app/api/(client)/client.ts +++ b/packages/web/src/app/api/(client)/client.ts @@ -1,6 +1,5 @@ 'use client'; -import { getVersionResponseSchema, getReposResponseSchema } from "@/lib/schemas"; import { ServiceError } from "@/lib/serviceError"; import { GetVersionResponse, GetReposResponse } from "@/lib/types"; import { isServiceError } from "@/lib/utils"; @@ -10,10 +9,6 @@ import { SearchRequest, SearchResponse, } from "@/features/search/types"; -import { - fileSourceResponseSchema, - searchResponseSchema, -} from "@/features/search/schemas"; export const search = async (body: SearchRequest, domain: string): Promise => { const result = await fetch("/api/search", { @@ -29,10 +24,10 @@ export const search = async (body: SearchRequest, domain: string): Promise => { +export const fetchFileSource = async (body: FileSourceRequest, domain: string): Promise => { const result = await fetch("/api/source", { method: "POST", headers: { @@ -42,7 +37,7 @@ export const fetchFileSource = async (body: FileSourceRequest, domain: string): body: JSON.stringify(body), }).then(response => response.json()); - return 
fileSourceResponseSchema.parse(result); + return result as FileSourceResponse | ServiceError; } export const getRepos = async (): Promise => { @@ -53,7 +48,7 @@ export const getRepos = async (): Promise => { }, }).then(response => response.json()); - return getReposResponseSchema.parse(result); + return result as GetReposResponse | ServiceError; } export const getVersion = async (): Promise => { @@ -63,5 +58,5 @@ export const getVersion = async (): Promise => { "Content-Type": "application/json", }, }).then(response => response.json()); - return getVersionResponseSchema.parse(result); + return result as GetVersionResponse; } diff --git a/packages/web/src/features/search/schemas.ts b/packages/web/src/features/search/schemas.ts index 50a4ee03b..711c810d9 100644 --- a/packages/web/src/features/search/schemas.ts +++ b/packages/web/src/features/search/schemas.ts @@ -141,6 +141,7 @@ export const searchResponseSchema = z.object({ repositoryInfo: z.array(repositoryInfoSchema), isBranchFilteringEnabled: z.boolean(), isSearchExhaustive: z.boolean(), + __debug_timings: z.record(z.string(), z.number()).optional(), }); export const fileSourceRequestSchema = z.object({ diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index 35df48486..d480c96ab 100644 --- a/packages/web/src/features/search/searchApi.ts +++ b/packages/web/src/features/search/searchApi.ts @@ -1,16 +1,18 @@ 'use server'; -import { invalidZoektResponse, ServiceError } from "../../lib/serviceError"; -import { isServiceError } from "../../lib/utils"; -import { zoektFetch } from "./zoektClient"; -import { ErrorCode } from "../../lib/errorCodes"; -import { StatusCodes } from "http-status-codes"; -import { zoektSearchResponseSchema } from "./zoektSchema"; -import { SearchRequest, SearchResponse, SourceRange } from "./types"; -import { PrismaClient, Repo } from "@sourcebot/db"; import { sew } from "@/actions"; -import { base64Decode } from "@sourcebot/shared"; 
import { withOptionalAuthV2 } from "@/withAuthV2"; +import { PrismaClient, Repo } from "@sourcebot/db"; +import { base64Decode, createLogger } from "@sourcebot/shared"; +import { StatusCodes } from "http-status-codes"; +import { ErrorCode } from "../../lib/errorCodes"; +import { invalidZoektResponse, ServiceError } from "../../lib/serviceError"; +import { isServiceError, measure } from "../../lib/utils"; +import { SearchRequest, SearchResponse, SourceRange } from "./types"; +import { zoektFetch } from "./zoektClient"; +import { ZoektSearchResponse } from "./zoektSchema"; + +const logger = createLogger("searchApi"); // List of supported query prefixes in zoekt. // @see : https://github.com/sourcebot-dev/zoekt/blob/main/query/parse.go#L417 @@ -126,7 +128,7 @@ const getFileWebUrl = (template: string, branch: string, fileName: string): stri return encodeURI(url + optionalQueryParams); } -export const search = async ({ query, matches, contextLines, whole }: SearchRequest) => sew(() => +export const search = async ({ query, matches, contextLines, whole }: SearchRequest): Promise => sew(() => withOptionalAuthV2(async ({ org, prisma }) => { const transformedQuery = await transformZoektQuery(query, org.id, prisma); if (isServiceError(transformedQuery)) { @@ -200,20 +202,22 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ "X-Tenant-ID": org.id.toString() }; - const searchResponse = await zoektFetch({ - path: "/api/search", - body, - header, - method: "POST", - }); + const { data: searchResponse, durationMs: fetchDurationMs } = await measure( + () => zoektFetch({ + path: "/api/search", + body, + header, + method: "POST", + }), + "zoekt_fetch", + false + ); if (!searchResponse.ok) { return invalidZoektResponse(searchResponse); } - const searchBody = await searchResponse.json(); - - const parser = zoektSearchResponseSchema.transform(async ({ Result }) => { + const transformZoektSearchResponse = async ({ Result }: ZoektSearchResponse) => { // 
@note (2025-05-12): in zoekt, repositories are identified by the `RepositoryID` field // which corresponds to the `id` in the Repo table. In order to efficiently fetch repository // metadata when transforming (potentially thousands) of file matches, we aggregate a unique @@ -379,7 +383,48 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ flushReason: Result.FlushReason, } } satisfies SearchResponse; - }); + } + + const { data: rawZoektResponse, durationMs: parseJsonDurationMs } = await measure( + () => searchResponse.json(), + "parse_json", + false + ); + + // @note: We do not use zod parseAsync here since in cases where the + // response is large (> 40MB), there can be significant performance issues. + const zoektResponse = rawZoektResponse as ZoektSearchResponse; + + const { data: response, durationMs: transformZoektResponseDurationMs } = await measure( + () => transformZoektSearchResponse(zoektResponse), + "transform_zoekt_response", + false + ); - return parser.parseAsync(searchBody); + const totalDurationMs = fetchDurationMs + parseJsonDurationMs + transformZoektResponseDurationMs; + + // Debug log: timing breakdown + const timings = [ + { name: "zoekt_fetch", duration: fetchDurationMs }, + { name: "parse_json", duration: parseJsonDurationMs }, + { name: "transform_zoekt_response", duration: transformZoektResponseDurationMs }, + ]; + + logger.debug(`Search timing breakdown (query: "${query}"):`); + timings.forEach(({ name, duration }) => { + const percentage = ((duration / totalDurationMs) * 100).toFixed(1); + const durationStr = duration.toFixed(2).padStart(8); + const percentageStr = percentage.padStart(5); + logger.debug(` ${name.padEnd(25)} ${durationStr}ms (${percentageStr}%)`); + }); + logger.debug(` ${"TOTAL".padEnd(25)} ${totalDurationMs.toFixed(2).padStart(8)}ms (100.0%)`); + + return { + ...response, + __debug_timings: { + zoekt_fetch: fetchDurationMs, + parse_json: parseJsonDurationMs, + transform_zoekt_response: 
transformZoektResponseDurationMs, + } + } satisfies SearchResponse; })); diff --git a/packages/web/src/features/search/zoektSchema.ts b/packages/web/src/features/search/zoektSchema.ts index 752d360cf..c4f37e38f 100644 --- a/packages/web/src/features/search/zoektSchema.ts +++ b/packages/web/src/features/search/zoektSchema.ts @@ -75,6 +75,8 @@ export const zoektSearchResponseSchema = z.object({ }), }); +export type ZoektSearchResponse = z.infer; + // @see : https://github.com/sourcebot-dev/zoekt/blob/3780e68cdb537d5a7ed2c84d9b3784f80c7c5d04/api.go#L728 const zoektRepoStatsSchema = z.object({ Repos: z.number(),