From b0fbc714990f8eb94051c72facbab98364110dbe Mon Sep 17 00:00:00 2001 From: bkellam Date: Tue, 7 Oct 2025 19:47:25 -0700 Subject: [PATCH 1/4] improvements --- packages/web/src/app/[domain]/search/page.tsx | 149 ++++++++++-------- .../web/src/app/components/codeSnippet.tsx | 4 +- packages/web/src/env.mjs | 3 - packages/web/src/features/codeNav/actions.ts | 2 +- packages/web/src/features/search/schemas.ts | 102 ++++++++---- packages/web/src/features/search/searchApi.ts | 89 ++++++++--- packages/web/src/features/search/types.ts | 4 +- packages/web/src/lib/posthogEvents.ts | 1 + 8 files changed, 231 insertions(+), 123 deletions(-) diff --git a/packages/web/src/app/[domain]/search/page.tsx b/packages/web/src/app/[domain]/search/page.tsx index a92956d91..e992f1e44 100644 --- a/packages/web/src/app/[domain]/search/page.tsx +++ b/packages/web/src/app/[domain]/search/page.tsx @@ -21,19 +21,21 @@ import { FilterPanel } from "./components/filterPanel"; import { SearchResultsPanel } from "./components/searchResultsPanel"; import { useDomain } from "@/hooks/useDomain"; import { useToast } from "@/components/hooks/use-toast"; -import { RepositoryInfo, SearchResultFile } from "@/features/search/types"; +import { RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search/types"; import { AnimatedResizableHandle } from "@/components/ui/animatedResizableHandle"; import { useFilteredMatches } from "./components/filterPanel/useFilterMatches"; import { Button } from "@/components/ui/button"; import { ImperativePanelHandle } from "react-resizable-panels"; -import { FilterIcon } from "lucide-react"; +import { AlertTriangleIcon, FilterIcon } from "lucide-react"; import { useHotkeys } from "react-hotkeys-hook"; import { useLocalStorage } from "@uidotdev/usehooks"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import { KeyboardShortcutHint } from "@/app/components/keyboardShortcutHint"; import { SearchBar } from "../components/searchBar"; 
+import { CodeSnippet } from "@/app/components/codeSnippet"; +import { CopyIconButton } from "../components/copyIconButton"; -const DEFAULT_MAX_MATCH_COUNT = 10000; +const DEFAULT_MAX_MATCH_COUNT = 500; export default function SearchPage() { // We need a suspense boundary here since we are accessing query params @@ -58,7 +60,12 @@ const SearchPageInternal = () => { const _maxMatchCount = parseInt(useNonEmptyQueryParam(SearchQueryParams.matches) ?? `${DEFAULT_MAX_MATCH_COUNT}`); const maxMatchCount = isNaN(_maxMatchCount) ? DEFAULT_MAX_MATCH_COUNT : _maxMatchCount; - const { data: searchResponse, isLoading: isSearchLoading, error } = useQuery({ + const { + data: searchResponse, + isPending: isSearchPending, + isFetching: isFetching, + error + } = useQuery({ queryKey: ["search", searchQuery, maxMatchCount], queryFn: () => measure(() => unwrapServiceError(search({ query: searchQuery, @@ -68,14 +75,17 @@ const SearchPageInternal = () => { }, domain)), "client.search"), select: ({ data, durationMs }) => ({ ...data, - durationMs, + totalClientSearchDurationMs: durationMs, }), enabled: searchQuery.length > 0, refetchOnWindowFocus: false, retry: false, - staleTime: Infinity, + staleTime: 0, }); + console.log(`isSearchPending`, isSearchPending); + console.log(`isFetching`, isFetching); + useEffect(() => { if (error) { toast({ @@ -109,58 +119,31 @@ const SearchPageInternal = () => { const fileLanguages = searchResponse.files?.map(file => file.language) || []; captureEvent("search_finished", { - durationMs: searchResponse.durationMs, - fileCount: searchResponse.zoektStats.fileCount, - matchCount: searchResponse.zoektStats.matchCount, - filesSkipped: searchResponse.zoektStats.filesSkipped, - contentBytesLoaded: searchResponse.zoektStats.contentBytesLoaded, - indexBytesLoaded: searchResponse.zoektStats.indexBytesLoaded, - crashes: searchResponse.zoektStats.crashes, - shardFilesConsidered: searchResponse.zoektStats.shardFilesConsidered, - filesConsidered: 
searchResponse.zoektStats.filesConsidered, - filesLoaded: searchResponse.zoektStats.filesLoaded, - shardsScanned: searchResponse.zoektStats.shardsScanned, - shardsSkipped: searchResponse.zoektStats.shardsSkipped, - shardsSkippedFilter: searchResponse.zoektStats.shardsSkippedFilter, - ngramMatches: searchResponse.zoektStats.ngramMatches, - ngramLookups: searchResponse.zoektStats.ngramLookups, - wait: searchResponse.zoektStats.wait, - matchTreeConstruction: searchResponse.zoektStats.matchTreeConstruction, - matchTreeSearch: searchResponse.zoektStats.matchTreeSearch, - regexpsConsidered: searchResponse.zoektStats.regexpsConsidered, - flushReason: searchResponse.zoektStats.flushReason, + durationMs: searchResponse.totalClientSearchDurationMs, + fileCount: searchResponse.stats.fileCount, + matchCount: searchResponse.stats.totalMatchCount, + actualMatchCount: searchResponse.stats.actualMatchCount, + filesSkipped: searchResponse.stats.filesSkipped, + contentBytesLoaded: searchResponse.stats.contentBytesLoaded, + indexBytesLoaded: searchResponse.stats.indexBytesLoaded, + crashes: searchResponse.stats.crashes, + shardFilesConsidered: searchResponse.stats.shardFilesConsidered, + filesConsidered: searchResponse.stats.filesConsidered, + filesLoaded: searchResponse.stats.filesLoaded, + shardsScanned: searchResponse.stats.shardsScanned, + shardsSkipped: searchResponse.stats.shardsSkipped, + shardsSkippedFilter: searchResponse.stats.shardsSkippedFilter, + ngramMatches: searchResponse.stats.ngramMatches, + ngramLookups: searchResponse.stats.ngramLookups, + wait: searchResponse.stats.wait, + matchTreeConstruction: searchResponse.stats.matchTreeConstruction, + matchTreeSearch: searchResponse.stats.matchTreeSearch, + regexpsConsidered: searchResponse.stats.regexpsConsidered, + flushReason: searchResponse.stats.flushReason, fileLanguages, }); }, [captureEvent, searchQuery, searchResponse]); - const { fileMatches, searchDurationMs, totalMatchCount, isBranchFilteringEnabled, 
repositoryInfo, matchCount } = useMemo(() => { - if (!searchResponse) { - return { - fileMatches: [], - searchDurationMs: 0, - totalMatchCount: 0, - isBranchFilteringEnabled: false, - repositoryInfo: {}, - matchCount: 0, - }; - } - - return { - fileMatches: searchResponse.files ?? [], - searchDurationMs: Math.round(searchResponse.durationMs), - totalMatchCount: searchResponse.zoektStats.matchCount, - isBranchFilteringEnabled: searchResponse.isBranchFilteringEnabled, - repositoryInfo: searchResponse.repositoryInfo.reduce((acc, repo) => { - acc[repo.id] = repo; - return acc; - }, {} as Record), - matchCount: searchResponse.stats.matchCount, - } - }, [searchResponse]); - - const isMoreResultsButtonVisible = useMemo(() => { - return totalMatchCount > maxMatchCount; - }, [totalMatchCount, maxMatchCount]); const onLoadMoreResults = useCallback(() => { const url = createPathWithQueryParams(`/${domain}/search`, @@ -183,20 +166,27 @@ const SearchPageInternal = () => { /> - {(isSearchLoading) ? ( + {(isSearchPending || isFetching) ? (

Searching...

+ ) : error ? ( +
+ +

Failed to search

+

{error.message}

+
) : ( )} @@ -208,9 +198,10 @@ interface PanelGroupProps { isMoreResultsButtonVisible?: boolean; onLoadMoreResults: () => void; isBranchFilteringEnabled: boolean; - repoInfo: Record; + repoInfo: RepositoryInfo[]; searchDurationMs: number; numMatches: number; + searchStats?: SearchStats; } const PanelGroup = ({ @@ -218,9 +209,10 @@ const PanelGroup = ({ isMoreResultsButtonVisible, onLoadMoreResults, isBranchFilteringEnabled, - repoInfo, - searchDurationMs, + repoInfo: _repoInfo, + searchDurationMs: _searchDurationMs, numMatches, + searchStats, }: PanelGroupProps) => { const [previewedFile, setPreviewedFile] = useState(undefined); const filteredFileMatches = useFilteredMatches(fileMatches); @@ -241,6 +233,17 @@ const PanelGroup = ({ description: "Toggle filter panel", }); + const searchDurationMs = useMemo(() => { + return Math.round(_searchDurationMs); + }, [_searchDurationMs]); + + const repoInfo = useMemo(() => { + return _repoInfo.reduce((acc, repo) => { + acc[repo.id] = repo; + return acc; + }, {} as Record); + }, [_repoInfo]); + return (
- + + + + + +
+

Search stats for nerds

+ { + navigator.clipboard.writeText(JSON.stringify(searchStats, null, 2)); + return true; + }} /> +
+ + {JSON.stringify(searchStats, null, 2)} + +
+
{ fileMatches.length > 0 ? (

{`[${searchDurationMs} ms] Found ${numMatches} matches in ${fileMatches.length} ${fileMatches.length > 1 ? 'files' : 'file'}`}

diff --git a/packages/web/src/app/components/codeSnippet.tsx b/packages/web/src/app/components/codeSnippet.tsx index 93ca4de31..e77a24b79 100644 --- a/packages/web/src/app/components/codeSnippet.tsx +++ b/packages/web/src/app/components/codeSnippet.tsx @@ -1,12 +1,12 @@ import { cn } from "@/lib/utils" -export const CodeSnippet = ({ children, className, title }: { children: React.ReactNode, className?: string, title?: string }) => { +export const CodeSnippet = ({ children, className, title, renderNewlines = false }: { children: React.ReactNode, className?: string, title?: string, renderNewlines?: boolean }) => { return ( - {children} + {renderNewlines ?
{children}
: children}
) } \ No newline at end of file diff --git a/packages/web/src/env.mjs b/packages/web/src/env.mjs index 922b2b840..7a9c15894 100644 --- a/packages/web/src/env.mjs +++ b/packages/web/src/env.mjs @@ -15,9 +15,6 @@ export const env = createEnv({ server: { // Zoekt ZOEKT_WEBSERVER_URL: z.string().url().default("http://localhost:6070"), - SHARD_MAX_MATCH_COUNT: numberSchema.default(10000), - TOTAL_MAX_MATCH_COUNT: numberSchema.default(100000), - ZOEKT_MAX_WALL_TIME_MS: numberSchema.default(10000), // Auth FORCE_ENABLE_ANONYMOUS_ACCESS: booleanSchema.default('false'), diff --git a/packages/web/src/features/codeNav/actions.ts b/packages/web/src/features/codeNav/actions.ts index b55cfa30d..839ef3819 100644 --- a/packages/web/src/features/codeNav/actions.ts +++ b/packages/web/src/features/codeNav/actions.ts @@ -80,7 +80,7 @@ export const findSearchBasedSymbolDefinitions = async ( const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse) => { const parser = searchResponseSchema.transform(async ({ files }) => ({ stats: { - matchCount: searchResult.stats.matchCount, + matchCount: searchResult.stats.actualMatchCount, }, files: files.flatMap((file) => { const chunks = file.chunks; diff --git a/packages/web/src/features/search/schemas.ts b/packages/web/src/features/search/schemas.ts index 18dfd8d4d..1867d8491 100644 --- a/packages/web/src/features/search/schemas.ts +++ b/packages/web/src/features/search/schemas.ts @@ -37,35 +37,82 @@ export const repositoryInfoSchema = z.object({ name: z.string(), displayName: z.string().optional(), webUrl: z.string().optional(), -}) +}); + +// Many of these fields are defined in zoekt/api.go. +export const searchStatsSchema = z.object({ + // The actual number of matches returned by the search. + // This will always be less than or equal to `totalMatchCount`. + actualMatchCount: z.number(), + + // The total number of matches found during the search. + totalMatchCount: z.number(), + + // The duration (in nanoseconds) of the search. 
+ duration: z.number(), + + // Number of files containing a match. + fileCount: z.number(), + + // Candidate files whose contents weren't examined because we + // gathered enough matches. + filesSkipped: z.number(), + + // Amount of I/O for reading contents. + contentBytesLoaded: z.number(), + + // Amount of I/O for reading from index. + indexBytesLoaded: z.number(), + + // Number of search shards that had a crash. + crashes: z.number(), + + // Number of files in shards that we considered. + shardFilesConsidered: z.number(), + + // Files that we evaluated. Equivalent to files for which all + // atom matches (including negations) evaluated to true. + filesConsidered: z.number(), + + // Files for which we loaded file content to verify substring matches + filesLoaded: z.number(), + + // Shards that we scanned to find matches. + shardsScanned: z.number(), + + // Shards that we did not process because a query was canceled. + shardsSkipped: z.number(), + + // Shards that we did not process because the query was rejected by the + // ngram filter indicating it had no matches. + shardsSkippedFilter: z.number(), + + // Number of candidate matches as a result of searching ngrams. + ngramMatches: z.number(), + + // NgramLookups is the number of times we accessed an ngram in the index. + ngramLookups: z.number(), + + // Wall clock time for queued search. + wait: z.number(), + + // Aggregate wall clock time spent constructing and pruning the match tree. + // This accounts for time such as lookups in the trigram index. + matchTreeConstruction: z.number(), + + // Aggregate wall clock time spent searching the match tree. This accounts + // for the bulk of search work done looking for matches. + matchTreeSearch: z.number(), + + // Number of times regexp was called on files that we evaluated. + regexpsConsidered: z.number(), + + // FlushReason explains why results were flushed. 
+ flushReason: z.number(), +}); export const searchResponseSchema = z.object({ - zoektStats: z.object({ - // The duration (in nanoseconds) of the search. - duration: z.number(), - fileCount: z.number(), - matchCount: z.number(), - filesSkipped: z.number(), - contentBytesLoaded: z.number(), - indexBytesLoaded: z.number(), - crashes: z.number(), - shardFilesConsidered: z.number(), - filesConsidered: z.number(), - filesLoaded: z.number(), - shardsScanned: z.number(), - shardsSkipped: z.number(), - shardsSkippedFilter: z.number(), - ngramMatches: z.number(), - ngramLookups: z.number(), - wait: z.number(), - matchTreeConstruction: z.number(), - matchTreeSearch: z.number(), - regexpsConsidered: z.number(), - flushReason: z.number(), - }), - stats: z.object({ - matchCount: z.number(), - }), + stats: searchStatsSchema, files: z.array(z.object({ fileName: z.object({ // The name of the file @@ -92,6 +139,7 @@ export const searchResponseSchema = z.object({ })), repositoryInfo: z.array(repositoryInfoSchema), isBranchFilteringEnabled: z.boolean(), + isSearchExhaustive: z.boolean(), }); export const fileSourceRequestSchema = z.object({ diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index 60d04dec4..3c7ea3732 100644 --- a/packages/web/src/features/search/searchApi.ts +++ b/packages/web/src/features/search/searchApi.ts @@ -151,12 +151,48 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ // @see: https://github.com/sourcebot-dev/zoekt/blob/main/api.go#L892 opts: { ChunkMatches: true, + // @note: Zoekt has several different ways to limit a given search. The two that + // we care about are `MaxMatchDisplayCount` and `TotalMaxMatchCount`: + // - `MaxMatchDisplayCount` truncates the number of matches AFTER performing + // a search (specifically, after collating and sorting the results). The number of + // results returned by the API will be less than or equal to this value. 
+ // + // - `TotalMaxMatchCount` truncates the number of matches DURING a search. The results + // returned by the API can be less than, equal to, or greater than this value. + // Why greater? Because this value is compared _after_ a given shard has finished + // being processed, the number of matches returned by the last shard may have exceeded + // this value. + // + // Let's define two variables: + // - `actualMatchCount` : The number of matches that are returned by the API. This is + // always less than or equal to `MaxMatchDisplayCount`. + // - `totalMatchCount` : The number of matches that zoekt found before it either + // 1) found all matches or 2) hit the `TotalMaxMatchCount` limit. This number is + // not bounded and can be less than, equal to, or greater than both `TotalMaxMatchCount` + // and `MaxMatchDisplayCount`. + // + // + // Our challenge is to determine whether or not the search returned all possible matches + // (it was exhaustive) or if it was truncated. By setting the `TotalMaxMatchCount` to + // `MaxMatchDisplayCount + 1`, we can determine which of these occurred by comparing + // `totalMatchCount` to `MaxMatchDisplayCount`. 
+ // + // if (totalMatchCount ≤ actualMatchCount): + // Search is EXHAUSTIVE (found all possible matches) + // Proof: totalMatchCount ≤ MaxMatchDisplayCount < TotalMaxMatchCount + // Therefore Zoekt stopped naturally, not due to limit + // + // if (totalMatchCount > actualMatchCount): + // Search is TRUNCATED (more matches exist) + // Proof: totalMatchCount > MaxMatchDisplayCount + 1 = TotalMaxMatchCount + // Therefore Zoekt hit the limit and stopped searching + // MaxMatchDisplayCount: matches, + TotalMaxMatchCount: matches + 1, NumContextLines: contextLines, Whole: !!whole, - TotalMaxMatchCount: env.TOTAL_MAX_MATCH_COUNT, - ShardMaxMatchCount: env.SHARD_MAX_MATCH_COUNT, - MaxWallTime: env.ZOEKT_MAX_WALL_TIME_MS * 1000 * 1000, // zoekt expects a duration in nanoseconds + ShardMaxMatchCount: -1, + MaxWallTime: 0, // zoekt expects a duration in nanoseconds } }); @@ -296,11 +332,35 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ } }).filter((file) => file !== undefined) ?? []; + const actualMatchCount = files.reduce( + (acc, file) => + // Match count is the sum of the number of chunk matches and file name matches. + acc + file.chunks.reduce( + (acc, chunk) => acc + chunk.matchRanges.length, + 0, + ) + file.fileName.matchRanges.length, + 0, + ); + + const totalMatchCount = Result.MatchCount; + const isSearchExhaustive = totalMatchCount <= actualMatchCount; + return { - zoektStats: { + files, + repositoryInfo: Array.from(repos.values()).map((repo) => ({ + id: repo.id, + codeHostType: repo.external_codeHostType, + name: repo.name, + displayName: repo.displayName ?? undefined, + webUrl: repo.webUrl ?? 
undefined, + })), + isBranchFilteringEnabled, + isSearchExhaustive, + stats: { + actualMatchCount, + totalMatchCount, duration: Result.Duration, fileCount: Result.FileCount, - matchCount: Result.MatchCount, filesSkipped: Result.FilesSkipped, contentBytesLoaded: Result.ContentBytesLoaded, indexBytesLoaded: Result.IndexBytesLoaded, @@ -318,25 +378,6 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ matchTreeSearch: Result.MatchTreeSearch, regexpsConsidered: Result.RegexpsConsidered, flushReason: Result.FlushReason, - }, - files, - repositoryInfo: Array.from(repos.values()).map((repo) => ({ - id: repo.id, - codeHostType: repo.external_codeHostType, - name: repo.name, - displayName: repo.displayName ?? undefined, - webUrl: repo.webUrl ?? undefined, - })), - isBranchFilteringEnabled: isBranchFilteringEnabled, - stats: { - matchCount: files.reduce( - (acc, file) => - acc + file.chunks.reduce( - (acc, chunk) => acc + chunk.matchRanges.length, - 0, - ), - 0, - ) } } satisfies SearchResponse; }); diff --git a/packages/web/src/features/search/types.ts b/packages/web/src/features/search/types.ts index f9af8dbe2..2a238857c 100644 --- a/packages/web/src/features/search/types.ts +++ b/packages/web/src/features/search/types.ts @@ -8,6 +8,7 @@ import { fileSourceRequestSchema, symbolSchema, repositoryInfoSchema, + searchStatsSchema, } from "./schemas"; import { z } from "zod"; @@ -22,4 +23,5 @@ export type FileSourceRequest = z.infer; export type FileSourceResponse = z.infer; export type RepositoryInfo = z.infer; -export type SourceRange = z.infer; \ No newline at end of file +export type SourceRange = z.infer; +export type SearchStats = z.infer; \ No newline at end of file diff --git a/packages/web/src/lib/posthogEvents.ts b/packages/web/src/lib/posthogEvents.ts index d9a9ed4ba..9ed40fd96 100644 --- a/packages/web/src/lib/posthogEvents.ts +++ b/packages/web/src/lib/posthogEvents.ts @@ -15,6 +15,7 @@ export type PosthogEventMap = { shardsSkipped: 
number, shardsSkippedFilter: number, matchCount: number, + actualMatchCount: number, ngramMatches: number, ngramLookups: number, wait: number, From 1b2339133953b0d8977d2bdab629ca43906c43ad Mon Sep 17 00:00:00 2001 From: bkellam Date: Tue, 7 Oct 2025 19:50:04 -0700 Subject: [PATCH 2/4] remove comments --- packages/web/src/app/[domain]/search/page.tsx | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/web/src/app/[domain]/search/page.tsx b/packages/web/src/app/[domain]/search/page.tsx index e992f1e44..5ffc79f90 100644 --- a/packages/web/src/app/[domain]/search/page.tsx +++ b/packages/web/src/app/[domain]/search/page.tsx @@ -83,9 +83,6 @@ const SearchPageInternal = () => { staleTime: 0, }); - console.log(`isSearchPending`, isSearchPending); - console.log(`isFetching`, isFetching); - useEffect(() => { if (error) { toast({ From ad91f6b7b8d3b8b7dee01dfebdda1c910c9a5fcb Mon Sep 17 00:00:00 2001 From: bkellam Date: Tue, 7 Oct 2025 22:04:05 -0700 Subject: [PATCH 3/4] changelog + small nits --- .env.development | 2 -- CHANGELOG.md | 1 + .../configuration/environment-variables.mdx | 3 --- packages/web/src/app/[domain]/search/page.tsx | 18 +++++++++++------- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.env.development b/.env.development index 0309b5fbd..1740c2df8 100644 --- a/.env.development +++ b/.env.development @@ -4,8 +4,6 @@ DATABASE_URL="postgresql://postgres:postgres@localhost:5432/postgres" # Zoekt ZOEKT_WEBSERVER_URL="http://localhost:6070" -# SHARD_MAX_MATCH_COUNT=10000 -# TOTAL_MAX_MATCH_COUNT=100000 # The command to use for generating ctags. CTAGS_COMMAND=ctags # logging, strict diff --git a/CHANGELOG.md b/CHANGELOG.md index 32410873a..792a81a02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Remove spam "login page loaded" log. 
[#552](https://github.com/sourcebot-dev/sourcebot/pull/552) +- Improved search performance for unbounded search queries. [#555](https://github.com/sourcebot-dev/sourcebot/pull/555) ### Added - Added support for passing db connection url as seperate `DATABASE_HOST`, `DATABASE_USERNAME`, `DATABASE_PASSWORD`, `DATABASE_NAME`, and `DATABASE_ARGS` env vars. [#545](https://github.com/sourcebot-dev/sourcebot/pull/545) diff --git a/docs/docs/configuration/environment-variables.mdx b/docs/docs/configuration/environment-variables.mdx index d49073fd3..a51aeb370 100644 --- a/docs/docs/configuration/environment-variables.mdx +++ b/docs/docs/configuration/environment-variables.mdx @@ -28,7 +28,6 @@ The following environment variables allow you to configure your Sourcebot deploy | `REDIS_REMOVE_ON_FAIL` | `100` |

Controls how many failed jobs are allowed to remain in Redis queues

| | `REPO_SYNC_RETRY_BASE_SLEEP_SECONDS` | `60` |

The base sleep duration (in seconds) for exponential backoff when retrying repository sync operations that fail

| | `GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS` | `600` |

The timeout duration (in seconds) for GitLab client queries

| -| `SHARD_MAX_MATCH_COUNT` | `10000` |

The maximum shard count per query

| | `SMTP_CONNECTION_URL` | `-` |

The url to the SMTP service used for sending transactional emails. See [this doc](/docs/configuration/transactional-emails) for more info.

| | `SOURCEBOT_ENCRYPTION_KEY` | Automatically generated at startup if no value is provided. Generated using `openssl rand -base64 24` |

Used to encrypt connection secrets and generate API keys.

| | `SOURCEBOT_PUBLIC_KEY_PATH` | `/app/public.pem` |

Sourcebot's public key that's used to verify encrypted license key signatures.

| @@ -36,8 +35,6 @@ The following environment variables allow you to configure your Sourcebot deploy | `SOURCEBOT_STRUCTURED_LOGGING_ENABLED` | `false` |

Enables/disable structured JSON logging. See [this doc](/docs/configuration/structured-logging) for more info.

| | `SOURCEBOT_STRUCTURED_LOGGING_FILE` | - |

Optional file to log to if structured logging is enabled

| | `SOURCEBOT_TELEMETRY_DISABLED` | `false` |

Enables/disables telemetry collection in Sourcebot. See [this doc](/docs/overview.mdx#telemetry) for more info.

| -| `TOTAL_MAX_MATCH_COUNT` | `100000` |

The maximum number of matches per query

| -| `ZOEKT_MAX_WALL_TIME_MS` | `10000` |

The maximum real world duration (in milliseconds) per zoekt query

| ### Enterprise Environment Variables | Variable | Default | Description | diff --git a/packages/web/src/app/[domain]/search/page.tsx b/packages/web/src/app/[domain]/search/page.tsx index 5ffc79f90..2d6e497cf 100644 --- a/packages/web/src/app/[domain]/search/page.tsx +++ b/packages/web/src/app/[domain]/search/page.tsx @@ -26,7 +26,7 @@ import { AnimatedResizableHandle } from "@/components/ui/animatedResizableHandle import { useFilteredMatches } from "./components/filterPanel/useFilterMatches"; import { Button } from "@/components/ui/button"; import { ImperativePanelHandle } from "react-resizable-panels"; -import { AlertTriangleIcon, FilterIcon } from "lucide-react"; +import { AlertTriangleIcon, BugIcon, FilterIcon } from "lucide-react"; import { useHotkeys } from "react-hotkeys-hook"; import { useLocalStorage } from "@uidotdev/usehooks"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; @@ -301,13 +301,17 @@ const PanelGroup = ({ - -
+ +
+

Search stats for nerds

- { - navigator.clipboard.writeText(JSON.stringify(searchStats, null, 2)); - return true; - }} /> + { + navigator.clipboard.writeText(JSON.stringify(searchStats, null, 2)); + return true; + }} + className="ml-auto" + />
{JSON.stringify(searchStats, null, 2)} From 2fb540b562c120f8074ff6077e2f336a35ce2563 Mon Sep 17 00:00:00 2001 From: bkellam Date: Tue, 7 Oct 2025 23:44:52 -0700 Subject: [PATCH 4/4] Changelog and update MCP --- CHANGELOG.md | 2 + packages/mcp/src/schemas.ts | 99 ++++++++++++++++++++++++++++--------- 2 files changed, 77 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 792a81a02..82b854fd8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] + + ### Fixed - Fixed "dubious ownership" errors when cloning / fetching repos. [#553](https://github.com/sourcebot-dev/sourcebot/pull/553) diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index 40736b596..0bb8ff9aa 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -38,32 +38,82 @@ export const repositoryInfoSchema = z.object({ name: z.string(), displayName: z.string().optional(), webUrl: z.string().optional(), -}) +}); + +// Many of these fields are defined in zoekt/api.go. +export const searchStatsSchema = z.object({ + // The actual number of matches returned by the search. + // This will always be less than or equal to `totalMatchCount`. + actualMatchCount: z.number(), + + // The total number of matches found during the search. + totalMatchCount: z.number(), + + // The duration (in nanoseconds) of the search. + duration: z.number(), + + // Number of files containing a match. + fileCount: z.number(), + + // Candidate files whose contents weren't examined because we + // gathered enough matches. + filesSkipped: z.number(), + + // Amount of I/O for reading contents. + contentBytesLoaded: z.number(), + + // Amount of I/O for reading from index. + indexBytesLoaded: z.number(), + + // Number of search shards that had a crash. + crashes: z.number(), + + // Number of files in shards that we considered. 
+ shardFilesConsidered: z.number(), + + // Files that we evaluated. Equivalent to files for which all + // atom matches (including negations) evaluated to true. + filesConsidered: z.number(), + + // Files for which we loaded file content to verify substring matches + filesLoaded: z.number(), + + // Shards that we scanned to find matches. + shardsScanned: z.number(), + + // Shards that we did not process because a query was canceled. + shardsSkipped: z.number(), + + // Shards that we did not process because the query was rejected by the + // ngram filter indicating it had no matches. + shardsSkippedFilter: z.number(), + + // Number of candidate matches as a result of searching ngrams. + ngramMatches: z.number(), + + // NgramLookups is the number of times we accessed an ngram in the index. + ngramLookups: z.number(), + + // Wall clock time for queued search. + wait: z.number(), + + // Aggregate wall clock time spent constructing and pruning the match tree. + // This accounts for time such as lookups in the trigram index. + matchTreeConstruction: z.number(), + + // Aggregate wall clock time spent searching the match tree. This accounts + // for the bulk of search work done looking for matches. + matchTreeSearch: z.number(), + + // Number of times regexp was called on files that we evaluated. + regexpsConsidered: z.number(), + + // FlushReason explains why results were flushed. + flushReason: z.number(), +}); export const searchResponseSchema = z.object({ - zoektStats: z.object({ - // The duration (in nanoseconds) of the search. 
- duration: z.number(), - fileCount: z.number(), - matchCount: z.number(), - filesSkipped: z.number(), - contentBytesLoaded: z.number(), - indexBytesLoaded: z.number(), - crashes: z.number(), - shardFilesConsidered: z.number(), - filesConsidered: z.number(), - filesLoaded: z.number(), - shardsScanned: z.number(), - shardsSkipped: z.number(), - shardsSkippedFilter: z.number(), - ngramMatches: z.number(), - ngramLookups: z.number(), - wait: z.number(), - matchTreeConstruction: z.number(), - matchTreeSearch: z.number(), - regexpsConsidered: z.number(), - flushReason: z.number(), - }), + stats: searchStatsSchema, files: z.array(z.object({ fileName: z.object({ // The name of the file @@ -90,6 +140,7 @@ export const searchResponseSchema = z.object({ })), repositoryInfo: z.array(repositoryInfoSchema), isBranchFilteringEnabled: z.boolean(), + isSearchExhaustive: z.boolean(), }); enum RepoIndexingStatus {