Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .env.development
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ DATABASE_URL="postgresql://postgres:postgres@localhost:5432/postgres"

# Zoekt
ZOEKT_WEBSERVER_URL="http://localhost:6070"
# SHARD_MAX_MATCH_COUNT=10000
# TOTAL_MAX_MATCH_COUNT=100000
# The command to use for generating ctags.
CTAGS_COMMAND=ctags
# logging, strict
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

<!-- @NOTE: On next release, please bump the MCP package as there are breaking changes in this! -->

### Fixed
- Fixed "dubious ownership" errors when cloning / fetching repos. [#553](https://github.com/sourcebot-dev/sourcebot/pull/553)

### Changed
- Remove spam "login page loaded" log. [#552](https://github.com/sourcebot-dev/sourcebot/pull/552)
- Improved search performance for unbounded search queries. [#555](https://github.com/sourcebot-dev/sourcebot/pull/555)

### Added
- Added support for passing db connection url as separate `DATABASE_HOST`, `DATABASE_USERNAME`, `DATABASE_PASSWORD`, `DATABASE_NAME`, and `DATABASE_ARGS` env vars. [#545](https://github.com/sourcebot-dev/sourcebot/pull/545)
Expand Down
3 changes: 0 additions & 3 deletions docs/docs/configuration/environment-variables.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,13 @@ The following environment variables allow you to configure your Sourcebot deploy
| `REDIS_REMOVE_ON_FAIL` | `100` | <p>Controls how many failed jobs are allowed to remain in Redis queues</p> |
| `REPO_SYNC_RETRY_BASE_SLEEP_SECONDS` | `60` | <p>The base sleep duration (in seconds) for exponential backoff when retrying repository sync operations that fail</p> |
| `GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS` | `600` | <p>The timeout duration (in seconds) for GitLab client queries</p> |
| `SHARD_MAX_MATCH_COUNT` | `10000` | <p>The maximum shard count per query</p> |
| `SMTP_CONNECTION_URL` | `-` | <p>The url to the SMTP service used for sending transactional emails. See [this doc](/docs/configuration/transactional-emails) for more info.</p> |
| `SOURCEBOT_ENCRYPTION_KEY` | Automatically generated at startup if no value is provided. Generated using `openssl rand -base64 24` | <p>Used to encrypt connection secrets and generate API keys.</p> |
| `SOURCEBOT_PUBLIC_KEY_PATH` | `/app/public.pem` | <p>Sourcebot's public key that's used to verify encrypted license key signatures.</p> |
| `SOURCEBOT_LOG_LEVEL` | `info` | <p>The Sourcebot logging level. Valid values are `debug`, `info`, `warn`, `error`, in order of severity.</p> |
| `SOURCEBOT_STRUCTURED_LOGGING_ENABLED` | `false` | <p>Enables/disables structured JSON logging. See [this doc](/docs/configuration/structured-logging) for more info.</p> |
| `SOURCEBOT_STRUCTURED_LOGGING_FILE` | - | <p>Optional file to log to if structured logging is enabled</p> |
| `SOURCEBOT_TELEMETRY_DISABLED` | `false` | <p>Enables/disables telemetry collection in Sourcebot. See [this doc](/docs/overview.mdx#telemetry) for more info.</p> |
| `TOTAL_MAX_MATCH_COUNT` | `100000` | <p>The maximum number of matches per query</p> |
| `ZOEKT_MAX_WALL_TIME_MS` | `10000` | <p>The maximum real world duration (in milliseconds) per zoekt query</p> |

### Enterprise Environment Variables
| Variable | Default | Description |
Expand Down
99 changes: 75 additions & 24 deletions packages/mcp/src/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,32 +38,82 @@ export const repositoryInfoSchema = z.object({
name: z.string(),
displayName: z.string().optional(),
webUrl: z.string().optional(),
})
});

/**
 * Zod schema for the `stats` object of a search response.
 *
 * Every field is validated as a plain `z.number()`; no integer or range
 * constraints are applied here.
 *
 * Many of these fields are defined in zoekt/api.go.
 */
export const searchStatsSchema = z.object({
// The actual number of matches returned by the search.
// This will always be less than or equal to `totalMatchCount`.
actualMatchCount: z.number(),

// The total number of matches found during the search.
totalMatchCount: z.number(),

// The duration (in nanoseconds) of the search.
duration: z.number(),

// Number of files containing a match.
fileCount: z.number(),

// Candidate files whose contents weren't examined because we
// gathered enough matches.
filesSkipped: z.number(),

// Amount of I/O for reading contents.
// NOTE(review): presumably a byte count, going by the field name — confirm.
contentBytesLoaded: z.number(),

// Amount of I/O for reading from index.
// NOTE(review): presumably a byte count, going by the field name — confirm.
indexBytesLoaded: z.number(),

// Number of search shards that had a crash.
crashes: z.number(),

// Number of files in shards that we considered.
shardFilesConsidered: z.number(),

// Files that we evaluated. Equivalent to files for which all
// atom matches (including negations) evaluated to true.
filesConsidered: z.number(),

// Files for which we loaded file content to verify substring matches.
filesLoaded: z.number(),

// Shards that we scanned to find matches.
shardsScanned: z.number(),

// Shards that we did not process because a query was canceled.
shardsSkipped: z.number(),

// Shards that we did not process because the query was rejected by the
// ngram filter indicating it had no matches.
shardsSkippedFilter: z.number(),

// Number of candidate matches as a result of searching ngrams.
ngramMatches: z.number(),

// NgramLookups is the number of times we accessed an ngram in the index.
ngramLookups: z.number(),

// Wall clock time for queued search.
// NOTE(review): unit is not stated here — presumably nanoseconds, like
// `duration`; confirm against zoekt/api.go.
wait: z.number(),

// Aggregate wall clock time spent constructing and pruning the match tree.
// This accounts for time such as lookups in the trigram index.
// NOTE(review): unit is not stated here — presumably nanoseconds, like
// `duration`; confirm against zoekt/api.go.
matchTreeConstruction: z.number(),

// Aggregate wall clock time spent searching the match tree. This accounts
// for the bulk of search work done looking for matches.
// NOTE(review): unit is not stated here — presumably nanoseconds, like
// `duration`; confirm against zoekt/api.go.
matchTreeSearch: z.number(),

// Number of times regexp was called on files that we evaluated.
regexpsConsidered: z.number(),

// FlushReason explains why results were flushed.
// Carried as a bare number; the meaning of each value is not defined in
// this file.
flushReason: z.number(),
});

export const searchResponseSchema = z.object({
zoektStats: z.object({
// The duration (in nanoseconds) of the search.
duration: z.number(),
fileCount: z.number(),
matchCount: z.number(),
filesSkipped: z.number(),
contentBytesLoaded: z.number(),
indexBytesLoaded: z.number(),
crashes: z.number(),
shardFilesConsidered: z.number(),
filesConsidered: z.number(),
filesLoaded: z.number(),
shardsScanned: z.number(),
shardsSkipped: z.number(),
shardsSkippedFilter: z.number(),
ngramMatches: z.number(),
ngramLookups: z.number(),
wait: z.number(),
matchTreeConstruction: z.number(),
matchTreeSearch: z.number(),
regexpsConsidered: z.number(),
flushReason: z.number(),
}),
stats: searchStatsSchema,
files: z.array(z.object({
fileName: z.object({
// The name of the file
Expand All @@ -90,6 +140,7 @@ export const searchResponseSchema = z.object({
})),
repositoryInfo: z.array(repositoryInfoSchema),
isBranchFilteringEnabled: z.boolean(),
isSearchExhaustive: z.boolean(),
});

enum RepoIndexingStatus {
Expand Down
150 changes: 85 additions & 65 deletions packages/web/src/app/[domain]/search/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,21 @@ import { FilterPanel } from "./components/filterPanel";
import { SearchResultsPanel } from "./components/searchResultsPanel";
import { useDomain } from "@/hooks/useDomain";
import { useToast } from "@/components/hooks/use-toast";
import { RepositoryInfo, SearchResultFile } from "@/features/search/types";
import { RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search/types";
import { AnimatedResizableHandle } from "@/components/ui/animatedResizableHandle";
import { useFilteredMatches } from "./components/filterPanel/useFilterMatches";
import { Button } from "@/components/ui/button";
import { ImperativePanelHandle } from "react-resizable-panels";
import { FilterIcon } from "lucide-react";
import { AlertTriangleIcon, BugIcon, FilterIcon } from "lucide-react";
import { useHotkeys } from "react-hotkeys-hook";
import { useLocalStorage } from "@uidotdev/usehooks";
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
import { KeyboardShortcutHint } from "@/app/components/keyboardShortcutHint";
import { SearchBar } from "../components/searchBar";
import { CodeSnippet } from "@/app/components/codeSnippet";
import { CopyIconButton } from "../components/copyIconButton";

const DEFAULT_MAX_MATCH_COUNT = 10000;
const DEFAULT_MAX_MATCH_COUNT = 500;

export default function SearchPage() {
// We need a suspense boundary here since we are accessing query params
Expand All @@ -58,7 +60,12 @@ const SearchPageInternal = () => {
const _maxMatchCount = parseInt(useNonEmptyQueryParam(SearchQueryParams.matches) ?? `${DEFAULT_MAX_MATCH_COUNT}`);
const maxMatchCount = isNaN(_maxMatchCount) ? DEFAULT_MAX_MATCH_COUNT : _maxMatchCount;

const { data: searchResponse, isLoading: isSearchLoading, error } = useQuery({
const {
data: searchResponse,
isPending: isSearchPending,
isFetching: isFetching,
error
} = useQuery({
queryKey: ["search", searchQuery, maxMatchCount],
queryFn: () => measure(() => unwrapServiceError(search({
query: searchQuery,
Expand All @@ -68,12 +75,12 @@ const SearchPageInternal = () => {
}, domain)), "client.search"),
select: ({ data, durationMs }) => ({
...data,
durationMs,
totalClientSearchDurationMs: durationMs,
}),
enabled: searchQuery.length > 0,
refetchOnWindowFocus: false,
retry: false,
staleTime: Infinity,
staleTime: 0,
});

useEffect(() => {
Expand Down Expand Up @@ -109,58 +116,31 @@ const SearchPageInternal = () => {
const fileLanguages = searchResponse.files?.map(file => file.language) || [];

captureEvent("search_finished", {
durationMs: searchResponse.durationMs,
fileCount: searchResponse.zoektStats.fileCount,
matchCount: searchResponse.zoektStats.matchCount,
filesSkipped: searchResponse.zoektStats.filesSkipped,
contentBytesLoaded: searchResponse.zoektStats.contentBytesLoaded,
indexBytesLoaded: searchResponse.zoektStats.indexBytesLoaded,
crashes: searchResponse.zoektStats.crashes,
shardFilesConsidered: searchResponse.zoektStats.shardFilesConsidered,
filesConsidered: searchResponse.zoektStats.filesConsidered,
filesLoaded: searchResponse.zoektStats.filesLoaded,
shardsScanned: searchResponse.zoektStats.shardsScanned,
shardsSkipped: searchResponse.zoektStats.shardsSkipped,
shardsSkippedFilter: searchResponse.zoektStats.shardsSkippedFilter,
ngramMatches: searchResponse.zoektStats.ngramMatches,
ngramLookups: searchResponse.zoektStats.ngramLookups,
wait: searchResponse.zoektStats.wait,
matchTreeConstruction: searchResponse.zoektStats.matchTreeConstruction,
matchTreeSearch: searchResponse.zoektStats.matchTreeSearch,
regexpsConsidered: searchResponse.zoektStats.regexpsConsidered,
flushReason: searchResponse.zoektStats.flushReason,
durationMs: searchResponse.totalClientSearchDurationMs,
fileCount: searchResponse.stats.fileCount,
matchCount: searchResponse.stats.totalMatchCount,
actualMatchCount: searchResponse.stats.actualMatchCount,
filesSkipped: searchResponse.stats.filesSkipped,
contentBytesLoaded: searchResponse.stats.contentBytesLoaded,
indexBytesLoaded: searchResponse.stats.indexBytesLoaded,
crashes: searchResponse.stats.crashes,
shardFilesConsidered: searchResponse.stats.shardFilesConsidered,
filesConsidered: searchResponse.stats.filesConsidered,
filesLoaded: searchResponse.stats.filesLoaded,
shardsScanned: searchResponse.stats.shardsScanned,
shardsSkipped: searchResponse.stats.shardsSkipped,
shardsSkippedFilter: searchResponse.stats.shardsSkippedFilter,
ngramMatches: searchResponse.stats.ngramMatches,
ngramLookups: searchResponse.stats.ngramLookups,
wait: searchResponse.stats.wait,
matchTreeConstruction: searchResponse.stats.matchTreeConstruction,
matchTreeSearch: searchResponse.stats.matchTreeSearch,
regexpsConsidered: searchResponse.stats.regexpsConsidered,
flushReason: searchResponse.stats.flushReason,
fileLanguages,
});
}, [captureEvent, searchQuery, searchResponse]);

const { fileMatches, searchDurationMs, totalMatchCount, isBranchFilteringEnabled, repositoryInfo, matchCount } = useMemo(() => {
if (!searchResponse) {
return {
fileMatches: [],
searchDurationMs: 0,
totalMatchCount: 0,
isBranchFilteringEnabled: false,
repositoryInfo: {},
matchCount: 0,
};
}

return {
fileMatches: searchResponse.files ?? [],
searchDurationMs: Math.round(searchResponse.durationMs),
totalMatchCount: searchResponse.zoektStats.matchCount,
isBranchFilteringEnabled: searchResponse.isBranchFilteringEnabled,
repositoryInfo: searchResponse.repositoryInfo.reduce((acc, repo) => {
acc[repo.id] = repo;
return acc;
}, {} as Record<number, RepositoryInfo>),
matchCount: searchResponse.stats.matchCount,
}
}, [searchResponse]);

const isMoreResultsButtonVisible = useMemo(() => {
return totalMatchCount > maxMatchCount;
}, [totalMatchCount, maxMatchCount]);

const onLoadMoreResults = useCallback(() => {
const url = createPathWithQueryParams(`/${domain}/search`,
Expand All @@ -183,20 +163,27 @@ const SearchPageInternal = () => {
/>
</TopBar>

{(isSearchLoading) ? (
{(isSearchPending || isFetching) ? (
<div className="flex flex-col items-center justify-center h-full gap-2">
<SymbolIcon className="h-6 w-6 animate-spin" />
<p className="font-semibold text-center">Searching...</p>
</div>
) : error ? (
<div className="flex flex-col items-center justify-center h-full gap-2">
<AlertTriangleIcon className="h-6 w-6" />
<p className="font-semibold text-center">Failed to search</p>
<p className="text-sm text-center">{error.message}</p>
</div>
) : (
<PanelGroup
fileMatches={fileMatches}
isMoreResultsButtonVisible={isMoreResultsButtonVisible}
fileMatches={searchResponse.files}
isMoreResultsButtonVisible={searchResponse.isSearchExhaustive === false}
onLoadMoreResults={onLoadMoreResults}
isBranchFilteringEnabled={isBranchFilteringEnabled}
repoInfo={repositoryInfo}
searchDurationMs={searchDurationMs}
numMatches={matchCount}
isBranchFilteringEnabled={searchResponse.isBranchFilteringEnabled}
repoInfo={searchResponse.repositoryInfo}
searchDurationMs={searchResponse.totalClientSearchDurationMs}
numMatches={searchResponse.stats.actualMatchCount}
searchStats={searchResponse.stats}
/>
)}
</div>
Expand All @@ -208,19 +195,21 @@ interface PanelGroupProps {
isMoreResultsButtonVisible?: boolean;
onLoadMoreResults: () => void;
isBranchFilteringEnabled: boolean;
repoInfo: Record<number, RepositoryInfo>;
repoInfo: RepositoryInfo[];
searchDurationMs: number;
numMatches: number;
searchStats?: SearchStats;
}

const PanelGroup = ({
fileMatches,
isMoreResultsButtonVisible,
onLoadMoreResults,
isBranchFilteringEnabled,
repoInfo,
searchDurationMs,
repoInfo: _repoInfo,
searchDurationMs: _searchDurationMs,
numMatches,
searchStats,
}: PanelGroupProps) => {
const [previewedFile, setPreviewedFile] = useState<SearchResultFile | undefined>(undefined);
const filteredFileMatches = useFilteredMatches(fileMatches);
Expand All @@ -241,6 +230,17 @@ const PanelGroup = ({
description: "Toggle filter panel",
});

const searchDurationMs = useMemo(() => {
return Math.round(_searchDurationMs);
}, [_searchDurationMs]);

const repoInfo = useMemo(() => {
return _repoInfo.reduce((acc, repo) => {
acc[repo.id] = repo;
return acc;
}, {} as Record<number, RepositoryInfo>);
}, [_repoInfo]);

return (
<ResizablePanelGroup
direction="horizontal"
Expand Down Expand Up @@ -297,7 +297,27 @@ const PanelGroup = ({
order={2}
>
<div className="py-1 px-2 flex flex-row items-center">
<InfoCircledIcon className="w-4 h-4 mr-2" />
<Tooltip>
<TooltipTrigger asChild>
<InfoCircledIcon className="w-4 h-4 mr-2" />
</TooltipTrigger>
<TooltipContent side="right" className="flex flex-col items-start gap-2 p-4">
<div className="flex flex-row items-center w-full">
<BugIcon className="w-4 h-4 mr-1.5" />
<p className="text-md font-medium">Search stats for nerds</p>
<CopyIconButton
onCopy={() => {
navigator.clipboard.writeText(JSON.stringify(searchStats, null, 2));
return true;
}}
className="ml-auto"
/>
</div>
<CodeSnippet renderNewlines>
{JSON.stringify(searchStats, null, 2)}
</CodeSnippet>
</TooltipContent>
</Tooltip>
{
fileMatches.length > 0 ? (
<p className="text-sm font-medium">{`[${searchDurationMs} ms] Found ${numMatches} matches in ${fileMatches.length} ${fileMatches.length > 1 ? 'files' : 'file'}`}</p>
Expand Down
Loading
Loading