From 310e74a6f6fc851ed1b7fce9693fd84f387c45d1 Mon Sep 17 00:00:00 2001 From: Wayne Sun Date: Sun, 23 Nov 2025 20:07:20 -0500 Subject: [PATCH] feat: add temporal filtering to search and repository APIs Add temporal filtering capabilities for searches by git branch/revision and repository index dates (since/until). Integrates with the refactored QueryIR-based search architecture. - Add gitRevision, since, until parameters to SearchOptions - Implement temporal repo filtering by indexedAt field - Add branch filtering via QueryIR wrapper - Add search_commits MCP tool for commit-based searches - Update list_repos with activeAfter/activeBefore filtering - Add 88 new tests (all passing) Signed-off-by: Wayne Sun --- CHANGELOG.md | 1 + packages/mcp/CHANGELOG.md | 16 + packages/mcp/README.md | 52 +- packages/mcp/src/client.ts | 34 +- packages/mcp/src/index.ts | 116 +++- packages/mcp/src/schemas.ts | 27 +- packages/mcp/src/types.ts | 5 + packages/shared/src/constants.server.ts | 7 + packages/shared/src/index.server.ts | 4 + packages/web/src/actions.ts | 29 +- .../web/src/app/api/(server)/commits/route.ts | 24 + .../web/src/app/api/(server)/repos/route.ts | 12 +- .../web/src/features/search/dateUtils.test.ts | 379 ++++++++++++ packages/web/src/features/search/dateUtils.ts | 186 ++++++ .../web/src/features/search/gitApi.test.ts | 575 ++++++++++++++++++ packages/web/src/features/search/gitApi.ts | 197 ++++++ packages/web/src/features/search/searchApi.ts | 156 ++++- packages/web/src/features/search/types.ts | 14 + 18 files changed, 1805 insertions(+), 29 deletions(-) create mode 100644 packages/shared/src/constants.server.ts create mode 100644 packages/web/src/app/api/(server)/commits/route.ts create mode 100644 packages/web/src/features/search/dateUtils.test.ts create mode 100644 packages/web/src/features/search/dateUtils.ts create mode 100644 packages/web/src/features/search/gitApi.test.ts create mode 100644 packages/web/src/features/search/gitApi.ts diff --git a/CHANGELOG.md 
b/CHANGELOG.md index fd71cf1e7..e1182097d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Added temporal filtering to search and repository APIs with support for git branch/revision filtering and repository index date filtering (since/until parameters). Supports both ISO 8601 and relative date formats (e.g., "30 days ago", "last week"). - Added support for streaming code search results. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) - Added buttons to toggle case sensitivity and regex patterns. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) - Added counts to members, requets, and invites tabs in the members settings. [#621](https://github.com/sourcebot-dev/sourcebot/pull/621) diff --git a/packages/mcp/CHANGELOG.md b/packages/mcp/CHANGELOG.md index e6332a354..c8911dc0f 100644 --- a/packages/mcp/CHANGELOG.md +++ b/packages/mcp/CHANGELOG.md @@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Added comprehensive relative date support for all temporal parameters (e.g., "30 days ago", "last week", "yesterday") +- Added `search_commits` tool to search commits by actual commit time with full temporal filtering. 
Accepts both numeric database IDs (e.g., 123) and string repository names (e.g., "github.com/owner/repo") for the `repoId` parameter, allowing direct use of repository names from `list_repos` output +- Added `since`/`until` parameters to `search_code` (filters by index time - when Sourcebot indexed the repo) +- Added `gitRevision` parameter to `search_code` +- Added `activeAfter`/`activeBefore` parameters to `list_repos` (filters by index time - when Sourcebot indexed the repo) +- Added date range validation to prevent invalid date ranges (since > until) +- Added 30-second timeout for git operations to handle large repositories +- Added enhanced error messages for git operations (timeout, repository not found, invalid git repository, ambiguous arguments) +- Added clarification that repositories must be cloned on Sourcebot server disk for `search_commits` to work +- Added comprehensive temporal parameter documentation to README with clear distinction between index time and commit time filtering +- Added comprehensive unit tests for date parsing utilities (90+ test cases) +- Added unit tests for git commit search functionality with mocking +- Added integration tests for temporal parameter validation +- Added unit tests for repository identifier resolution (both string and number types) + ### Changed - Updated API client to match the latest Sourcebot release. [#555](https://github.com/sourcebot-dev/sourcebot/pull/555) diff --git a/packages/mcp/README.md b/packages/mcp/README.md index a0a875a0f..fde5d743b 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -166,6 +166,8 @@ For a more detailed guide, checkout [the docs](https://docs.sourcebot.dev/docs/f Fetches code that matches the provided regex pattern in `query`. +**Temporal Filtering**: Use `since` and `until` to filter by repository index time (when Sourcebot last indexed the repo). This is different from commit time. See `search_commits` for commit-time filtering. +
<details> <summary>Parameters</summary> @@ -176,6 +178,9 @@ Fetches code that matches the provided regex pattern in `query`. | `filterByLanguages` | no | Restrict search to specific languages (GitHub linguist format, e.g., Python, JavaScript). | | `caseSensitive` | no | Case sensitive search (default: false). | | `includeCodeSnippets` | no | Include code snippets in results (default: false). | +| `gitRevision` | no | Git revision to search (e.g., 'main', 'develop', 'v1.0.0'). Defaults to HEAD. | +| `since` | no | Only search repos indexed after this date. Supports ISO 8601 or relative (e.g., "30 days ago"). | +| `until` | no | Only search repos indexed before this date. Supports ISO 8601 or relative (e.g., "yesterday"). | | `maxTokens` | no | Max tokens to return (default: env.DEFAULT_MINIMUM_TOKENS). | </details>
@@ -184,14 +189,18 @@ Fetches code that matches the provided regex pattern in `query`. Lists repositories indexed by Sourcebot with optional filtering and pagination. +**Temporal Filtering**: Use `activeAfter` and `activeBefore` to filter by repository index time (when Sourcebot last indexed the repo). This is the same filtering behavior as `search_code`'s `since`/`until` parameters. +
<details> <summary>Parameters</summary> -| Name | Required | Description | -|:-------------|:---------|:--------------------------------------------------------------------| -| `query` | no | Filter repositories by name (case-insensitive). | -| `pageNumber` | no | Page number (1-indexed, default: 1). | -| `limit` | no | Number of repositories per page (default: 50). | +| Name | Required | Description | +|:----------------|:---------|:-----------------------------------------------------------------------------------------------| +| `query` | no | Filter repositories by name (case-insensitive). | +| `pageNumber` | no | Page number (1-indexed, default: 1). | +| `limit` | no | Number of repositories per page (default: 50). | +| `activeAfter` | no | Only return repos indexed after this date. Supports ISO 8601 or relative (e.g., "30 days ago"). | +| `activeBefore` | no | Only return repos indexed before this date. Supports ISO 8601 or relative (e.g., "yesterday"). | </details>
@@ -208,6 +217,39 @@ Fetches the source code for a given file. | `repoId` | yes | The Sourcebot repository ID. | </details> +### search_commits + +Searches for commits in a specific repository based on actual commit time (NOT index time). + +**Requirements**: Repository must be cloned on the Sourcebot server disk. Sourcebot automatically clones repositories during indexing, but the cloning process may not be finished when this query is executed. Use `list_repos` first to get the repository ID. + +**Date Formats**: Supports ISO 8601 dates (e.g., "2024-01-01") and relative formats (e.g., "30 days ago", "last week", "yesterday"). + +<details>
+<summary>Parameters</summary> + +| Name | Required | Description | +|:-----------|:---------|:-----------------------------------------------------------------------------------------------| +| `repoId` | yes | Repository identifier: either numeric database ID (e.g., 123) or full repository name (e.g., "github.com/owner/repo") as returned by `list_repos`. | +| `query` | no | Search query to filter commits by message (case-insensitive). | +| `since` | no | Show commits after this date (by commit time). Supports ISO 8601 or relative formats. | +| `until` | no | Show commits before this date (by commit time). Supports ISO 8601 or relative formats. | +| `author` | no | Filter by author name or email (supports partial matches). | +| `maxCount` | no | Maximum number of commits to return (default: 50). | + +</details>
+ +## Date Format Examples + +All temporal parameters support: +- **ISO 8601**: `"2024-01-01"`, `"2024-12-31T23:59:59Z"` +- **Relative dates**: `"30 days ago"`, `"1 week ago"`, `"last month"`, `"yesterday"` + +**Important**: Different tools filter by different time dimensions: +- `search_code` `since`/`until`: Filters by **index time** (when Sourcebot indexed the repo) +- `list_repos` `activeAfter`/`activeBefore`: Filters by **index time** (when Sourcebot indexed the repo) +- `search_commits` `since`/`until`: Filters by **commit time** (actual git commit dates) + ## Supported Code Hosts Sourcebot supports the following code hosts: diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 3754c605f..fe3995c32 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -1,6 +1,6 @@ import { env } from './env.js'; -import { listRepositoriesResponseSchema, searchResponseSchema, fileSourceResponseSchema } from './schemas.js'; -import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, SearchRequest, SearchResponse, ServiceError } from './types.js'; +import { listRepositoriesResponseSchema, searchResponseSchema, fileSourceResponseSchema, searchCommitsResponseSchema } from './schemas.js'; +import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, SearchRequest, SearchResponse, ServiceError, SearchCommitsRequest, SearchCommitsResponse } from './types.js'; import { isServiceError } from './utils.js'; export const search = async (request: SearchRequest): Promise => { @@ -21,8 +21,16 @@ export const search = async (request: SearchRequest): Promise => { - const result = await fetch(`${env.SOURCEBOT_HOST}/api/repos`, { +export const listRepos = async (params?: { activeAfter?: string, activeBefore?: string }): Promise => { + const url = new URL(`${env.SOURCEBOT_HOST}/api/repos`); + if (params?.activeAfter) { + url.searchParams.append('activeAfter', params.activeAfter); + } + if (params?.activeBefore) { + 
url.searchParams.append('activeBefore', params.activeBefore); + } + + const result = await fetch(url.toString(), { method: 'GET', headers: { 'Content-Type': 'application/json', @@ -55,3 +63,21 @@ export const getFileSource = async (request: FileSourceRequest): Promise => { + const result = await fetch(`${env.SOURCEBOT_HOST}/api/commits`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-Org-Domain': '~', + ...(env.SOURCEBOT_API_KEY ? { 'X-Sourcebot-Api-Key': env.SOURCEBOT_API_KEY } : {}) + }, + body: JSON.stringify(request) + }).then(response => response.json()); + + if (isServiceError(result)) { + return result; + } + + return searchCommitsResponseSchema.parse(result); +} diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index a8d178894..f7762dcbb 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -5,7 +5,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import escapeStringRegexp from 'escape-string-regexp'; import { z } from 'zod'; -import { listRepos, search, getFileSource } from './client.js'; +import { listRepos, search, getFileSource, searchCommits } from './client.js'; import { env, numberSchema } from './env.js'; import { listReposRequestSchema } from './schemas.js'; import { TextContent } from './types.js'; @@ -49,6 +49,18 @@ server.tool( .boolean() .describe(`Whether to include the code snippets in the response (default: false). If false, only the file's URL, repository, and language will be returned. Set to false to get a more concise response.`) .optional(), + gitRevision: z + .string() + .describe(`The git revision to search in (e.g., 'main', 'HEAD', 'v1.0.0', 'a1b2c3d'). 
If not provided, defaults to the default branch (usually 'main' or 'master').`) + .optional(), + since: z + .string() + .describe(`Filter repositories by when they were last indexed by Sourcebot (NOT by commit time). Only searches in repos indexed after this date. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week', 'yesterday').`) + .optional(), + until: z + .string() + .describe(`Filter repositories by when they were last indexed by Sourcebot (NOT by commit time). Only searches in repos indexed before this date. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').`) + .optional(), maxTokens: numberSchema .describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`) .transform((val) => (val < env.DEFAULT_MINIMUM_TOKENS ? env.DEFAULT_MINIMUM_TOKENS : val)) @@ -61,6 +73,9 @@ server.tool( maxTokens = env.DEFAULT_MINIMUM_TOKENS, includeCodeSnippets = false, caseSensitive = false, + gitRevision, + since, + until, }) => { if (repoIds.length > 0) { query += ` ( repo:${repoIds.map(id => escapeStringRegexp(id)).join(' or repo:')} )`; @@ -76,6 +91,9 @@ server.tool( contextLines: env.DEFAULT_CONTEXT_LINES, isRegexEnabled: true, isCaseSensitivityEnabled: caseSensitive, + gitRevision, + since, + until, }); if (isServiceError(response)) { @@ -160,16 +178,95 @@ server.tool( } ); +server.tool( + "search_commits", + `Searches for commits in a specific repository based on actual commit time (NOT index time). + + **Requirements**: The repository must be cloned on the Sourcebot server disk. Sourcebot automatically clones repositories during indexing, but the cloning process may not be finished when this query is executed. If the repository is not found on the server disk, an error will be returned asking you to try again later. 
+ + **Date Formats**: Supports ISO 8601 (e.g., "2024-01-01") or relative formats (e.g., "30 days ago", "last week", "yesterday"). + + **YOU MUST** call 'list_repos' first to obtain the exact repository ID. + + If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`, + { + repoId: z.union([z.number(), z.string()]).describe(`Repository identifier. Can be either: + - Numeric database ID (e.g., 123) + - Full repository name (e.g., "github.com/owner/repo") as returned by 'list_repos' + + **YOU MUST** call 'list_repos' first to obtain the repository identifier.`), + query: z.string().describe(`Search query to filter commits by message content (case-insensitive).`).optional(), + since: z.string().describe(`Show commits more recent than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week').`).optional(), + until: z.string().describe(`Show commits older than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').`).optional(), + author: z.string().describe(`Filter commits by author name or email (supports partial matches and patterns).`).optional(), + maxCount: z.number().int().positive().default(50).describe(`Maximum number of commits to return (default: 50).`), + }, + async ({ repoId, query, since, until, author, maxCount }) => { + const result = await searchCommits({ + repoId, + query, + since, + until, + author, + maxCount, + }); + + if (isServiceError(result)) { + return { + content: [{ type: "text", text: `Error: ${result.message}` }], + isError: true, + }; + } + + return { + content: [{ type: "text", text: JSON.stringify(result, null, 2) }], + }; + } +); + server.tool( "list_repos", - "Lists repositories in the organization with optional filtering and pagination. 
If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.", - listReposRequestSchema.shape, - async ({ query, pageNumber = 1, limit = 50 }: { + `Lists repositories in the organization with optional filtering and pagination. + + **Temporal Filtering**: When using 'activeAfter' or 'activeBefore', only repositories indexed within the specified timeframe are returned. This filters by when Sourcebot last indexed the repository (indexedAt), NOT by git commit dates. For commit-time filtering, use 'search_commits'. When temporal filters are applied, the output includes a 'lastIndexed' field showing when each repository was last indexed. + + **Date Formats**: Supports ISO 8601 (e.g., "2024-01-01") and relative dates (e.g., "30 days ago", "last week", "yesterday"). + + If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`, + { + query: z + .string() + .describe("Filter repositories by name (case-insensitive).") + .optional(), + pageNumber: z + .number() + .int() + .positive() + .describe("Page number (1-indexed, default: 1)") + .default(1), + limit: z + .number() + .int() + .positive() + .describe("Number of repositories per page (default: 50)") + .default(50), + activeAfter: z + .string() + .describe("Only return repositories indexed after this date (filters by indexedAt). Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week').") + .optional(), + activeBefore: z + .string() + .describe("Only return repositories indexed before this date (filters by indexedAt). 
Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').") + .optional(), + }, + async ({ query, pageNumber = 1, limit = 50, activeAfter, activeBefore }: { query?: string; pageNumber?: number; limit?: number; + activeAfter?: string; + activeBefore?: string; }) => { - const response = await listRepos(); + const response = await listRepos({ activeAfter, activeBefore }); if (isServiceError(response)) { return { content: [{ @@ -199,9 +296,16 @@ server.tool( // Format output const content: TextContent[] = paginated.map(repo => { + let output = `id: ${repo.repoName}\nurl: ${repo.webUrl}`; + + // Include indexedAt when temporal filtering is used + if ((activeAfter || activeBefore) && repo.indexedAt) { + output += `\nlastIndexed: ${repo.indexedAt.toISOString()}`; + } + return { type: "text", - text: `id: ${repo.repoName}\nurl: ${repo.webUrl}`, + text: output, } }); diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index bab83a0d6..fe48a2c6a 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -1,4 +1,4 @@ -// @NOTE : Please keep this file in sync with @sourcebot/web/src/features/search/schemas.ts +// @NOTE : Please keep this file in sync with @sourcebot/web/src/features/search/types.ts // At some point, we should move these to a shared package... import { z } from "zod"; @@ -27,6 +27,9 @@ export const searchOptionsSchema = z.object({ whole: z.boolean().optional(), // Whether to return the whole file as part of the response. isRegexEnabled: z.boolean().optional(), // Whether to enable regular expression search. isCaseSensitivityEnabled: z.boolean().optional(), // Whether to enable case sensitivity. + gitRevision: z.string().optional(), // Filter by git branch/revision. + since: z.string().optional(), // Filter repositories by indexed date (start). Filters by when the repo was last indexed by Sourcebot, not by commit time. + until: z.string().optional(), // Filter repositories by indexed date (end). 
Filters by when the repo was last indexed by Sourcebot, not by commit time. }); export const searchRequestSchema = z.object({ @@ -142,6 +145,7 @@ export const searchResponseSchema = z.object({ })), repositoryInfo: z.array(repositoryInfoSchema), isSearchExhaustive: z.boolean(), + isBranchFilteringEnabled: z.boolean().optional(), // Whether branch filtering is enabled for this search. }); export const repositoryQuerySchema = z.object({ @@ -192,3 +196,24 @@ export const serviceErrorSchema = z.object({ errorCode: z.string(), message: z.string(), }); + +export const searchCommitsRequestSchema = z.object({ + repoId: z.union([z.number(), z.string()]), + query: z.string().optional(), + since: z.string().optional(), + until: z.string().optional(), + author: z.string().optional(), + maxCount: z.number().int().positive().max(500).optional(), +}); + +export const commitSchema = z.object({ + hash: z.string(), + date: z.string(), + message: z.string(), + refs: z.string(), + body: z.string(), + author_name: z.string(), + author_email: z.string(), +}); + +export const searchCommitsResponseSchema = z.array(commitSchema); diff --git a/packages/mcp/src/types.ts b/packages/mcp/src/types.ts index 9c858fe5b..720867a8f 100644 --- a/packages/mcp/src/types.ts +++ b/packages/mcp/src/types.ts @@ -10,6 +10,8 @@ import { fileSourceRequestSchema, symbolSchema, serviceErrorSchema, + searchCommitsRequestSchema, + searchCommitsResponseSchema, } from "./schemas.js"; import { z } from "zod"; @@ -29,3 +31,6 @@ export type FileSourceResponse = z.infer; export type TextContent = { type: "text", text: string }; export type ServiceError = z.infer; + +export type SearchCommitsRequest = z.infer; +export type SearchCommitsResponse = z.infer; diff --git a/packages/shared/src/constants.server.ts b/packages/shared/src/constants.server.ts new file mode 100644 index 000000000..a9efd184b --- /dev/null +++ b/packages/shared/src/constants.server.ts @@ -0,0 +1,7 @@ +import { env } from "./env.server.js"; +import 
path from "path"; + +// Guard against env.DATA_CACHE_DIR being undefined (e.g., when SKIP_ENV_VALIDATION=1) +// Use fallback to prevent module load errors in non-runtime contexts like builds +export const REPOS_CACHE_DIR = env.DATA_CACHE_DIR ? path.join(env.DATA_CACHE_DIR, 'repos') : '/tmp/sourcebot/repos'; +export const INDEX_CACHE_DIR = env.DATA_CACHE_DIR ? path.join(env.DATA_CACHE_DIR, 'index') : '/tmp/sourcebot/index'; diff --git a/packages/shared/src/index.server.ts b/packages/shared/src/index.server.ts index fabe608e7..4cb7bffbe 100644 --- a/packages/shared/src/index.server.ts +++ b/packages/shared/src/index.server.ts @@ -26,6 +26,10 @@ export { getConfigSettings, } from "./utils.js"; export * from "./constants.js"; +export { + REPOS_CACHE_DIR, + INDEX_CACHE_DIR, +} from "./constants.server.js"; export { env, resolveEnvironmentVariableOverridesFromConfig, diff --git a/packages/web/src/actions.ts b/packages/web/src/actions.ts index cc239fa9d..98d39271c 100644 --- a/packages/web/src/actions.ts +++ b/packages/web/src/actions.ts @@ -467,15 +467,42 @@ export const getUserApiKeys = async (domain: string): Promise<{ name: string; cr export const getRepos = async ({ where, take, + activeAfter, + activeBefore, }: { where?: Prisma.RepoWhereInput, - take?: number + take?: number, + activeAfter?: string, + activeBefore?: string, } = {}) => sew(() => withOptionalAuthV2(async ({ org, prisma }) => { + // Build temporal filter for indexedAt if activeAfter or activeBefore are provided + const temporalWhere: Prisma.RepoWhereInput = {}; + if (activeAfter || activeBefore) { + const { toDbDate, validateDateRange } = await import('@/features/search/dateUtils'); + + // Validate date range if both dates are provided + if (activeAfter && activeBefore) { + const dateRangeError = validateDateRange(activeAfter, activeBefore); + if (dateRangeError) { + throw new Error(dateRangeError); + } + } + + temporalWhere.indexedAt = {}; + if (activeAfter) { + temporalWhere.indexedAt.gte = 
toDbDate(activeAfter); + } + if (activeBefore) { + temporalWhere.indexedAt.lte = toDbDate(activeBefore); + } + } + const repos = await prisma.repo.findMany({ where: { orgId: org.id, ...where, + ...temporalWhere, }, take, }); diff --git a/packages/web/src/app/api/(server)/commits/route.ts b/packages/web/src/app/api/(server)/commits/route.ts new file mode 100644 index 000000000..941ca8605 --- /dev/null +++ b/packages/web/src/app/api/(server)/commits/route.ts @@ -0,0 +1,24 @@ +import { searchCommits } from "@/features/search/gitApi"; +import { serviceErrorResponse, schemaValidationError } from "@/lib/serviceError"; +import { isServiceError } from "@/lib/utils"; +import { NextRequest } from "next/server"; +import { searchCommitsRequestSchema } from "@/features/search/types"; + +export async function POST(request: NextRequest): Promise { + const body = await request.json(); + const parsed = await searchCommitsRequestSchema.safeParseAsync(body); + + if (!parsed.success) { + return serviceErrorResponse( + schemaValidationError(parsed.error) + ); + } + + const result = await searchCommits(parsed.data); + + if (isServiceError(result)) { + return serviceErrorResponse(result); + } + + return Response.json(result); +} diff --git a/packages/web/src/app/api/(server)/repos/route.ts b/packages/web/src/app/api/(server)/repos/route.ts index acc3f9ce0..7cfb14e7d 100644 --- a/packages/web/src/app/api/(server)/repos/route.ts +++ b/packages/web/src/app/api/(server)/repos/route.ts @@ -2,10 +2,18 @@ import { getRepos } from "@/actions"; import { serviceErrorResponse } from "@/lib/serviceError"; import { isServiceError } from "@/lib/utils"; import { GetReposResponse } from "@/lib/types"; +import { NextRequest } from "next/server"; -export const GET = async () => { - const response: GetReposResponse = await getRepos(); +export const GET = async (request: NextRequest) => { + const searchParams = request.nextUrl.searchParams; + const activeAfter = searchParams.get('activeAfter') || undefined; 
+ const activeBefore = searchParams.get('activeBefore') || undefined; + + const response: GetReposResponse = await getRepos({ + activeAfter, + activeBefore, + }); if (isServiceError(response)) { return serviceErrorResponse(response); } diff --git a/packages/web/src/features/search/dateUtils.test.ts b/packages/web/src/features/search/dateUtils.test.ts new file mode 100644 index 000000000..a64a83c14 --- /dev/null +++ b/packages/web/src/features/search/dateUtils.test.ts @@ -0,0 +1,379 @@ +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { + parseTemporalDate, + validateDateRange, + toDbDate, + toGitDate, +} from './dateUtils'; + +describe('dateUtils', () => { + // Mock the current time for consistent testing + const MOCK_NOW = new Date('2024-06-15T12:00:00.000Z'); + + beforeEach(() => { + vi.useFakeTimers(); + vi.setSystemTime(MOCK_NOW); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + describe('parseTemporalDate', () => { + describe('ISO 8601 dates', () => { + it('should parse ISO date (YYYY-MM-DD)', () => { + const result = parseTemporalDate('2024-01-01'); + expect(result).toBe('2024-01-01T00:00:00.000Z'); + }); + + it('should parse ISO datetime with timezone', () => { + const result = parseTemporalDate('2024-01-01T12:30:00Z'); + expect(result).toBe('2024-01-01T12:30:00.000Z'); + }); + + it('should parse ISO datetime without timezone', () => { + const result = parseTemporalDate('2024-01-01T12:30:00'); + expect(result).toBeDefined(); + expect(result).toContain('2024-01-01'); + }); + + it('should return undefined for undefined input', () => { + const result = parseTemporalDate(undefined); + expect(result).toBeUndefined(); + }); + + it('should return undefined for empty string', () => { + const result = parseTemporalDate(''); + expect(result).toBeUndefined(); + }); + }); + + describe('relative dates - yesterday', () => { + it('should parse "yesterday"', () => { + const result = parseTemporalDate('yesterday'); + 
expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should parse "YESTERDAY" (case insensitive)', () => { + const result = parseTemporalDate('YESTERDAY'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + }); + + describe('relative dates - N units ago', () => { + it('should parse "1 day ago"', () => { + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should parse "30 days ago"', () => { + const result = parseTemporalDate('30 days ago'); + expect(result).toBe('2024-05-16T12:00:00.000Z'); + }); + + it('should parse "1 week ago"', () => { + const result = parseTemporalDate('1 week ago'); + expect(result).toBe('2024-06-08T12:00:00.000Z'); + }); + + it('should parse "2 weeks ago"', () => { + const result = parseTemporalDate('2 weeks ago'); + expect(result).toBe('2024-06-01T12:00:00.000Z'); + }); + + it('should parse "1 month ago"', () => { + const result = parseTemporalDate('1 month ago'); + expect(result).toBe('2024-05-15T12:00:00.000Z'); + }); + + it('should parse "3 months ago"', () => { + const result = parseTemporalDate('3 months ago'); + expect(result).toBe('2024-03-15T12:00:00.000Z'); + }); + + it('should parse "1 year ago"', () => { + const result = parseTemporalDate('1 year ago'); + expect(result).toBe('2023-06-15T12:00:00.000Z'); + }); + + it('should parse "2 hours ago"', () => { + const result = parseTemporalDate('2 hours ago'); + expect(result).toBe('2024-06-15T10:00:00.000Z'); + }); + + it('should parse "30 minutes ago"', () => { + const result = parseTemporalDate('30 minutes ago'); + expect(result).toBe('2024-06-15T11:30:00.000Z'); + }); + + it('should parse "45 seconds ago"', () => { + const result = parseTemporalDate('45 seconds ago'); + expect(result).toBe('2024-06-15T11:59:15.000Z'); + }); + + it('should handle singular "day" without "s"', () => { + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should be 
case insensitive', () => { + const result = parseTemporalDate('30 DAYS AGO'); + expect(result).toBe('2024-05-16T12:00:00.000Z'); + }); + }); + + describe('relative dates - last unit', () => { + it('should parse "last week"', () => { + const result = parseTemporalDate('last week'); + expect(result).toBe('2024-06-08T12:00:00.000Z'); + }); + + it('should parse "last month"', () => { + const result = parseTemporalDate('last month'); + expect(result).toBe('2024-05-15T12:00:00.000Z'); + }); + + it('should parse "last year"', () => { + const result = parseTemporalDate('last year'); + expect(result).toBe('2023-06-15T12:00:00.000Z'); + }); + + it('should be case insensitive', () => { + const result = parseTemporalDate('LAST WEEK'); + expect(result).toBe('2024-06-08T12:00:00.000Z'); + }); + }); + + describe('invalid or unknown formats', () => { + it('should return original string for unrecognized format', () => { + const result = parseTemporalDate('some random string'); + expect(result).toBe('some random string'); + }); + + it('should return original string for git-specific formats', () => { + // Git understands these but our parser doesn't convert them + const result = parseTemporalDate('2 weeks 3 days ago'); + expect(result).toBe('2 weeks 3 days ago'); + }); + }); + }); + + describe('validateDateRange', () => { + it('should return null for valid date range', () => { + const error = validateDateRange('2024-01-01', '2024-12-31'); + expect(error).toBeNull(); + }); + + it('should return null when only since is provided', () => { + const error = validateDateRange('2024-01-01', undefined); + expect(error).toBeNull(); + }); + + it('should return null when only until is provided', () => { + const error = validateDateRange(undefined, '2024-12-31'); + expect(error).toBeNull(); + }); + + it('should return null when both are undefined', () => { + const error = validateDateRange(undefined, undefined); + expect(error).toBeNull(); + }); + + it('should return error when since > until', () 
=> { + const error = validateDateRange('2024-12-31', '2024-01-01'); + expect(error).toContain('since'); + expect(error).toContain('until'); + expect(error).toContain('before'); + }); + + it('should validate relative dates', () => { + const error = validateDateRange('30 days ago', '1 day ago'); + expect(error).toBeNull(); + }); + + it('should return error for invalid relative date range', () => { + const error = validateDateRange('1 day ago', '30 days ago'); + expect(error).toContain('since'); + expect(error).toContain('until'); + }); + + it('should handle mixed ISO and relative dates', () => { + const error = validateDateRange('2024-01-01', '30 days ago'); + expect(error).toBeNull(); // 2024-01-01 is before 30 days ago + }); + + it('should return null for same date', () => { + const error = validateDateRange('2024-06-15', '2024-06-15'); + expect(error).toBeNull(); + }); + }); + + describe('toDbDate', () => { + it('should convert ISO date to Date object', () => { + const result = toDbDate('2024-01-01'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-01-01T00:00:00.000Z'); + }); + + it('should convert relative date to Date object', () => { + const result = toDbDate('30 days ago'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-05-16T12:00:00.000Z'); + }); + + it('should return undefined for undefined input', () => { + const result = toDbDate(undefined); + expect(result).toBeUndefined(); + }); + + it('should return undefined for empty string', () => { + const result = toDbDate(''); + expect(result).toBeUndefined(); + }); + + it('should handle "yesterday"', () => { + const result = toDbDate('yesterday'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should handle "last week"', () => { + const result = toDbDate('last week'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-06-08T12:00:00.000Z'); + 
}); + }); + + describe('toGitDate', () => { + it('should preserve ISO date format', () => { + const result = toGitDate('2024-01-01'); + expect(result).toBe('2024-01-01'); + }); + + it('should preserve ISO datetime format', () => { + const result = toGitDate('2024-01-01T12:30:00Z'); + expect(result).toBe('2024-01-01T12:30:00Z'); + }); + + it('should preserve "N days ago" format', () => { + const result = toGitDate('30 days ago'); + expect(result).toBe('30 days ago'); + }); + + it('should preserve "yesterday" format', () => { + const result = toGitDate('yesterday'); + expect(result).toBe('yesterday'); + }); + + it('should preserve "last week" format', () => { + const result = toGitDate('last week'); + expect(result).toBe('last week'); + }); + + it('should preserve "last month" format', () => { + const result = toGitDate('last month'); + expect(result).toBe('last month'); + }); + + it('should preserve "last year" format', () => { + const result = toGitDate('last year'); + expect(result).toBe('last year'); + }); + + it('should return undefined for undefined input', () => { + const result = toGitDate(undefined); + expect(result).toBeUndefined(); + }); + + it('should pass through unrecognized format unchanged', () => { + // For formats git doesn't natively understand, pass through to git + const result = toGitDate('some random string'); + expect(result).toBe('some random string'); + }); + + it('should preserve relative time formats', () => { + const result = toGitDate('2 weeks ago'); + expect(result).toBe('2 weeks ago'); + }); + }); + + describe('edge cases', () => { + it('should handle dates at month boundaries', () => { + vi.setSystemTime(new Date('2024-03-31T12:00:00.000Z')); + const result = parseTemporalDate('1 month ago'); + // JavaScript Date handles month rollover + expect(result).toBeDefined(); + }); + + it('should handle dates at year boundaries', () => { + vi.setSystemTime(new Date('2024-01-15T12:00:00.000Z')); + const result = parseTemporalDate('1 month 
ago'); + expect(result).toBe('2023-12-15T12:00:00.000Z'); + }); + + it('should handle leap year February', () => { + vi.setSystemTime(new Date('2024-03-01T12:00:00.000Z')); + const result = parseTemporalDate('1 month ago'); + expect(result).toBe('2024-02-01T12:00:00.000Z'); + }); + + it('should handle midnight times', () => { + vi.setSystemTime(new Date('2024-06-15T00:00:00.000Z')); + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T00:00:00.000Z'); + }); + + it('should handle end of day times', () => { + vi.setSystemTime(new Date('2024-06-15T23:59:59.999Z')); + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T23:59:59.999Z'); + }); + }); + + describe('integration scenarios', () => { + it('should correctly validate a typical user query range', () => { + const since = '30 days ago'; + const until = 'yesterday'; + + const parsedSince = parseTemporalDate(since); + const parsedUntil = parseTemporalDate(until); + const validationError = validateDateRange(since, until); + + expect(parsedSince).toBe('2024-05-16T12:00:00.000Z'); + expect(parsedUntil).toBe('2024-06-14T12:00:00.000Z'); + expect(validationError).toBeNull(); + }); + + it('should correctly convert for database queries', () => { + const since = '7 days ago'; + const until = 'yesterday'; + + const dbSince = toDbDate(since); + const dbUntil = toDbDate(until); + + expect(dbSince).toBeInstanceOf(Date); + expect(dbUntil).toBeInstanceOf(Date); + expect(dbSince!.getTime()).toBeLessThan(dbUntil!.getTime()); + }); + + it('should correctly preserve for git commands', () => { + const since = '30 days ago'; + const until = 'yesterday'; + + const gitSince = toGitDate(since); + const gitUntil = toGitDate(until); + + // Git natively understands these, so they're preserved + expect(gitSince).toBe('30 days ago'); + expect(gitUntil).toBe('yesterday'); + }); + + it('should handle mixed ISO and relative dates in range validation', () => { + const since = '2024-01-01'; 
+ const until = '7 days ago'; + + const validationError = validateDateRange(since, until); + expect(validationError).toBeNull(); + }); + }); +}); diff --git a/packages/web/src/features/search/dateUtils.ts b/packages/web/src/features/search/dateUtils.ts new file mode 100644 index 000000000..f28e9a5bd --- /dev/null +++ b/packages/web/src/features/search/dateUtils.ts @@ -0,0 +1,186 @@ +/** + * Utilities for parsing and validating date parameters for temporal queries. + * Supports both absolute (ISO 8601) and relative date formats. + */ + +/** + * Parse a date string that can be either: + * - ISO 8601 format (e.g., "2024-01-01", "2024-01-01T12:00:00Z") + * - Relative format (e.g., "30 days ago", "1 week ago", "yesterday", "last week") + * + * @param dateStr - The date string to parse + * @returns ISO 8601 string if successfully parsed, original string if not parseable (to allow git to try), or undefined if input is falsy + * + * @example + * parseTemporalDate('2024-01-01') // '2024-01-01T00:00:00.000Z' + * parseTemporalDate('30 days ago') // Calculates and returns ISO string + * parseTemporalDate('yesterday') // Yesterday's date as ISO string + * parseTemporalDate('some-git-format') // 'some-git-format' (passed through) + * parseTemporalDate(undefined) // undefined + */ +export function parseTemporalDate(dateStr: string | undefined): string | undefined { + if (!dateStr) { + return undefined; + } + + // Try parsing as ISO date first + const isoDate = new Date(dateStr); + if (!isNaN(isoDate.getTime())) { + return isoDate.toISOString(); + } + + // Parse relative dates (Git-compatible format) + // Git accepts these natively, but we normalize to ISO for consistency + const lowerStr = dateStr.toLowerCase().trim(); + + // Handle "yesterday" + if (lowerStr === 'yesterday') { + const date = new Date(); + date.setDate(date.getDate() - 1); + return date.toISOString(); + } + + // Handle "N <unit>s ago" format + const matchRelative = 
lowerStr.match(/^(\d+)\s+(second|minute|hour|day|week|month|year)s?\s+ago$/i); + if (matchRelative) { + const amount = parseInt(matchRelative[1], 10); + const unit = matchRelative[2].toLowerCase(); + const date = new Date(); + + switch (unit) { + case 'second': + date.setSeconds(date.getSeconds() - amount); + break; + case 'minute': + date.setMinutes(date.getMinutes() - amount); + break; + case 'hour': + date.setHours(date.getHours() - amount); + break; + case 'day': + date.setDate(date.getDate() - amount); + break; + case 'week': + date.setDate(date.getDate() - (amount * 7)); + break; + case 'month': + date.setMonth(date.getMonth() - amount); + break; + case 'year': + date.setFullYear(date.getFullYear() - amount); + break; + } + + return date.toISOString(); + } + + // Handle "last <unit>" format + const matchLast = lowerStr.match(/^last\s+(week|month|year)$/i); + if (matchLast) { + const unit = matchLast[1].toLowerCase(); + const date = new Date(); + + switch (unit) { + case 'week': + date.setDate(date.getDate() - 7); + break; + case 'month': + date.setMonth(date.getMonth() - 1); + break; + case 'year': + date.setFullYear(date.getFullYear() - 1); + break; + } + + return date.toISOString(); + } + + // If we can't parse it, return the original string + // This allows git log to try parsing it with its own logic + return dateStr; +} + +/** + * Validate that a date range is consistent (since <= until).
+ * + * @param since - Start date (inclusive) + * @param until - End date (inclusive) + * @returns Error message if invalid, null if valid + */ +export function validateDateRange(since: string | undefined, until: string | undefined): string | null { + if (!since || !until) { + return null; // No validation needed if either is missing + } + + const parsedSince = parseTemporalDate(since); + const parsedUntil = parseTemporalDate(until); + + if (!parsedSince || !parsedUntil) { + return null; // Let individual date parsing handle invalid formats + } + + const sinceDate = new Date(parsedSince); + const untilDate = new Date(parsedUntil); + + if (isNaN(sinceDate.getTime()) || isNaN(untilDate.getTime())) { + return null; + } + + if (sinceDate > untilDate) { + return `Invalid date range: 'since' (${since}) must be before 'until' (${until})`; + } + + return null; +} + +/** + * Convert a date to a format suitable for Prisma database queries. + * Returns a Date object or undefined. + * + * @param dateStr - The date string to convert + * @returns Date object or undefined + */ +export function toDbDate(dateStr: string | undefined): Date | undefined { + if (!dateStr) { + return undefined; + } + + const parsed = parseTemporalDate(dateStr); + if (!parsed) { + return undefined; + } + + const date = new Date(parsed); + return isNaN(date.getTime()) ? undefined : date; +} + +/** + * Convert a date to a format suitable for git log commands. + * Git accepts relative formats directly, so we preserve them when possible. 
+ * + * @param dateStr - The date string to convert + * @returns Git-compatible date string or undefined + */ +export function toGitDate(dateStr: string | undefined): string | undefined { + if (!dateStr) { + return undefined; + } + + // Git natively understands these formats, so preserve them + const gitNativeFormats = [ + /^\d+\s+(second|minute|hour|day|week|month|year)s?\s+ago$/i, + /^yesterday$/i, + /^last\s+(week|month|year)$/i, + /^\d{4}-\d{2}-\d{2}$/, // ISO date + /^\d{4}-\d{2}-\d{2}T/, // ISO datetime + ]; + + for (const pattern of gitNativeFormats) { + if (pattern.test(dateStr)) { + return dateStr; // Git can handle this directly + } + } + + // Otherwise, parse and convert to ISO + return parseTemporalDate(dateStr); +} diff --git a/packages/web/src/features/search/gitApi.test.ts b/packages/web/src/features/search/gitApi.test.ts new file mode 100644 index 000000000..1c4a46a58 --- /dev/null +++ b/packages/web/src/features/search/gitApi.test.ts @@ -0,0 +1,575 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { searchCommits } from './gitApi'; +import * as dateUtils from './dateUtils'; + +// Mock dependencies +vi.mock('simple-git'); +vi.mock('fs'); +vi.mock('@sourcebot/shared', () => ({ + REPOS_CACHE_DIR: '/mock/cache/dir', +})); +vi.mock('@/lib/serviceError', () => ({ + unexpectedError: (message: string) => ({ + errorCode: 'UNEXPECTED_ERROR', + message, + }), +})); +vi.mock('@/actions', () => ({ + sew: async (fn: () => Promise | T): Promise => { + try { + return await fn(); + } catch (error) { + // Mock sew to convert thrown errors to ServiceError + return { + errorCode: 'UNEXPECTED_ERROR', + message: error instanceof Error ? 
error.message : String(error), + } as T; + } + }, +})); +// Create a mock findFirst function that we can configure per-test +const mockFindFirst = vi.fn(); + +vi.mock('@/withAuthV2', () => ({ + withOptionalAuthV2: async (fn: (args: { org: { id: number; name: string }; prisma: unknown }) => Promise): Promise => { + // Mock withOptionalAuthV2 to provide org and prisma context + const mockOrg = { id: 1, name: 'test-org' }; + const mockPrisma = { + repo: { + findFirst: mockFindFirst, + }, + }; + return await fn({ org: mockOrg, prisma: mockPrisma }); + }, +})); +vi.mock('@/lib/utils', () => ({ + isServiceError: (obj: unknown): obj is { errorCode: string } => { + return obj !== null && typeof obj === 'object' && 'errorCode' in obj; + }, +})); + +// Import mocked modules +import { simpleGit } from 'simple-git'; +import { existsSync } from 'fs'; + +describe('searchCommits', () => { + const mockGitLog = vi.fn(); + const mockSimpleGit = simpleGit as unknown as vi.Mock; + const mockExistsSync = existsSync as unknown as vi.Mock; + + beforeEach(() => { + vi.clearAllMocks(); + + // Setup default mocks + mockExistsSync.mockReturnValue(true); + mockSimpleGit.mockReturnValue({ + log: mockGitLog, + }); + }); + + describe('repository validation', () => { + it('should return error when repository does not exist on disk', async () => { + mockExistsSync.mockReturnValue(false); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('not found on Sourcebot server disk'), + }); + expect(result).toMatchObject({ + message: expect.stringContaining('123'), + }); + }); + + it('should check the correct repository path', async () => { + mockExistsSync.mockReturnValue(false); + + await searchCommits({ + repoId: 456, + }); + + expect(mockExistsSync).toHaveBeenCalledWith('/mock/cache/dir/456'); + }); + }); + + describe('date range validation', () => { + it('should validate date range and 
return error for invalid range', async () => { + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue( + 'Invalid date range: since must be before until' + ); + + const result = await searchCommits({ + repoId: 123, + since: '2024-12-31', + until: '2024-01-01', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: 'Invalid date range: since must be before until', + }); + }); + + it('should proceed when date range is valid', async () => { + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue(null); + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await searchCommits({ + repoId: 123, + since: '2024-01-01', + until: '2024-12-31', + }); + + expect(Array.isArray(result)).toBe(true); + }); + }); + + describe('date parsing', () => { + it('should parse dates using toGitDate', async () => { + const toGitDateSpy = vi.spyOn(dateUtils, 'toGitDate'); + toGitDateSpy.mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 123, + since: '30 days ago', + until: 'yesterday', + }); + + expect(toGitDateSpy).toHaveBeenCalledWith('30 days ago'); + expect(toGitDateSpy).toHaveBeenCalledWith('yesterday'); + }); + + it('should pass parsed dates to git log', async () => { + vi.spyOn(dateUtils, 'toGitDate') + .mockReturnValueOnce('2024-01-01') + .mockReturnValueOnce('2024-12-31'); + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 123, + since: '30 days ago', + until: 'yesterday', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--since': '2024-01-01', + '--until': '2024-12-31', + }) + ); + }); + }); + + describe('git log options', () => { + beforeEach(() => { + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: [] }); + }); + + it('should set default maxCount', async () => { + await 
searchCommits({ + repoId: 123, + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + maxCount: 50, + }) + ); + }); + + it('should use custom maxCount', async () => { + await searchCommits({ + repoId: 123, + maxCount: 100, + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + maxCount: 100, + }) + ); + }); + + it('should add --since when since is provided', async () => { + await searchCommits({ + repoId: 123, + since: '30 days ago', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--since': '30 days ago', + }) + ); + }); + + it('should add --until when until is provided', async () => { + await searchCommits({ + repoId: 123, + until: 'yesterday', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--until': 'yesterday', + }) + ); + }); + + it('should add --author when author is provided', async () => { + await searchCommits({ + repoId: 123, + author: 'john@example.com', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--author': 'john@example.com', + }) + ); + }); + + it('should add --grep and --regexp-ignore-case when query is provided', async () => { + await searchCommits({ + repoId: 123, + query: 'fix bug', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--grep': 'fix bug', + '--regexp-ignore-case': null, + }) + ); + }); + + it('should combine all options', async () => { + await searchCommits({ + repoId: 123, + query: 'feature', + since: '2024-01-01', + until: '2024-12-31', + author: 'jane@example.com', + maxCount: 25, + }); + + expect(mockGitLog).toHaveBeenCalledWith({ + maxCount: 25, + '--since': '2024-01-01', + '--until': '2024-12-31', + '--author': 'jane@example.com', + '--grep': 'feature', + '--regexp-ignore-case': null, + }); + }); + }); + + describe('successful responses', () => { + it('should return commit array from git log', async () => { + const mockCommits = [ + { + hash: 
'abc123', + date: '2024-06-15', + message: 'feat: add feature', + refs: 'HEAD -> main', + body: '', + author_name: 'John Doe', + author_email: 'john@example.com', + }, + { + hash: 'def456', + date: '2024-06-14', + message: 'fix: bug fix', + refs: '', + body: '', + author_name: 'Jane Smith', + author_email: 'jane@example.com', + }, + ]; + + mockGitLog.mockResolvedValue({ all: mockCommits }); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toEqual(mockCommits); + }); + + it('should return empty array when no commits match', async () => { + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await searchCommits({ + repoId: 123, + query: 'nonexistent', + }); + + expect(result).toEqual([]); + }); + }); + + describe('error handling', () => { + it('should return error for "not a git repository"', async () => { + mockGitLog.mockRejectedValue(new Error('not a git repository')); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('not a valid git repository'), + }); + }); + + it('should return error for "ambiguous argument"', async () => { + mockGitLog.mockRejectedValue(new Error('ambiguous argument')); + + const result = await searchCommits({ + repoId: 123, + since: 'invalid-date', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Invalid git reference or date format'), + }); + }); + + it('should return error for timeout', async () => { + mockGitLog.mockRejectedValue(new Error('timeout exceeded')); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('timed out'), + }); + }); + + it('should return ServiceError for other Error instances', async () => { + mockGitLog.mockRejectedValue(new Error('some other error')); + + const result = await 
searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Failed to search commits in repository 123'), + }); + }); + + it('should return ServiceError for non-Error exceptions', async () => { + mockGitLog.mockRejectedValue('string error'); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Failed to search commits in repository 123'), + }); + }); + }); + + describe('git client configuration', () => { + it('should configure simple-git with correct options', async () => { + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 123, + }); + + expect(mockSimpleGit).toHaveBeenCalledWith({ + baseDir: '/mock/cache/dir/123', + binary: 'git', + maxConcurrentProcesses: 6, + timeout: { + block: 30000, + }, + }); + }); + + it('should create git client for the correct repository path', async () => { + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 456, + }); + + expect(mockSimpleGit).toHaveBeenCalledWith( + expect.objectContaining({ + baseDir: '/mock/cache/dir/456', + }) + ); + }); + }); + + describe('integration scenarios', () => { + it('should handle a typical commit search with filters', async () => { + const mockCommits = [ + { + hash: 'abc123', + date: '2024-06-10T14:30:00Z', + message: 'fix: resolve authentication bug', + refs: 'HEAD -> main', + body: 'Fixed issue with JWT token validation', + author_name: 'Security Team', + author_email: 'security@example.com', + }, + ]; + + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue(null); + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: mockCommits }); + + const result = await searchCommits({ + repoId: 123, + query: 'authentication', + since: '30 days ago', + until: 'yesterday', + author: 'security', + maxCount: 
20, + }); + + expect(result).toEqual(mockCommits); + expect(mockGitLog).toHaveBeenCalledWith({ + maxCount: 20, + '--since': '30 days ago', + '--until': 'yesterday', + '--author': 'security', + '--grep': 'authentication', + '--regexp-ignore-case': null, + }); + }); + + it('should handle repository not cloned yet', async () => { + mockExistsSync.mockReturnValue(false); + + const result = await searchCommits({ + repoId: 999, + query: 'feature', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + }); + expect(result).toHaveProperty('message'); + const message = (result as { message: string }).message; + expect(message).toContain('999'); + expect(message).toContain('not found on Sourcebot server disk'); + expect(message).toContain('cloning process may not be finished yet'); + }); + }); + + describe('repository identifier resolution', () => { + beforeEach(() => { + // Reset mockFindFirst before each test in this suite + mockFindFirst.mockReset(); + }); + + it('should accept numeric repository ID', async () => { + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(Array.isArray(result)).toBe(true); + expect(mockExistsSync).toHaveBeenCalledWith('/mock/cache/dir/123'); + // mockFindFirst should not be called for numeric IDs + expect(mockFindFirst).not.toHaveBeenCalled(); + }); + + it('should accept string repository name and resolve to numeric ID', async () => { + mockFindFirst.mockResolvedValue({ id: 456 }); + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await searchCommits({ + repoId: 'github.com/owner/repo', + }); + + expect(Array.isArray(result)).toBe(true); + expect(mockExistsSync).toHaveBeenCalledWith('/mock/cache/dir/456'); + expect(mockFindFirst).toHaveBeenCalledWith({ + where: { + name: 'github.com/owner/repo', + orgId: 1, + }, + select: { id: true }, + }); + }); + + it('should return error when string repository name is not found', async () => { + 
mockFindFirst.mockResolvedValue(null); + + const result = await searchCommits({ + repoId: 'github.com/nonexistent/repo', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Repository "github.com/nonexistent/repo" not found'), + }); + expect(result).toMatchObject({ + message: expect.stringContaining('Use \'list_repos\' to get valid repository identifiers'), + }); + }); + + it('should query database with correct parameters for string repo name', async () => { + mockFindFirst.mockResolvedValue({ id: 789 }); + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 'github.com/example/project', + }); + + expect(mockFindFirst).toHaveBeenCalledWith({ + where: { + name: 'github.com/example/project', + orgId: 1, + }, + select: { id: true }, + }); + }); + + it('should work with string repo name in full search scenario', async () => { + const mockCommits = [ + { + hash: 'xyz789', + date: '2024-06-20T10:00:00Z', + message: 'feat: new feature', + refs: 'main', + body: 'Added new functionality', + author_name: 'Developer', + author_email: 'dev@example.com', + }, + ]; + + mockFindFirst.mockResolvedValue({ id: 555 }); + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue(null); + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: mockCommits }); + + const result = await searchCommits({ + repoId: 'github.com/test/repository', + query: 'feature', + since: '7 days ago', + author: 'Developer', + }); + + expect(result).toEqual(mockCommits); + expect(mockExistsSync).toHaveBeenCalledWith('/mock/cache/dir/555'); + }); + }); +}); diff --git a/packages/web/src/features/search/gitApi.ts b/packages/web/src/features/search/gitApi.ts new file mode 100644 index 000000000..e6e1269f3 --- /dev/null +++ b/packages/web/src/features/search/gitApi.ts @@ -0,0 +1,197 @@ +import { simpleGit } from 'simple-git'; +import { existsSync } from 'fs'; +import { 
REPOS_CACHE_DIR } from '@sourcebot/shared'; +import path from 'path'; +import { ServiceError, unexpectedError } from '@/lib/serviceError'; +import { sew } from '@/actions'; +import { toGitDate, validateDateRange } from './dateUtils'; +import { withOptionalAuthV2 } from '@/withAuthV2'; +import { isServiceError } from '@/lib/utils'; +import type { PrismaClient } from '@sourcebot/db'; + +const createGitClientForPath = (repoPath: string) => { + return simpleGit({ + baseDir: repoPath, + binary: 'git', + maxConcurrentProcesses: 6, + timeout: { + block: 30000, // 30 second timeout for git operations + }, + }); +} + +/** + * Resolves a repository identifier to a numeric ID. + * Accepts both numeric IDs and string repository names. + * + * @param identifier - Either a numeric repo ID or a string repo name (e.g., "github.com/owner/repo") + * @param orgId - Organization ID to scope the lookup + * @param prisma - Prisma client instance + * @returns Numeric repository ID or ServiceError if not found + */ +const resolveRepoId = async ( + identifier: string | number, + orgId: number, + prisma: PrismaClient +): Promise<number | ServiceError> => { + // If already numeric, return as-is. NOTE(review): this path skips the orgId-scoped lookup below, so a numeric ID is never checked against the caller's org - confirm that is intended + if (typeof identifier === 'number') { + return identifier; + } + + // Convert string name to numeric ID + const repo = await prisma.repo.findFirst({ + where: { + name: identifier, + orgId: orgId, + }, + select: { id: true } + }); + + if (!repo) { + return unexpectedError( + `Repository "${identifier}" not found. ` + + `Use 'list_repos' to get valid repository identifiers.` + ); + } + + return repo.id; +} + +export interface SearchCommitsRequest { + repoId: string | number; + query?: string; + since?: string; + until?: string; + author?: string; + maxCount?: number; +} + +export interface Commit { + hash: string; + date: string; + message: string; + refs: string; + body: string; + author_name: string; + author_email: string; +} + +/** + * Search commits in a repository using git log.
+ * + * **Date Formats**: Supports both ISO 8601 dates and relative formats + * (e.g., "30 days ago", "last week", "yesterday"). Git natively handles + * these formats in the --since and --until flags. + * + * **Requirements**: The repository must be cloned on the Sourcebot server disk. + * Sourcebot automatically clones repositories during indexing, but the cloning + * process might not be finished when this query is executed. If the repository + * is not found on the server disk, an error will be returned. + * + * **Repository ID**: Accepts either a numeric database ID or a string repository name + * (e.g., "github.com/owner/repo") as returned by list_repos. + * + * @param request - Search parameters including timeframe filters + * @returns Array of commits or ServiceError + */ +export const searchCommits = async ({ + repoId: repoIdInput, + query, + since, + until, + author, + maxCount = 50, +}: SearchCommitsRequest): Promise<Commit[] | ServiceError> => sew(() => + withOptionalAuthV2(async ({ org, prisma }) => { + // Resolve repository identifier to numeric ID + const repoId = await resolveRepoId(repoIdInput, org.id, prisma); + if (isServiceError(repoId)) { + return repoId; + } + + const repoPath = path.join(REPOS_CACHE_DIR, repoId.toString()); + + // Check if repository exists on Sourcebot server disk + if (!existsSync(repoPath)) { + return unexpectedError( + `Repository ${repoId} not found on Sourcebot server disk. ` + + `Sourcebot automatically clones repositories during indexing, but the ` + + `cloning process may not be finished yet. Please try again later. 
` + + `Path checked: ${repoPath}` + ); + } + + // Validate date range if both since and until are provided + const dateRangeError = validateDateRange(since, until); + if (dateRangeError) { + return unexpectedError(dateRangeError); + } + + // Parse dates to git-compatible format + const gitSince = toGitDate(since); + const gitUntil = toGitDate(until); + + const git = createGitClientForPath(repoPath); + + try { + const logOptions: Record<string, string | number | null> = { + maxCount, + }; + + if (gitSince) { + logOptions['--since'] = gitSince; + } + + if (gitUntil) { + logOptions['--until'] = gitUntil; + } + + if (author) { + logOptions['--author'] = author; + } + + if (query) { + logOptions['--grep'] = query; + logOptions['--regexp-ignore-case'] = null; // Case insensitive + } + + const log = await git.log(logOptions); + return log.all as unknown as Commit[]; + } catch (error: unknown) { + // Provide detailed error messages for common git errors + const errorMessage = error instanceof Error ? error.message : String(error); + + if (errorMessage.includes('not a git repository')) { + return unexpectedError( + `Invalid git repository at ${repoPath}. ` + + `The directory exists but is not a valid git repository.` + ); + } + + if (errorMessage.includes('ambiguous argument')) { + return unexpectedError( + `Invalid git reference or date format. ` + + `Please check your date parameters: since="${since}", until="${until}"` + ); + } + + if (errorMessage.includes('timeout')) { + return unexpectedError( + `Git operation timed out after 30 seconds for repository ${repoId}. 
` +
+                `The repository may be too large or the git operation is taking too long.`
+            );
+        }
+
+        // Generic error fallback
+        if (error instanceof Error) {
+            throw new Error(
+                `Failed to search commits in repository ${repoId}: ${error.message}`
+            );
+        } else {
+            throw new Error(
+                `Failed to search commits in repository ${repoId}: ${errorMessage}`
+            );
+        }
+    }
+}));
diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts
index cf362c861..cb9ebe684 100644
--- a/packages/web/src/features/search/searchApi.ts
+++ b/packages/web/src/features/search/searchApi.ts
@@ -1,14 +1,13 @@
 import { sew } from "@/actions";
 import { getRepoPermissionFilterForUser } from "@/prisma";
 import { withOptionalAuthV2 } from "@/withAuthV2";
-import { PrismaClient, UserWithAccounts } from "@sourcebot/db";
-import { createLogger, env, hasEntitlement } from "@sourcebot/shared";
+import { PrismaClient, Prisma, UserWithAccounts } from "@sourcebot/db";
+import { env, hasEntitlement } from "@sourcebot/shared";
 import { QueryIR } from './ir';
 import { parseQuerySyntaxIntoIR } from './parser';
 import { SearchOptions } from "./types";
 import { createZoektSearchRequest, zoektSearch, zoektStreamSearch } from './zoektSearcher';
-
-const logger = createLogger("searchApi");
+import { toDbDate, validateDateRange } from './dateUtils';
 
 type QueryStringSearchRequest = {
     queryType: 'string';
@@ -26,16 +25,22 @@ type QueryIRSearchRequest = {
 type SearchRequest = QueryStringSearchRequest | QueryIRSearchRequest;
 
 export const search = (request: SearchRequest) => sew(() =>
-    withOptionalAuthV2(async ({ prisma, user }) => {
-        const repoSearchScope = await getAccessibleRepoNamesForUser({ user, prisma });
+    withOptionalAuthV2(async ({ prisma, user, org }) => {
+        // Repos accessible to the user, narrowed by the optional since/until indexing window.
+        const repoSearchScope = await getRepoSearchScope({ user, prisma, org, options: request.options });
 
         // If needed, parse the query syntax into the query intermediate representation.
-        const query = request.queryType === 'string' ? await parseQuerySyntaxIntoIR({
+        let query = request.queryType === 'string' ? await parseQuerySyntaxIntoIR({
             query: request.query,
             options: request.options,
             prisma,
         }) : request.query;
 
+        // Apply branch filtering if gitRevision is specified
+        if (request.options.gitRevision) {
+            query = applyBranchFilter(query, request.options.gitRevision);
+        }
+
         const zoektSearchRequest = await createZoektSearchRequest({
             query,
             options: request.options,
@@ -46,16 +51,22 @@ export const search = (request: SearchRequest) => sew(() =>
     }));
 
 export const streamSearch = (request: SearchRequest) => sew(() =>
-    withOptionalAuthV2(async ({ prisma, user }) => {
-        const repoSearchScope = await getAccessibleRepoNamesForUser({ user, prisma });
+    withOptionalAuthV2(async ({ prisma, user, org }) => {
+        // Repos accessible to the user, narrowed by the optional since/until indexing window.
+        const repoSearchScope = await getRepoSearchScope({ user, prisma, org, options: request.options });
 
         // If needed, parse the query syntax into the query intermediate representation.
-        const query = request.queryType === 'string' ? await parseQuerySyntaxIntoIR({
+        let query = request.queryType === 'string' ? await parseQuerySyntaxIntoIR({
             query: request.query,
             options: request.options,
             prisma,
         }) : request.query;
 
+        // Apply branch filtering if gitRevision is specified
+        if (request.options.gitRevision) {
+            query = applyBranchFilter(query, request.options.gitRevision);
+        }
+
         const zoektSearchRequest = await createZoektSearchRequest({
             query,
             options: request.options,
@@ -85,3 +96,139 @@ const getAccessibleRepoNamesForUser = async ({ user, prisma }: { user?: UserWith
     });
     return accessibleRepos.map(repo => repo.name);
 }
+
+/**
+ * Resolves the repositories a search may touch: the repos accessible to the user,
+ * intersected with the repos indexed inside the optional since/until window.
+ * Returns undefined when neither filter applies (no scoping).
+ */
+const getRepoSearchScope = async ({
+    user,
+    prisma,
+    org,
+    options
+}: {
+    user?: UserWithAccounts,
+    prisma: PrismaClient,
+    org?: { id: number },
+    options: Pick<SearchOptions, 'since' | 'until'>
+}) => {
+    // The permission and temporal lookups are independent, so run them concurrently.
+    const [permissionFilteredRepos, temporalFilteredRepos] = await Promise.all([
+        getAccessibleRepoNamesForUser({ user, prisma }),
+        getTemporallyFilteredRepos({ prisma, org, since: options.since, until: options.until }),
+    ]);
+
+    // Combine filters: intersection of permission and temporal filters
+    return combineRepoFilters(permissionFilteredRepos, temporalFilteredRepos);
+}
+
+/**
+ * Returns a list of repository names filtered by temporal constraints (indexedAt).
+ * If no temporal constraints are specified, returns undefined.
+ * Note: This filters by when the repo was last indexed by Sourcebot, not by commit time.
+ *
+ * @param prisma - Client used to query the repo table.
+ * @param org - When provided, restricts the lookup to that org's repos.
+ * @param since - Inclusive lower bound on indexedAt, converted via toDbDate.
+ * @param until - Inclusive upper bound on indexedAt, converted via toDbDate.
+ * @throws Error if both bounds are given and validateDateRange reports a problem.
+ */
+const getTemporallyFilteredRepos = async ({
+    prisma,
+    org,
+    since,
+    until
+}: {
+    prisma: PrismaClient,
+    org?: { id: number },
+    since?: string,
+    until?: string
+}) => {
+    // If no temporal filters are specified, return undefined (no filtering)
+    if (!since && !until) {
+        return undefined;
+    }
+
+    // Validate date range if both dates are provided
+    if (since && until) {
+        const dateRangeError = validateDateRange(since, until);
+        if (dateRangeError) {
+            throw new Error(dateRangeError);
+        }
+    }
+
+    const sinceDate = since ? toDbDate(since) : undefined;
+    const untilDate = until ? toDbDate(until) : undefined;
+
+    const where: Prisma.RepoWhereInput = {};
+
+    // Add org filter if org is available
+    if (org) {
+        where.orgId = org.id;
+    }
+
+    // Add temporal filters
+    // NOTE(review): a bound is skipped when toDbDate returns a falsy value; confirm in
+    // dateUtils whether unparseable input throws or silently widens the filter.
+    where.indexedAt = {};
+    if (sinceDate) {
+        where.indexedAt.gte = sinceDate;
+    }
+    if (untilDate) {
+        where.indexedAt.lte = untilDate;
+    }
+
+    const repos = await prisma.repo.findMany({
+        where,
+        select: { name: true }
+    });
+
+    return repos.map(repo => repo.name);
+}
+
+/**
+ * Combines permission-based and temporal repo filters.
+ * Returns the intersection if both filters are present, otherwise returns whichever is defined.
+ */
+const combineRepoFilters = (
+    permissionFiltered: string[] | undefined,
+    temporalFiltered: string[] | undefined
+): string[] | undefined => {
+    // If neither filter is defined, no filtering
+    if (!permissionFiltered && !temporalFiltered) {
+        return undefined;
+    }
+
+    // If only one filter is defined, use it
+    if (!permissionFiltered) {
+        return temporalFiltered;
+    }
+    if (!temporalFiltered) {
+        return permissionFiltered;
+    }
+
+    // Both filters are defined: return intersection
+    const temporalSet = new Set(temporalFiltered);
+    return permissionFiltered.filter(repo => temporalSet.has(repo));
+}
+
+/**
+ * Applies branch filtering to a QueryIR by wrapping it with a branch constraint.
+ */
+const applyBranchFilter = (query: QueryIR, gitRevision: string): QueryIR => {
+    // Wrap the existing query with a branch filter using the 'and' operator
+    return {
+        and: {
+            children: [
+                query,
+                {
+                    branch: {
+                        pattern: gitRevision,
+                        exact: true
+                    }
+                }
+            ]
+        }
+    };
+}
diff --git a/packages/web/src/features/search/types.ts b/packages/web/src/features/search/types.ts
index 90f501821..876fb99a0 100644
--- a/packages/web/src/features/search/types.ts
+++ b/packages/web/src/features/search/types.ts
@@ -89,6 +89,9 @@ export const searchOptionsSchema = z.object({
     whole: z.boolean().optional(), // Whether to return the whole file as part of the response.
     isRegexEnabled: z.boolean().optional(), // Whether to enable regular expression search.
     isCaseSensitivityEnabled: z.boolean().optional(), // Whether to enable case sensitivity.
+    gitRevision: z.string().optional(), // Filter by git branch/revision.
+    since: z.string().optional(), // Filter repositories by indexed date (start). Filters by when the repo was last indexed by Sourcebot, not by commit time.
+    until: z.string().optional(), // Filter repositories by indexed date (end). Filters by when the repo was last indexed by Sourcebot, not by commit time.
 });
 
 export type SearchOptions = z.infer<typeof searchOptionsSchema>;
@@ -103,6 +106,7 @@ export const searchResponseSchema = z.object({
     files: z.array(searchFileSchema),
     repositoryInfo: z.array(repositoryInfoSchema),
     isSearchExhaustive: z.boolean(),
+    isBranchFilteringEnabled: z.boolean().optional(), // Whether branch filtering is enabled for this search.
 });
 
 export type SearchResponse = z.infer<typeof searchResponseSchema>;
@@ -162,3 +166,13 @@ export const fileSourceResponseSchema = z.object({
     webUrl: z.string().optional(),
 });
 export type FileSourceResponse = z.infer<typeof fileSourceResponseSchema>;
+
+export const searchCommitsRequestSchema = z.object({
+    repoId: z.union([z.number(), z.string()]),
+    query: z.string().optional(),
+    since: z.string().optional(),
+    until: z.string().optional(),
+    author: z.string().optional(),
+    maxCount: z.number().int().positive().max(500).optional(),
+});
+export type SearchCommitsRequest = z.infer<typeof searchCommitsRequestSchema>;