Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Added config option `settings.maxFileSize` to control the maximum file size zoekt will index. ([#118](https://github.com/sourcebot-dev/sourcebot/pull/118))

## [2.6.0] - 2024-12-02

### Added
Expand Down
10 changes: 9 additions & 1 deletion packages/backend/src/constants.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { Settings } from "./types.js";

/**
* The interval to reindex a given repository.
Expand All @@ -7,4 +8,11 @@ export const REINDEX_INTERVAL_MS = 1000 * 60 * 60;
/**
* The interval to re-sync the config.
*/
export const RESYNC_CONFIG_INTERVAL_MS = 1000 * 60 * 60 * 24;
export const RESYNC_CONFIG_INTERVAL_MS = 1000 * 60 * 60 * 24;

/**
 * Default settings.
 */
export const DEFAULT_SETTINGS: Settings = {
    // 2MB expressed in bytes (2 * 1024 * 1024 = 2097152).
    maxFileSize: 2 * 1024 ** 2,
};
32 changes: 32 additions & 0 deletions packages/backend/src/db.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { expect, test } from 'vitest';
import { migration_addMaxFileSize, migration_addSettings, Schema } from './db';
import { DEFAULT_SETTINGS } from './constants';
import { DeepPartial } from './types';


test('migration_addSettings adds the `settings` field with defaults if it does not exist', () => {
    // Start from a schema that has no `settings` field at all.
    const emptySchema: DeepPartial<Schema> = {};
    const expected = { settings: DEFAULT_SETTINGS };

    const result = migration_addSettings(emptySchema as Schema);

    expect(result).toStrictEqual(expected);
});

test('migration_addMaxFileSize adds the `maxFileSize` field with the default value if it does not exist', () => {
    // `settings` exists but carries no `maxFileSize` yet.
    const withoutMaxFileSize: DeepPartial<Schema> = { settings: {} };
    const expected = {
        settings: { maxFileSize: DEFAULT_SETTINGS.maxFileSize },
    };

    const result = migration_addMaxFileSize(withoutMaxFileSize as Schema);

    expect(result).toStrictEqual(expected);
});

test('migration_addMaxFileSize will throw if `settings` is not defined', () => {
    // No `settings` object, so reading `settings.maxFileSize` must fail.
    const withoutSettings: DeepPartial<Schema> = {};
    const runMigration = () => migration_addMaxFileSize(withoutSettings as Schema);

    expect(runMigration).toThrow();
});
60 changes: 57 additions & 3 deletions packages/backend/src/db.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import { JSONFilePreset } from "lowdb/node";
import { type Low } from "lowdb";
import { AppContext, Repository } from "./types.js";
import { AppContext, Repository, Settings } from "./types.js";
import { DEFAULT_SETTINGS } from "./constants.js";
import { createLogger } from "./logger.js";

type Schema = {
const logger = createLogger('db');

export type Schema = {
settings: Settings,
repos: {
[key: string]: Repository;
}
Expand All @@ -11,9 +16,16 @@ type Schema = {
export type Database = Low<Schema>;

/**
 * Opens the JSON database stored at `${ctx.cachePath}/db.json`, runs
 * `applyMigrations` against it, and returns the database handle.
 *
 * @param ctx Application context; only `cachePath` is read here.
 * @returns The database after migrations have been applied.
 */
export const loadDB = async (ctx: AppContext): Promise<Database> => {
// NOTE(review): two `JSONFilePreset` calls appear back to back. The single-line
// call looks like the pre-change line kept by the diff view, and the
// multi-line call (which also seeds `settings`) its replacement — confirm.
const db = await JSONFilePreset<Schema>(`${ctx.cachePath}/db.json`, { repos: {} });
// The object literal is the default data passed to `JSONFilePreset`.
const db = await JSONFilePreset<Schema>(`${ctx.cachePath}/db.json`, {
repos: {},
settings: DEFAULT_SETTINGS,
});

// Apply schema migrations before handing the database to callers.
await applyMigrations(db);

return db;
}

export const updateRepository = async (repoId: string, data: Repository, db: Database) => {
db.data.repos[repoId] = {
...db.data.repos[repoId],
Expand All @@ -22,7 +34,49 @@ export const updateRepository = async (repoId: string, data: Repository, db: Dat
await db.write();
}

/**
 * Replaces the stored settings with `settings` and writes the database.
 */
export const updateSettings = async (settings: Settings, db: Database) => {
    const { data } = db;
    data.settings = settings;
    await db.write();
}

/**
 * Stores `repo` under its own `id` and writes the database.
 */
export const createRepository = async (repo: Repository, db: Database) => {
    const { repos } = db.data;
    repos[repo.id] = repo;
    await db.write();
}

/**
 * Runs every schema migration, oldest first, inside a single `db.update` call.
 */
export const applyMigrations = async (db: Database) => {
    // Passed to each migration so it can announce itself when it actually runs.
    const announce = (migrationName: string) => {
        logger.info(`Applying migration '${migrationName}'`);
    };

    // @NOTE: please ensure new migrations are added after older ones!
    const orderedMigrations = [
        migration_addSettings,
        migration_addMaxFileSize,
    ];

    await db.update((schema) => orderedMigrations.reduce(
        (current, migrate) => migrate(current, announce),
        schema,
    ));
}

/**
 * Migration: adds the `settings` field, populated with the default settings,
 * to a schema that does not have one yet. A schema that already has
 * `settings` is returned untouched.
 *
 * @param schema The schema to migrate; it is modified in place.
 * @param log Optional callback invoked with the migration name when the
 * migration actually changes the schema.
 * @returns The same `schema` object that was passed in.
 * @see: https://github.com/sourcebot-dev/sourcebot/pull/118
 */
export const migration_addSettings = (schema: Schema, log?: (name: string) => void) => {
    if (!schema.settings) {
        log?.("addSettings");
        // Copy the defaults instead of aliasing them: the schema is mutable,
        // and a later in-place edit of `schema.settings` must not alter the
        // shared `DEFAULT_SETTINGS` constant.
        schema.settings = { ...DEFAULT_SETTINGS };
    }

    return schema;
}

/**
 * Migration: adds `settings.maxFileSize`, set to the default value, when the
 * field is missing. A value that is already present is kept, including an
 * explicit `0`.
 *
 * @param schema The schema to migrate; it is modified in place.
 * @param log Optional callback invoked with the migration name when the
 * migration actually changes the schema.
 * @returns The same `schema` object that was passed in.
 * @throws TypeError if `schema.settings` is not defined.
 * @see: https://github.com/sourcebot-dev/sourcebot/pull/118
 */
export const migration_addMaxFileSize = (schema: Schema, log?: (name: string) => void) => {
    // `== null` matches only `undefined` and `null`. A plain falsy check would
    // also treat a stored `0` as missing and silently overwrite it.
    if (schema.settings.maxFileSize == null) {
        log?.("addMaxFileSize");
        schema.settings.maxFileSize = DEFAULT_SETTINGS.maxFileSize;
    }

    return schema;
}
33 changes: 27 additions & 6 deletions packages/backend/src/main.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { expect, test } from 'vitest';
import { isRepoReindxingRequired } from './main';
import { Repository } from './types';
import { isAllRepoReindexingRequired, isRepoReindexingRequired } from './main';
import { Repository, Settings } from './types';

test('isRepoReindexingRequired should return false when no changes are made', () => {
const previous: Repository = {
Expand All @@ -15,7 +15,7 @@ test('isRepoReindexingRequired should return false when no changes are made', ()
};
const current = previous;

expect(isRepoReindxingRequired(previous, current)).toBe(false);
expect(isRepoReindexingRequired(previous, current)).toBe(false);
})

test('isRepoReindexingRequired should return true when git branches change', () => {
Expand All @@ -35,7 +35,7 @@ test('isRepoReindexingRequired should return true when git branches change', ()
branches: ['main', 'feature']
};

expect(isRepoReindxingRequired(previous, current)).toBe(true);
expect(isRepoReindexingRequired(previous, current)).toBe(true);
});

test('isRepoReindexingRequired should return true when git tags change', () => {
Expand All @@ -55,7 +55,7 @@ test('isRepoReindexingRequired should return true when git tags change', () => {
tags: ['v1.0', 'v2.0']
};

expect(isRepoReindxingRequired(previous, current)).toBe(true);
expect(isRepoReindexingRequired(previous, current)).toBe(true);
});

test('isRepoReindexingRequired should return true when local excludedPaths change', () => {
Expand All @@ -74,5 +74,26 @@ test('isRepoReindexingRequired should return true when local excludedPaths chang
excludedPaths: ['node_modules', 'dist']
};

expect(isRepoReindxingRequired(previous, current)).toBe(true);
expect(isRepoReindexingRequired(previous, current)).toBe(true);
});

test('isAllRepoReindexingRequired should return false when fileLimitSize has not changed', () => {
    const before: Settings = { maxFileSize: 1000 };
    // Shallow copy: identical values, different object.
    const after: Settings = { ...before };

    const reindexAll = isAllRepoReindexingRequired(before, after);

    expect(reindexAll).toBe(false);
});

test('isAllRepoReindexingRequired should return true when fileLimitSize has changed', () => {
    const before: Settings = { maxFileSize: 1000 };
    // Same settings, but with a different `maxFileSize`.
    const after: Settings = { ...before, maxFileSize: 2000 };

    const reindexAll = isAllRepoReindexingRequired(before, after);

    expect(reindexAll).toBe(true);
});
46 changes: 33 additions & 13 deletions packages/backend/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,20 @@ import { getGitHubReposFromConfig } from "./github.js";
import { getGitLabReposFromConfig } from "./gitlab.js";
import { getGiteaReposFromConfig } from "./gitea.js";
import { getGerritReposFromConfig } from "./gerrit.js";
import { AppContext, LocalRepository, GitRepository, Repository } from "./types.js";
import { AppContext, LocalRepository, GitRepository, Repository, Settings } from "./types.js";
import { cloneRepository, fetchRepository } from "./git.js";
import { createLogger } from "./logger.js";
import { createRepository, Database, loadDB, updateRepository } from './db.js';
import { createRepository, Database, loadDB, updateRepository, updateSettings } from './db.js';
import { arraysEqualShallow, isRemotePath, measure } from "./utils.js";
import { REINDEX_INTERVAL_MS, RESYNC_CONFIG_INTERVAL_MS } from "./constants.js";
import { DEFAULT_SETTINGS, REINDEX_INTERVAL_MS, RESYNC_CONFIG_INTERVAL_MS } from "./constants.js";
import stripJsonComments from 'strip-json-comments';
import { indexGitRepository, indexLocalRepository } from "./zoekt.js";
import { getLocalRepoFromConfig, initLocalRepoFileWatchers } from "./local.js";
import { captureEvent } from "./posthog.js";

const logger = createLogger('main');

const syncGitRepository = async (repo: GitRepository, ctx: AppContext) => {
const syncGitRepository = async (repo: GitRepository, settings: Settings, ctx: AppContext) => {
let fetchDuration_s: number | undefined = undefined;
let cloneDuration_s: number | undefined = undefined;

Expand Down Expand Up @@ -46,7 +46,7 @@ const syncGitRepository = async (repo: GitRepository, ctx: AppContext) => {
}

logger.info(`Indexing ${repo.id}...`);
const { durationMs } = await measure(() => indexGitRepository(repo, ctx));
const { durationMs } = await measure(() => indexGitRepository(repo, settings, ctx));
const indexDuration_s = durationMs / 1000;
logger.info(`Indexed ${repo.id} in ${indexDuration_s}s`);

Expand All @@ -57,18 +57,21 @@ const syncGitRepository = async (repo: GitRepository, ctx: AppContext) => {
}
}

/**
 * Indexes a local repository with `indexLocalRepository`, logging before and
 * after, and reports how long the indexing took.
 *
 * NOTE(review): the signature and the `measure(...)` call each appear twice
 * below; the variants without `settings` look like pre-change lines kept by
 * the diff view — confirm.
 *
 * @returns An object whose `indexDuration_s` is the measured duration in seconds.
 */
const syncLocalRepository = async (repo: LocalRepository, ctx: AppContext, signal?: AbortSignal) => {
const syncLocalRepository = async (repo: LocalRepository, settings: Settings, ctx: AppContext, signal?: AbortSignal) => {
logger.info(`Indexing ${repo.id}...`);
const { durationMs } = await measure(() => indexLocalRepository(repo, ctx, signal));
const { durationMs } = await measure(() => indexLocalRepository(repo, settings, ctx, signal));
// Convert the measured milliseconds to seconds for the log line and the result.
const indexDuration_s = durationMs / 1000;
logger.info(`Indexed ${repo.id} in ${indexDuration_s}s`);
return {
indexDuration_s,
}
}

export const isRepoReindxingRequired = (previous: Repository, current: Repository) => {

/**
* Certain configuration changes (e.g., a branch is added) require
* a reindexing of the repository.
*/
export const isRepoReindexingRequired = (previous: Repository, current: Repository) => {
/**
* Checks if the any of the `revisions` properties have changed.
*/
Expand Down Expand Up @@ -100,6 +103,16 @@ export const isRepoReindxingRequired = (previous: Repository, current: Repositor
)
}

/**
 * Decides whether a settings change forces every repository to be
 * reindexed. Currently only a change to `maxFileSize` does.
 */
export const isAllRepoReindexingRequired = (previous: Settings, current: Settings) => {
    const maxFileSizeBefore = previous?.maxFileSize;
    const maxFileSizeAfter = current?.maxFileSize;

    return !(maxFileSizeBefore === maxFileSizeAfter);
}

const syncConfig = async (configPath: string, db: Database, signal: AbortSignal, ctx: AppContext) => {
const configContent = await (async () => {
if (isRemotePath(configPath)) {
Expand All @@ -121,6 +134,13 @@ const syncConfig = async (configPath: string, db: Database, signal: AbortSignal,
// @todo: we should validate the configuration file's structure here.
const config = JSON.parse(stripJsonComments(configContent)) as SourcebotConfigurationSchema;

// Update the settings
const updatedSettings: Settings = {
maxFileSize: config.settings?.maxFileSize ?? DEFAULT_SETTINGS.maxFileSize,
}
const _isAllRepoReindexingRequired = isAllRepoReindexingRequired(db.data.settings, updatedSettings);
await updateSettings(updatedSettings, db);

// Fetch all repositories from the config file
let configRepos: Repository[] = [];
for (const repoConfig of config.repos ?? []) {
Expand Down Expand Up @@ -172,7 +192,7 @@ const syncConfig = async (configPath: string, db: Database, signal: AbortSignal,
for (const newRepo of configRepos) {
if (newRepo.id in db.data.repos) {
const existingRepo = db.data.repos[newRepo.id];
const isReindexingRequired = isRepoReindxingRequired(existingRepo, newRepo);
const isReindexingRequired = _isAllRepoReindexingRequired || isRepoReindexingRequired(existingRepo, newRepo);
if (isReindexingRequired) {
logger.info(`Marking ${newRepo.id} for reindexing due to configuration change.`);
}
Expand Down Expand Up @@ -244,7 +264,7 @@ export const main = async (context: AppContext) => {
const localRepos = Object.values(db.data.repos).filter(repo => repo.vcs === 'local');
initLocalRepoFileWatchers(localRepos, async (repo, signal) => {
logger.info(`Change detected to local repository ${repo.id}. Re-syncing...`);
await syncLocalRepository(repo, context, signal);
await syncLocalRepository(repo, db.data.settings, context, signal);
await db.update(({ repos }) => repos[repo.id].lastIndexedDate = new Date().toUTCString());
});
}
Expand Down Expand Up @@ -285,12 +305,12 @@ export const main = async (context: AppContext) => {
let cloneDuration_s: number | undefined;

if (repo.vcs === 'git') {
const stats = await syncGitRepository(repo, context);
const stats = await syncGitRepository(repo, db.data.settings, context);
indexDuration_s = stats.indexDuration_s;
fetchDuration_s = stats.fetchDuration_s;
cloneDuration_s = stats.cloneDuration_s;
} else if (repo.vcs === 'local') {
const stats = await syncLocalRepository(repo, context);
const stats = await syncLocalRepository(repo, db.data.settings, context);
indexDuration_s = stats.indexDuration_s;
}

Expand Down
10 changes: 10 additions & 0 deletions packages/backend/src/schemas/v2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,21 @@ export type Repos = GitHubConfig | GitLabConfig | GiteaConfig | GerritConfig | L
*/
export interface SourcebotConfigurationSchema {
$schema?: string;
/**
 * Optional global settings; see the `Settings` interface for the available fields.
 */
settings?: Settings;
/**
 * Defines a collection of repositories from varying code hosts that Sourcebot should sync with.
 */
repos?: Repos[];
}
/**
 * Global settings. These settings are applied to all repositories.
 */
export interface Settings {
/**
 * The maximum size of a file (in bytes) to be indexed. Files that exceed this maximum will not be indexed. Defaults to 2MB (2097152 bytes).
 */
maxFileSize?: number;
}
export interface GitHubConfig {
/**
* GitHub Configuration
Expand Down
10 changes: 9 additions & 1 deletion packages/backend/src/types.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

interface BaseRepository {
vcs: 'git' | 'local';
id: string;
Expand Down Expand Up @@ -42,3 +41,12 @@ export type AppContext = {

configPath: string;
}

/**
 * Global settings with every field required.
 */
export type Settings = {
// Maximum file size, in bytes; the default value is defined by `DEFAULT_SETTINGS`.
maxFileSize: number;
}

/**
 * Recursively makes every property of `T` optional. Types that are not
 * objects are returned unchanged.
 */
// @see : https://stackoverflow.com/a/61132308
export type DeepPartial<T> = T extends object ? {
[P in keyof T]?: DeepPartial<T[P]>;
} : T;
Loading
Loading