diff --git a/src/app.module.ts b/src/app.module.ts index c67a71ac..5685c254 100644 --- a/src/app.module.ts +++ b/src/app.module.ts @@ -3,8 +3,10 @@ import { APP_GUARD } from '@nestjs/core'; import { ConfigModule } from '@nestjs/config'; import { TypeOrmModule } from '@nestjs/typeorm'; import { ScheduleModule } from '@nestjs/schedule'; + import { AppController } from './app.controller'; import { SearchModule } from './search/search.module'; +import { IndexOptimizationModule } from './database/index-optimization/index-optimization.module'; import { RateLimitingModule } from './rate-limiting/rate-limiting.module'; import { QuotaGuard } from './rate-limiting/guards/quota.guard'; import { getDatabaseConfig } from './config/database.config'; @@ -15,7 +17,6 @@ import { DataPipelineModule } from './data-pipeline/data-pipeline.module'; const featureFlags = loadFeatureFlags(); - @Module({ imports: [ ConfigModule.forRoot({ isGlobal: true }), @@ -23,6 +24,7 @@ const featureFlags = loadFeatureFlags(); ScheduleModule.forRoot(), SessionModule, SearchModule, + IndexOptimizationModule, // ✅ from feat branch ...(featureFlags.ENABLE_RATE_LIMITING ? [RateLimitingModule] : []), DebuggingModule, DataPipelineModule, @@ -32,4 +34,4 @@ const featureFlags = loadFeatureFlags(); ? [{ provide: APP_GUARD, useClass: QuotaGuard }] : [], }) -export class AppModule { } +export class AppModule {} \ No newline at end of file diff --git a/src/database/index-optimization/README.md b/src/database/index-optimization/README.md new file mode 100644 index 00000000..65579783 --- /dev/null +++ b/src/database/index-optimization/README.md @@ -0,0 +1,80 @@ +# Automatic Database Index Optimizer + +PostgreSQL-specific tooling that recommends, creates, monitors and retires +indexes by reading the catalog and the statistics collector (`pg_stat_*`). All +analysis is read-only; the only writes are explicit `CREATE INDEX` / +`DROP INDEX` actions, both gated behind configuration flags. + +## Capabilities + +| Acceptance criterion | Component | +| --------------------------------- | ------------------------------------------- | +| Query analysis for recommendations| `QueryAnalysisService.analyze()` | +| Automatic index creation | `IndexCreationService` | +| Index usage monitoring | `IndexUsageMonitorService` | +| Stale index removal | `StaleIndexService` | +| Scheduled orchestration | `IndexOptimizationService` (`@Cron` weekly) | + +## How recommendations are derived + +1. **Foreign-key columns without an index.** Postgres does not automatically + index FK columns — a frequent cause of slow joins and cascade operations. + The catalog is queried for FK columns whose leading index columns are not + already covered, yielding concrete, safe column suggestions. +2. **Sequential-scan activity.** `pg_stat_user_tables` seq/idx scan counts + score and prioritise the above, and flag heavily seq-scanned tables + (`HIGH_SEQ_SCAN`). +3. **Slow statements.** When `pg_stat_statements` is installed, slow queries + are surfaced for context via `GET .../slow-queries`. + +Generated DDL uses `CREATE INDEX CONCURRENTLY IF NOT EXISTS` so no long write +lock is taken. After creation the index's `indisvalid` flag is verified; a +failed concurrent build leaves an INVALID index, which is dropped automatically. + +## Stale index removal — safety + +An index is only eligible for removal when it is **not** a primary key, **not** +unique, **not** backing any constraint, has scan count ≤ `staleMinScans`, and is +larger than `staleMinSizeBytes`. Drops also use `CONCURRENTLY`. + +## Configuration (all optional) + +| Env var | Default | Purpose | +| -------------------------------- | ------- | ---------------------------------------- | +| `INDEX_OPT_ENABLED` | `false` | Enable the scheduled weekly cycle | +| `INDEX_OPT_DRY_RUN` | `true` | Analyse only; never execute DDL | +| `INDEX_OPT_AUTO_CREATE` | `false` | Allow automatic index creation | +| `INDEX_OPT_AUTO_DROP_STALE` | `false` | Allow automatic stale-index removal | +| `INDEX_OPT_SEQ_SCAN_THRESHOLD` | `1000` | Min seq scans before a table is a candidate | +| `INDEX_OPT_SEQ_SCAN_RATIO` | `0.5` | Min seq/idx scan ratio to flag a table | +| `INDEX_OPT_SLOW_QUERY_MS` | `200` | Mean exec time marking a statement slow | +| `INDEX_OPT_STALE_MIN_SIZE_BYTES` | `1MB` | Ignore stale indexes smaller than this | +| `INDEX_OPT_STALE_MIN_SCANS` | `0` | Scans at/below which an index is stale | +| `INDEX_OPT_MAX_CREATE_PER_RUN` | `3` | Cap on indexes created per cycle | +| `INDEX_OPT_SCHEMA` | `public`| Schema to operate on | + +Even with `INDEX_OPT_ENABLED=true`, creation/drops stay in dry-run until you +also set `INDEX_OPT_DRY_RUN=false` and the relevant `AUTO_*` flag. + +## API (admin only) + +| Method & path | Description | +| -------------------------------------------------- | --------------------------------- | +| `GET /database/index-optimization/recommendations`| Index recommendations | +| `GET /database/index-optimization/slow-queries` | Slow statements (if enabled) | +| `GET /database/index-optimization/usage` | Index usage statistics | +| `GET /database/index-optimization/stale` | Stale indexes eligible for removal| +| `GET /database/index-optimization/last-run` | Summary of the last cycle | +| `POST /database/index-optimization/run?apply=true` | Run a cycle (dry-run unless apply)| + +## Wiring + +```ts +@Module({ + imports: [ + ScheduleModule.forRoot(), // required for the weekly cron + IndexOptimizationModule, + ], +}) +export class AppModule {} +``` diff --git a/src/database/index-optimization/index-optimization.config.ts b/src/database/index-optimization/index-optimization.config.ts new file mode 100644 index 00000000..5e5135d7 --- /dev/null +++ b/src/database/index-optimization/index-optimization.config.ts @@ -0,0 +1,61 @@ +/** + * Centralised configuration for the automatic index optimizer, resolved from + * environment variables. Conservative defaults are chosen so the optimizer is + * safe to enable: it runs in dry-run and never auto-applies DDL unless a human + * opts in. + * + * Env vars (all optional): + * INDEX_OPT_ENABLED – master switch for the scheduled run (default false) + * INDEX_OPT_DRY_RUN – analyse/recommend only, never apply DDL (default true) + * INDEX_OPT_AUTO_CREATE – allow automatic index creation (default false) + * INDEX_OPT_AUTO_DROP_STALE – allow automatic stale-index removal (default false) + * INDEX_OPT_SEQ_SCAN_THRESHOLD – min seq scans before a table is a candidate (default 1000) + * INDEX_OPT_SEQ_SCAN_RATIO – min seq/idx scan ratio to flag a table (default 0.5) + * INDEX_OPT_SLOW_QUERY_MS – mean exec time (ms) marking a statement slow (default 200) + * INDEX_OPT_STALE_MIN_SIZE_BYTES – ignore stale indexes smaller than this (default 1MB) + * INDEX_OPT_STALE_MIN_SCANS – scans at/below which an index is stale (default 0) + * INDEX_OPT_MAX_CREATE_PER_RUN – cap on indexes created in one cycle (default 3) + * INDEX_OPT_SCHEMA – schema to operate on (default public) + */ +export interface IndexOptimizationConfig { + enabled: boolean; + dryRun: boolean; + autoCreate: boolean; + autoDropStale: boolean; + seqScanThreshold: number; + seqScanRatio: number; + slowQueryMs: number; + staleMinSizeBytes: number; + staleMinScans: number; + maxCreatePerRun: number; + schema: string; +} + +const bool = (value: string | undefined, fallback: boolean): boolean => + value === undefined ? fallback : value.toLowerCase() === 'true'; + +const int = (value: string | undefined, fallback: number): number => { + const parsed = parseInt(value ?? '', 10); + return Number.isFinite(parsed) ? parsed : fallback; +}; + +const num = (value: string | undefined, fallback: number): number => { + const parsed = Number(value); + return Number.isFinite(parsed) ? parsed : fallback; +}; + +export function resolveIndexOptimizationConfig(): IndexOptimizationConfig { + return { + enabled: bool(process.env.INDEX_OPT_ENABLED, false), + dryRun: bool(process.env.INDEX_OPT_DRY_RUN, true), + autoCreate: bool(process.env.INDEX_OPT_AUTO_CREATE, false), + autoDropStale: bool(process.env.INDEX_OPT_AUTO_DROP_STALE, false), + seqScanThreshold: int(process.env.INDEX_OPT_SEQ_SCAN_THRESHOLD, 1000), + seqScanRatio: num(process.env.INDEX_OPT_SEQ_SCAN_RATIO, 0.5), + slowQueryMs: num(process.env.INDEX_OPT_SLOW_QUERY_MS, 200), + staleMinSizeBytes: int(process.env.INDEX_OPT_STALE_MIN_SIZE_BYTES, 1024 * 1024), + staleMinScans: int(process.env.INDEX_OPT_STALE_MIN_SCANS, 0), + maxCreatePerRun: int(process.env.INDEX_OPT_MAX_CREATE_PER_RUN, 3), + schema: process.env.INDEX_OPT_SCHEMA ?? 'public', + }; +} diff --git a/src/database/index-optimization/index-optimization.controller.ts b/src/database/index-optimization/index-optimization.controller.ts new file mode 100644 index 00000000..2cc3fa7c --- /dev/null +++ b/src/database/index-optimization/index-optimization.controller.ts @@ -0,0 +1,75 @@ +import { Controller, Get, Post, Query, UseGuards } from '@nestjs/common'; +import { ApiTags, ApiOperation, ApiBearerAuth, ApiQuery } from '@nestjs/swagger'; +import { Roles } from '../../auth/decorators/roles.decorator'; +import { JwtAuthGuard } from '../../auth/guards/jwt-auth.guard'; +import { RolesGuard } from '../../auth/guards/roles.guard'; +import { UserRole } from '../../users/entities/user.entity'; +import { IndexOptimizationService } from './index-optimization.service'; +import { QueryAnalysisService } from './services/query-analysis.service'; +import { IndexUsageMonitorService } from './services/index-usage-monitor.service'; +import { StaleIndexService } from './services/stale-index.service'; + +/** + * Admin API for the database index optimizer. Mutating endpoints require an + * explicit `apply=true` flag so DDL is never executed by accident. + */ +@ApiTags('index-optimization') +@Controller('database/index-optimization') +@UseGuards(JwtAuthGuard, RolesGuard) +@ApiBearerAuth() +export class IndexOptimizationController { + constructor( + private readonly optimizer: IndexOptimizationService, + private readonly analysis: QueryAnalysisService, + private readonly usageMonitor: IndexUsageMonitorService, + private readonly staleIndex: StaleIndexService, + ) {} + + @Get('recommendations') + @Roles(UserRole.ADMIN) + @ApiOperation({ summary: 'Analyse the database and return index recommendations' }) + recommendations() { + return this.analysis.analyze(); + } + + @Get('slow-queries') + @Roles(UserRole.ADMIN) + @ApiOperation({ summary: 'List slow statements from pg_stat_statements (if enabled)' }) + slowQueries() { + return this.analysis.getSlowStatements(); + } + + @Get('usage') + @Roles(UserRole.ADMIN) + @ApiOperation({ summary: 'Get index usage statistics' }) + usage() { + return this.usageMonitor.getSnapshot(); + } + + @Get('stale') + @Roles(UserRole.ADMIN) + @ApiOperation({ summary: 'List indexes judged stale and eligible for removal' }) + stale() { + return this.staleIndex.findStaleIndexes(); + } + + @Get('last-run') + @Roles(UserRole.ADMIN) + @ApiOperation({ summary: 'Get the summary of the last optimization cycle' }) + lastRun() { + return this.optimizer.getLastRun() ?? { message: 'No run recorded yet' }; + } + + @Post('run') + @Roles(UserRole.ADMIN) + @ApiQuery({ + name: 'apply', + required: false, + type: Boolean, + description: 'When true, executes DDL instead of running in dry-run mode', + }) + @ApiOperation({ summary: 'Run a full optimization cycle (dry-run unless apply=true)' }) + run(@Query('apply') apply?: string) { + return this.optimizer.run(apply === 'true'); + } +} diff --git a/src/database/index-optimization/index-optimization.module.ts b/src/database/index-optimization/index-optimization.module.ts new file mode 100644 index 00000000..047f704c --- /dev/null +++ b/src/database/index-optimization/index-optimization.module.ts @@ -0,0 +1,39 @@ +import { Module } from '@nestjs/common'; +import { TypeOrmModule } from '@nestjs/typeorm'; +import { IndexOptimizationController } from './index-optimization.controller'; +import { IndexOptimizationService } from './index-optimization.service'; +import { QueryAnalysisService } from './services/query-analysis.service'; +import { IndexCreationService } from './services/index-creation.service'; +import { IndexUsageMonitorService } from './services/index-usage-monitor.service'; +import { StaleIndexService } from './services/stale-index.service'; + +/** + * IndexOptimizationModule wires the automatic database index optimizer: + * - QueryAnalysisService → index recommendations + * - IndexCreationService → automatic index creation + * - IndexUsageMonitorService → index usage monitoring + * - StaleIndexService → stale index removal + * - IndexOptimizationService → scheduled orchestration of the above + * + * Requires a configured TypeORM DataSource (PostgreSQL) and, for scheduling, + * ScheduleModule.forRoot() registered at the application root. The scheduled + * cycle is inert unless INDEX_OPT_ENABLED=true. + */ +@Module({ + imports: [TypeOrmModule.forFeature([])], + controllers: [IndexOptimizationController], + providers: [ + IndexOptimizationService, + QueryAnalysisService, + IndexCreationService, + IndexUsageMonitorService, + StaleIndexService, + ], + exports: [ + IndexOptimizationService, + QueryAnalysisService, + IndexUsageMonitorService, + StaleIndexService, + ], +}) +export class IndexOptimizationModule {} diff --git a/src/database/index-optimization/index-optimization.service.ts b/src/database/index-optimization/index-optimization.service.ts new file mode 100644 index 00000000..640f5fcd --- /dev/null +++ b/src/database/index-optimization/index-optimization.service.ts @@ -0,0 +1,97 @@ +import { Injectable, Logger } from '@nestjs/common'; +import { Cron, CronExpression } from '@nestjs/schedule'; +import { + resolveIndexOptimizationConfig, + IndexOptimizationConfig, +} from './index-optimization.config'; +import { QueryAnalysisService } from './services/query-analysis.service'; +import { IndexCreationService } from './services/index-creation.service'; +import { IndexUsageMonitorService } from './services/index-usage-monitor.service'; +import { StaleIndexService } from './services/stale-index.service'; +import { IOptimizationRunSummary } from './interfaces/index-optimization.interfaces'; + +/** + * Orchestrates a full index-optimization cycle: + * analyse → create recommended → monitor usage → remove stale + * + * Runs on a weekly schedule when INDEX_OPT_ENABLED=true, and can be triggered + * on demand via the controller. Each stage independently respects the dry-run + * and auto-create / auto-drop flags so an operator can dial in exactly how much + * autonomy the optimizer has. + */ +@Injectable() +export class IndexOptimizationService { + private readonly logger = new Logger(IndexOptimizationService.name); + private readonly config: IndexOptimizationConfig; + private lastRun?: IOptimizationRunSummary; + + constructor( + private readonly analysis: QueryAnalysisService, + private readonly creation: IndexCreationService, + private readonly usageMonitor: IndexUsageMonitorService, + private readonly staleIndex: StaleIndexService, + config?: IndexOptimizationConfig, + ) { + this.config = config ?? resolveIndexOptimizationConfig(); + } + + /** Scheduled weekly run; no-op unless explicitly enabled. */ + @Cron(CronExpression.EVERY_WEEK) + async scheduledRun(): Promise { + if (!this.config.enabled) { + this.logger.debug('Index optimizer disabled (INDEX_OPT_ENABLED=false)'); + return; + } + this.logger.log('Starting scheduled index optimization cycle'); + await this.run(); + } + + /** + * Execute a full cycle. + * @param force when true, applies DDL even if config is dry-run (used by the + * manual "apply" endpoint). Auto-create/auto-drop flags still gate + * destructive vs additive actions. + */ + async run(force = false): Promise { + const startedAt = new Date().toISOString(); + + // 1. Query analysis → recommendations. + const recommendations = await this.analysis.analyze(); + + // 2. Index creation (additive). Gated by autoCreate; dry-run unless forced. + const createDryRun = force ? false : this.config.dryRun || !this.config.autoCreate; + const created = await this.creation.createFromRecommendations( + recommendations, + createDryRun, + ); + + // 3. Usage monitoring snapshot (read-only). + await this.usageMonitor.sample(); + + // 4. Stale index removal (destructive). Gated by autoDropStale. + const dropDryRun = force ? false : this.config.dryRun || !this.config.autoDropStale; + const removedStale = await this.staleIndex.removeStaleIndexes(dropDryRun); + + const summary: IOptimizationRunSummary = { + startedAt, + finishedAt: new Date().toISOString(), + dryRun: createDryRun && dropDryRun, + recommendations, + created, + removedStale, + }; + + this.lastRun = summary; + this.logger.log( + `Index optimization complete: ${recommendations.length} recommendation(s), ` + + `${created.filter((c) => c.created).length} created, ` + + `${removedStale.filter((r) => r.dropped).length} stale removed`, + ); + return summary; + } + + /** Return the summary of the most recent run, if any. */ + getLastRun(): IOptimizationRunSummary | undefined { + return this.lastRun; + } +} diff --git a/src/database/index-optimization/interfaces/index-optimization.interfaces.ts b/src/database/index-optimization/interfaces/index-optimization.interfaces.ts new file mode 100644 index 00000000..ad494eb5 --- /dev/null +++ b/src/database/index-optimization/interfaces/index-optimization.interfaces.ts @@ -0,0 +1,87 @@ +/** + * Type definitions for the automatic database index optimizer. + * + * The optimizer is PostgreSQL-specific: it reads the catalog and the + * statistics collector (pg_stat_*) to recommend, create, monitor and retire + * indexes. All queries are read-only except the explicit create/drop actions. + */ + +/** Why the optimizer believes an index would help. */ +export enum RecommendationReason { + /** Table receives many sequential scans relative to index scans. */ + HIGH_SEQ_SCAN = 'high_seq_scan', + /** A frequently executed / slow statement filters on un-indexed columns. */ + SLOW_QUERY = 'slow_query', +} + +/** A single proposed index. */ +export interface IIndexRecommendation { + table: string; + columns: string[]; + reason: RecommendationReason; + /** Generated, deterministic index name (idx__). */ + suggestedName: string; + /** The DDL that would be executed to create it. */ + ddl: string; + /** Relative priority 0-100; higher means more impactful. */ + score: number; + /** Human-readable rationale for surfacing in dashboards/logs. */ + rationale: string; +} + +/** Result of attempting to create one recommended index. */ +export interface IIndexCreationResult { + suggestedName: string; + table: string; + ddl: string; + /** True when the index was actually created (false in dry-run/skip). */ + created: boolean; + skippedReason?: string; + error?: string; +} + +/** Usage statistics for a single existing index. */ +export interface IIndexUsageStat { + schema: string; + table: string; + indexName: string; + /** Number of index scans initiated on this index. */ + scans: number; + /** Index size in bytes. */ + sizeBytes: number; + isUnique: boolean; + isPrimary: boolean; + /** True when backing a constraint (PK/unique/FK) — never auto-dropped. */ + isConstraint: boolean; +} + +/** An index judged stale and eligible for removal. */ +export interface IStaleIndex { + schema: string; + table: string; + indexName: string; + scans: number; + sizeBytes: number; + /** The DROP DDL that would be executed. */ + ddl: string; + reason: string; +} + +/** Result of attempting to drop a stale index. */ +export interface IStaleIndexRemovalResult { + indexName: string; + table: string; + dropped: boolean; + skippedReason?: string; + error?: string; +} + +/** Summary returned by a full optimization cycle. */ +export interface IOptimizationRunSummary { + startedAt: string; + finishedAt: string; + dryRun: boolean; + recommendations: IIndexRecommendation[]; + created: IIndexCreationResult[]; + removedStale: IStaleIndexRemovalResult[]; +} diff --git a/src/database/index-optimization/services/index-creation.service.ts b/src/database/index-optimization/services/index-creation.service.ts new file mode 100644 index 00000000..7ff0fb39 --- /dev/null +++ b/src/database/index-optimization/services/index-creation.service.ts @@ -0,0 +1,133 @@ +import { Injectable, Logger } from '@nestjs/common'; +import { InjectDataSource } from '@nestjs/typeorm'; +import { DataSource } from 'typeorm'; +import { + resolveIndexOptimizationConfig, + IndexOptimizationConfig, +} from '../index-optimization.config'; +import { + IIndexCreationResult, + IIndexRecommendation, +} from '../interfaces/index-optimization.interfaces'; + +/** + * Applies index recommendations as real DDL. + * + * Safety properties: + * - Uses CREATE INDEX CONCURRENTLY so it never takes a long write lock. + * - Honours dry-run: when enabled, nothing is executed. + * - Caps the number of indexes created per run (maxCreatePerRun). + * - Verifies the resulting index is `valid`; a CONCURRENTLY build that fails + * leaves an INVALID index behind, which is dropped to avoid query planner + * surprises. + */ +@Injectable() +export class IndexCreationService { + private readonly logger = new Logger(IndexCreationService.name); + private readonly config: IndexOptimizationConfig; + + constructor( + @InjectDataSource() private readonly dataSource: DataSource, + config?: IndexOptimizationConfig, + ) { + this.config = config ?? resolveIndexOptimizationConfig(); + } + + /** + * Create indexes for the given recommendations. + * @param dryRun overrides the configured dry-run flag for this call. + */ + async createFromRecommendations( + recommendations: IIndexRecommendation[], + dryRun = this.config.dryRun, + ): Promise { + const results: IIndexCreationResult[] = []; + let createdCount = 0; + + for (const rec of recommendations) { + if (createdCount >= this.config.maxCreatePerRun) { + results.push(this.skip(rec, `per-run create limit (${this.config.maxCreatePerRun}) reached`)); + continue; + } + + if (dryRun) { + results.push(this.skip(rec, 'dry-run')); + continue; + } + + const result = await this.createOne(rec); + results.push(result); + if (result.created) createdCount++; + } + + return results; + } + + /** Execute a single recommendation's DDL with validity verification. */ + async createOne(rec: IIndexRecommendation): Promise { + try { + this.logger.log(`Creating index ${rec.suggestedName} on ${rec.table}`); + // CONCURRENTLY cannot run inside a transaction block; dataSource.query + // executes outside one by default. + await this.dataSource.query(rec.ddl); + + const valid = await this.isIndexValid(rec.suggestedName); + if (!valid) { + await this.dropInvalid(rec.suggestedName); + return { + suggestedName: rec.suggestedName, + table: rec.table, + ddl: rec.ddl, + created: false, + error: 'index build was invalid and has been dropped', + }; + } + + return { + suggestedName: rec.suggestedName, + table: rec.table, + ddl: rec.ddl, + created: true, + }; + } catch (err) { + this.logger.error( + `Failed to create index ${rec.suggestedName}: ${String(err)}`, + ); + return { + suggestedName: rec.suggestedName, + table: rec.table, + ddl: rec.ddl, + created: false, + error: err instanceof Error ? err.message : String(err), + }; + } + } + + private async isIndexValid(indexName: string): Promise { + const rows = (await this.dataSource.query( + `SELECT i.indisvalid AS valid + FROM pg_class c + JOIN pg_index i ON i.indexrelid = c.oid + WHERE c.relname = $1`, + [indexName], + )) as Array<{ valid: boolean }>; + return Boolean(rows[0]?.valid); + } + + private async dropInvalid(indexName: string): Promise { + this.logger.warn(`Dropping invalid index ${indexName}`); + await this.dataSource.query( + `DROP INDEX CONCURRENTLY IF EXISTS "${this.config.schema}"."${indexName}"`, + ); + } + + private skip(rec: IIndexRecommendation, reason: string): IIndexCreationResult { + return { + suggestedName: rec.suggestedName, + table: rec.table, + ddl: rec.ddl, + created: false, + skippedReason: reason, + }; + } +} diff --git a/src/database/index-optimization/services/index-usage-monitor.service.ts b/src/database/index-optimization/services/index-usage-monitor.service.ts new file mode 100644 index 00000000..3cba2fa9 --- /dev/null +++ b/src/database/index-optimization/services/index-usage-monitor.service.ts @@ -0,0 +1,87 @@ +import { Injectable, Logger } from '@nestjs/common'; +import { InjectDataSource } from '@nestjs/typeorm'; +import { DataSource } from 'typeorm'; +import { + resolveIndexOptimizationConfig, + IndexOptimizationConfig, +} from '../index-optimization.config'; +import { IIndexUsageStat } from '../interfaces/index-optimization.interfaces'; + +/** + * Reads index usage from pg_stat_user_indexes and the catalog so operators can + * see which indexes earn their keep. Also classifies each index (primary / + * unique / constraint-backed) so consumers like the stale-index detector know + * what is safe to touch. + */ +@Injectable() +export class IndexUsageMonitorService { + private readonly logger = new Logger(IndexUsageMonitorService.name); + private readonly config: IndexOptimizationConfig; + + /** Last sampled snapshot, kept for cheap health-check reads. */ + private lastSnapshot: IIndexUsageStat[] = []; + private lastSampledAt?: string; + + constructor( + @InjectDataSource() private readonly dataSource: DataSource, + config?: IndexOptimizationConfig, + ) { + this.config = config ?? resolveIndexOptimizationConfig(); + } + + /** Fetch fresh usage stats and cache them. */ + async sample(): Promise { + const rows = await this.dataSource.query( + `SELECT s.schemaname AS schema, + s.relname AS table, + s.indexrelname AS "indexName", + COALESCE(s.idx_scan, 0)::bigint AS scans, + pg_relation_size(s.indexrelid)::bigint AS "sizeBytes", + ix.indisunique AS "isUnique", + ix.indisprimary AS "isPrimary", + EXISTS ( + SELECT 1 FROM pg_constraint con + WHERE con.conindid = s.indexrelid + ) AS "isConstraint" + FROM pg_stat_user_indexes s + JOIN pg_index ix ON ix.indexrelid = s.indexrelid + WHERE s.schemaname = $1 + ORDER BY scans ASC`, + [this.config.schema], + ); + + const stats: IIndexUsageStat[] = (rows as any[]).map((r) => ({ + schema: r.schema, + table: r.table, + indexName: r.indexName, + scans: Number(r.scans), + sizeBytes: Number(r.sizeBytes), + isUnique: Boolean(r.isUnique), + isPrimary: Boolean(r.isPrimary), + isConstraint: Boolean(r.isConstraint), + })); + + this.lastSnapshot = stats; + this.lastSampledAt = new Date().toISOString(); + this.logger.debug(`Sampled usage for ${stats.length} indexes`); + return stats; + } + + /** Return the cached snapshot, sampling lazily if none exists yet. */ + async getSnapshot(): Promise<{ + sampledAt?: string; + indexes: IIndexUsageStat[]; + }> { + if (!this.lastSampledAt) await this.sample(); + return { sampledAt: this.lastSampledAt, indexes: this.lastSnapshot }; + } + + /** + * Indexes whose scan count is at or below the configured stale threshold, + * useful both for reporting and as input to stale-index removal. + */ + async findUnused(): Promise { + const stats = await this.sample(); + return stats.filter((s) => s.scans <= this.config.staleMinScans); + } +} diff --git a/src/database/index-optimization/services/query-analysis.service.ts b/src/database/index-optimization/services/query-analysis.service.ts new file mode 100644 index 00000000..11f7984d --- /dev/null +++ b/src/database/index-optimization/services/query-analysis.service.ts @@ -0,0 +1,243 @@ +import { Injectable, Logger } from '@nestjs/common'; +import { InjectDataSource } from '@nestjs/typeorm'; +import { DataSource } from 'typeorm'; +import { + resolveIndexOptimizationConfig, + IndexOptimizationConfig, +} from '../index-optimization.config'; +import { + IIndexRecommendation, + RecommendationReason, +} from '../interfaces/index-optimization.interfaces'; + +interface TableScanStat { + table: string; + seq_scan: number; + idx_scan: number; + n_live_tup: number; +} + +interface ExistingIndex { + table: string; + index: string; + columns: string[]; +} + +interface ForeignKeyColumns { + table: string; + columns: string[]; +} + +interface SlowStatement { + query: string; + calls: number; + mean_exec_ms: number; +} + +/** + * Analyses PostgreSQL catalog and statistics to recommend indexes. + * + * Two evidence sources are combined: + * 1. Foreign-key columns lacking a supporting index — Postgres does not index + * FK columns automatically, a very common cause of slow joins/cascades. + * These give concrete, safe column recommendations. + * 2. pg_stat_user_tables sequential-scan activity — used to score and + * prioritise the above, and to flag heavily seq-scanned tables. + * + * pg_stat_statements (when installed) is surfaced for slow-query context. + */ +@Injectable() +export class QueryAnalysisService { + private readonly logger = new Logger(QueryAnalysisService.name); + private readonly config: IndexOptimizationConfig; + + constructor( + @InjectDataSource() private readonly dataSource: DataSource, + config?: IndexOptimizationConfig, + ) { + this.config = config ?? resolveIndexOptimizationConfig(); + } + + /** Produce a prioritised list of index recommendations. */ + async analyze(): Promise { + const [scanStats, existingIndexes, fkColumns] = await Promise.all([ + this.getTableScanStats(), + this.getExistingIndexes(), + this.getForeignKeyColumns(), + ]); + + const scanByTable = new Map(scanStats.map((s) => [s.table, s])); + const recommendations: IIndexRecommendation[] = []; + + for (const fk of fkColumns) { + // Skip when an existing index already leads with these columns. + if (this.hasCoveringIndex(existingIndexes, fk.table, fk.columns)) { + continue; + } + + const stat = scanByTable.get(fk.table); + const isHotTable = this.isSeqScanHeavy(stat); + const score = this.scoreRecommendation(stat); + + recommendations.push({ + table: fk.table, + columns: fk.columns, + reason: isHotTable + ? RecommendationReason.HIGH_SEQ_SCAN + : RecommendationReason.SLOW_QUERY, + suggestedName: this.indexName(fk.table, fk.columns), + ddl: this.createIndexDdl(fk.table, fk.columns), + score, + rationale: this.buildRationale(fk, stat), + }); + } + + // Highest impact first. + recommendations.sort((a, b) => b.score - a.score); + this.logger.debug( + `Index analysis produced ${recommendations.length} recommendation(s)`, + ); + return recommendations; + } + + /** + * Return slow statements from pg_stat_statements for diagnostic context. + * Returns an empty array when the extension is not installed. + */ + async getSlowStatements(limit = 20): Promise { + if (!(await this.hasPgStatStatements())) { + this.logger.debug('pg_stat_statements not available; skipping slow-query analysis'); + return []; + } + const rows = await this.query( + `SELECT query, calls, mean_exec_time AS mean_exec_ms + FROM pg_stat_statements + WHERE mean_exec_time >= $1 + ORDER BY mean_exec_time DESC + LIMIT $2`, + [this.config.slowQueryMs, limit], + ); + return rows; + } + + // ─── Catalog / stats queries ──────────────────────────────────────────── + + private getTableScanStats(): Promise { + return this.query( + `SELECT relname AS table, + COALESCE(seq_scan, 0) AS seq_scan, + COALESCE(idx_scan, 0) AS idx_scan, + COALESCE(n_live_tup, 0) AS n_live_tup + FROM pg_stat_user_tables + WHERE schemaname = $1`, + [this.config.schema], + ); + } + + private getExistingIndexes(): Promise { + return this.query( + `SELECT t.relname AS table, + i.relname AS index, + array_agg(a.attname ORDER BY k.ord) AS columns + FROM pg_index ix + JOIN pg_class i ON i.oid = ix.indexrelid + JOIN pg_class t ON t.oid = ix.indrelid + JOIN pg_namespace ns ON ns.oid = t.relnamespace + JOIN unnest(ix.indkey) WITH ORDINALITY AS k(attnum, ord) ON true + JOIN pg_attribute a ON a.attrelid = t.oid AND a.attnum = k.attnum + WHERE ns.nspname = $1 AND k.attnum > 0 + GROUP BY t.relname, i.relname`, + [this.config.schema], + ); + } + + private getForeignKeyColumns(): Promise { + return this.query( + `SELECT cl.relname AS table, + array_agg(att.attname ORDER BY u.ord) AS columns + FROM pg_constraint con + JOIN unnest(con.conkey) WITH ORDINALITY AS u(attnum, ord) ON true + JOIN pg_attribute att ON att.attrelid = con.conrelid AND att.attnum = u.attnum + JOIN pg_class cl ON cl.oid = con.conrelid + JOIN pg_namespace ns ON ns.oid = cl.relnamespace + WHERE con.contype = 'f' AND ns.nspname = $1 + GROUP BY con.oid, cl.relname`, + [this.config.schema], + ); + } + + private async hasPgStatStatements(): Promise { + const rows = await this.query<{ exists: boolean }>( + `SELECT EXISTS ( + SELECT 1 FROM pg_extension WHERE extname = 'pg_stat_statements' + ) AS exists`, + ); + return Boolean(rows[0]?.exists); + } + + // ─── Heuristics ───────────────────────────────────────────────────────── + + /** True when an index already starts with exactly the FK column prefix. */ + private hasCoveringIndex( + indexes: ExistingIndex[], + table: string, + columns: string[], + ): boolean { + return indexes + .filter((ix) => ix.table === table) + .some((ix) => this.startsWith(ix.columns, columns)); + } + + private startsWith(indexCols: string[], wanted: string[]): boolean { + if (indexCols.length < wanted.length) return false; + return wanted.every((c, i) => indexCols[i] === c); + } + + private isSeqScanHeavy(stat?: TableScanStat): boolean { + if (!stat) return false; + const ratio = + stat.idx_scan === 0 ? Infinity : stat.seq_scan / stat.idx_scan; + return ( + stat.seq_scan >= this.config.seqScanThreshold && + ratio >= this.config.seqScanRatio + ); + } + + /** Score 0-100 weighted by seq-scan volume and table size. */ + private scoreRecommendation(stat?: TableScanStat): number { + if (!stat) return 25; + const scanComponent = Math.min( + 60, + (stat.seq_scan / Math.max(this.config.seqScanThreshold, 1)) * 30, + ); + const sizeComponent = Math.min(40, Math.log10(stat.n_live_tup + 1) * 10); + return Math.round(Math.min(100, 20 + scanComponent + sizeComponent)); + } + + private buildRationale(fk: ForeignKeyColumns, stat?: TableScanStat): string { + const cols = fk.columns.join(', '); + const base = `Foreign-key column(s) (${cols}) on "${fk.table}" have no supporting index`; + if (!stat) return `${base}.`; + return ( + `${base}; table has ${stat.seq_scan} sequential scans vs ` + + `${stat.idx_scan} index scans over ~${stat.n_live_tup} live rows.` + ); + } + + // ─── DDL helpers ────────────────────────────────────────────────────────── + + /** Deterministic, collision-resistant index name capped to Postgres' 63 chars. */ + indexName(table: string, columns: string[]): string { + const raw = `idx_${table}_${columns.join('_')}`; + return raw.length <= 63 ? raw : raw.slice(0, 63); + } + + createIndexDdl(table: string, columns: string[]): string { + const cols = columns.map((c) => `"${c}"`).join(', '); + return `CREATE INDEX CONCURRENTLY IF NOT EXISTS "${this.indexName(table, columns)}" ON "${this.config.schema}"."${table}" (${cols})`; + } + + private async query(sql: string, params: unknown[] = []): Promise { + return this.dataSource.query(sql, params) as Promise; + } +} diff --git a/src/database/index-optimization/services/stale-index.service.ts b/src/database/index-optimization/services/stale-index.service.ts new file mode 100644 index 00000000..0e21ac13 --- /dev/null +++ b/src/database/index-optimization/services/stale-index.service.ts @@ -0,0 +1,103 @@ +import { Injectable, Logger } from '@nestjs/common'; +import { InjectDataSource } from '@nestjs/typeorm'; +import { DataSource } from 'typeorm'; +import { + resolveIndexOptimizationConfig, + IndexOptimizationConfig, +} from '../index-optimization.config'; +import { IndexUsageMonitorService } from './index-usage-monitor.service'; +import { + IStaleIndex, + IStaleIndexRemovalResult, +} from '../interfaces/index-optimization.interfaces'; + +/** + * Detects and (optionally) removes stale indexes — those that have never been + * scanned and are large enough to be worth reclaiming. + * + * Hard safety rules: primary keys, unique indexes and any constraint-backed + * index are NEVER considered stale, because dropping them changes semantics or + * is outright disallowed. + */ +@Injectable() +export class StaleIndexService { + private readonly logger = new Logger(StaleIndexService.name); + private readonly config: IndexOptimizationConfig; + + constructor( + @InjectDataSource() private readonly dataSource: DataSource, + private readonly usageMonitor: IndexUsageMonitorService, + config?: IndexOptimizationConfig, + ) { + this.config = config ?? resolveIndexOptimizationConfig(); + } + + /** Identify indexes eligible for removal. */ + async findStaleIndexes(): Promise { + const unused = await this.usageMonitor.findUnused(); + + return unused + .filter( + (idx) => + !idx.isPrimary && + !idx.isUnique && + !idx.isConstraint && + idx.sizeBytes >= this.config.staleMinSizeBytes, + ) + .map((idx) => ({ + schema: idx.schema, + table: idx.table, + indexName: idx.indexName, + scans: idx.scans, + sizeBytes: idx.sizeBytes, + ddl: `DROP INDEX CONCURRENTLY IF EXISTS "${idx.schema}"."${idx.indexName}"`, + reason: + `Index has ${idx.scans} scans (≤ ${this.config.staleMinScans}) and ` + + `occupies ${(idx.sizeBytes / 1024 / 1024).toFixed(1)} MB.`, + })); + } + + /** + * Remove stale indexes. + * @param dryRun overrides the configured dry-run flag for this call. + */ + async removeStaleIndexes( + dryRun = this.config.dryRun, + ): Promise { + const stale = await this.findStaleIndexes(); + const results: IStaleIndexRemovalResult[] = []; + + for (const idx of stale) { + if (dryRun) { + results.push({ + indexName: idx.indexName, + table: idx.table, + dropped: false, + skippedReason: 'dry-run', + }); + continue; + } + results.push(await this.dropOne(idx)); + } + + return results; + } + + private async dropOne(idx: IStaleIndex): Promise { + try { + this.logger.log(`Dropping stale index ${idx.indexName} on ${idx.table}`); + await this.dataSource.query(idx.ddl); + return { indexName: idx.indexName, table: idx.table, dropped: true }; + } catch (err) { + this.logger.error( + `Failed to drop stale index ${idx.indexName}: ${String(err)}`, + ); + return { + indexName: idx.indexName, + table: idx.table, + dropped: false, + error: err instanceof Error ? err.message : String(err), + }; + } + } +}