diff --git a/CHANGELOG.md b/CHANGELOG.md index 62c4435..4808573 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Unreleased - Added JVM semantic role mapping from Java annotations, imports, inheritance, interfaces, and method signatures. +- Added Ruby and Rails feature mapping while excluding legacy Rails secrets from reviewable config. - Added selected package script mapping for Node workspace packages. - Detected Java/Kotlin language and default Gradle build/test commands for root Gradle projects. - Added FastAPI route feature mapping and kept root/web Python project detection in sync. diff --git a/README.md b/README.md index 687a687..fc5ed97 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,7 @@ validation commands and records a patch attempt under `.clawpatch/`. - Java/Kotlin Gradle source groups and root Gradle build/test commands - JVM semantic roles from Java code evidence such as annotations, imports, interfaces, inheritance, and method signatures +- Ruby project metadata, executables, source groups, RSpec/Minitest suites, and Rails apps - Rust `src/main.rs`, `src/bin/*.rs`, `src/lib.rs`, `crates/*`, and `tests/*.rs` - Python project metadata, console scripts, bounded source groups, pytest suites, diff --git a/docs/feature-mapping.md b/docs/feature-mapping.md index c442d75..c7022ea 100644 --- a/docs/feature-mapping.md +++ b/docs/feature-mapping.md @@ -36,6 +36,8 @@ Supported deterministic mappers today: - Python project metadata, console scripts, root app files, bounded source groups, pytest suites, and Flask/FastAPI routes - JVM semantic role groups from Java annotations, imports, inheritance, interfaces, and method signatures +- Ruby project metadata, executables, source groups, RSpec/Minitest suites, + Rails configs, routes, views, assets, and database files - Rust Cargo commands, libraries, workspace crates, and integration tests - SwiftPM executable targets, library targets, and test suites - nested SwiftPM packages @@ -68,6 +70,11 @@ handlers, and 
FastAPI `@*.get(...)` / `@*.api_route(...)` handlers. Flask and FastAPI route methods are read from list, tuple, or set literals. FastAPI paths can be positional strings or literal `path=` keywords. +Ruby mapping covers project metadata, executables, source groups, RSpec and +Minitest suites, and Rails app structure. Rails legacy `config/secrets.yml` is +not mapped as reviewable config because it can contain provider-sensitive +secrets. + Known gaps: - no Express/Fastify/Hono route mapper yet diff --git a/docs/quickstart.md b/docs/quickstart.md index b69f152..627d81e 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -48,6 +48,7 @@ This discovers reviewable features: - Java/Kotlin Gradle modules - Python packages, console scripts, Flask routes, and pytest suites - JVM semantic role groups +- Ruby packages, Rails apps, executables, and tests - Rust crates and binaries - SwiftPM targets and tests - Config files diff --git a/src/detect.ts b/src/detect.ts index aed426f..701967f 100644 --- a/src/detect.ts +++ b/src/detect.ts @@ -150,6 +150,9 @@ async function languageDefaultCommands( ) { return gradleDefaultCommands(root); } + if (languages.includes("ruby")) { + return rubyDefaultCommands(root); + } return { typecheck: null, @@ -241,10 +244,14 @@ async function detectPackageManagers(root: string): Promise { if (!found.some((name) => pythonPackageManagers.has(name)) && (await isPythonProject(root))) { found.push((await pathExists(join(root, "requirements.txt"))) ? "pip" : "python"); } + if ((await isRubyProject(root)) && !found.some((name) => rubyPackageManagers.has(name))) { + found.push((await pathExists(join(root, "Gemfile"))) ? 
"bundler" : "ruby"); + } return found; } const pythonPackageManagers = new Set(["uv", "poetry", "pdm", "hatch", "pip", "python"]); +const rubyPackageManagers = new Set(["bundler", "ruby"]); async function isRootGradleProject(root: string): Promise { return ( @@ -336,6 +343,39 @@ function pythonRunCommand(runner: string | null, command: string): string { return command; } +async function rubyDefaultCommands(root: string): Promise { + const source = await rubyDependencySource(root); + const hasBundle = await pathExists(join(root, "Gemfile")); + const hasRspec = /\brspec\b/iu.test(source) || (await containsRubySpecFile(root, 5)); + const hasMinitest = /\bminitest\b/iu.test(source) || (await containsRubyTestFile(root, 5)); + const hasRubocop = + /\brubocop\b/iu.test(source) || + (await pathExists(join(root, ".rubocop.yml"))) || + (await pathExists(join(root, ".rubocop_todo.yml"))); + const run = hasBundle ? "bundle exec " : ""; + return { + typecheck: null, + lint: hasRubocop ? `${run}rubocop` : null, + format: null, + test: hasRspec ? `${run}rspec` : hasMinitest ? 
`${run}rake test` : null, + }; +} + +async function rubyDependencySource(root: string): Promise { + const chunks: string[] = []; + for (const path of ["Gemfile", "gems.rb"]) { + if (await pathExists(join(root, path))) { + chunks.push(await readFile(join(root, path), "utf8")); + } + } + for (const entry of await readdir(root).catch(() => [])) { + if (entry.endsWith(".gemspec")) { + chunks.push(await readFile(join(root, entry), "utf8")); + } + } + return chunks.join("\n"); +} + async function pythonProjectInfo(root: string): Promise { const info: PythonProjectInfo = { dependencies: new Set(), @@ -713,9 +753,29 @@ async function detectFrameworks(root: string, pkg: PackageJson | null): Promise< } } } + for (const name of await detectRubyFrameworks(root)) { + if (!frameworks.includes(name)) { + frameworks.push(name); + } + } + return uniqueStrings(frameworks); +} + +async function detectRubyFrameworks(root: string): Promise { + const source = await rubyDependencySource(root); + const frameworks: string[] = []; + for (const name of ["jekyll", "rails", "sinatra"]) { + if (new RegExp(`\\b${name}\\b`, "iu").test(source)) { + frameworks.push(name); + } + } return frameworks; } +function uniqueStrings(values: string[]): string[] { + return [...new Set(values)]; +} + function dependencyNames(pkg: PackageJson | null): Set { const names = new Set(); for (const field of [pkg?.dependencies, pkg?.devDependencies]) { @@ -740,6 +800,10 @@ async function detectLanguages(root: string): Promise { ["python", "setup.py"], ["python", "setup.cfg"], ["python", "requirements.txt"], + ["ruby", "Gemfile"], + ["ruby", "gems.rb"], + ["ruby", "Rakefile"], + ["ruby", "config.ru"], ]; const languages: string[] = []; for (const [language, file] of checks) { @@ -753,6 +817,9 @@ async function detectLanguages(root: string): Promise { if (!languages.includes("java") && (await containsReviewableJavaFile(root))) { languages.push("java"); } + if (!languages.includes("ruby") && (await isRubyProject(root))) 
{ + languages.push("ruby"); + } if ( !languages.includes("swift") && ((await containsFileNamed(root, "Package.swift", 5)) || @@ -800,6 +867,19 @@ async function isPythonProject(root: string): Promise { ); } +async function isRubyProject(root: string): Promise { + if ( + (await pathExists(join(root, "Gemfile"))) || + (await pathExists(join(root, "gems.rb"))) || + (await pathExists(join(root, "Rakefile"))) || + (await pathExists(join(root, "config.ru"))) || + (await containsFileWithExtension(root, ".gemspec", 1)) + ) { + return true; + } + return containsReviewableRubyFile(root); +} + async function containsReviewablePythonFile(root: string): Promise { if (await containsRootReviewablePythonFile(root)) { return true; @@ -887,6 +967,23 @@ async function collectPythonFrameworkScanFiles( } } +async function containsReviewableRubyFile(root: string): Promise { + for (const prefix of ["app", "lib", "scripts", "exe", "bin"]) { + if (await containsFileWithExtension(join(root, prefix), ".rb", 4)) { + return true; + } + } + return false; +} + +async function containsRubySpecFile(root: string, maxDepth: number): Promise { + return containsFileMatching(root, maxDepth, (entry) => entry.endsWith("_spec.rb")); +} + +async function containsRubyTestFile(root: string, maxDepth: number): Promise { + return containsFileMatching(root, maxDepth, (entry) => entry.endsWith("_test.rb")); +} + async function containsFileNamed(root: string, name: string, maxDepth: number): Promise { return containsFileMatching(root, maxDepth, (entry) => entry === name); } @@ -947,6 +1044,8 @@ function shouldSkipSearchEntry(entry: string): boolean { ".mypy_cache", ".ruff_cache", ".pytest_cache", + ".bundle", + "vendor", "fixtures", "__fixtures__", "testdata", diff --git a/src/mapper.test.ts b/src/mapper.test.ts index 6f90dcb..c03a369 100644 --- a/src/mapper.test.ts +++ b/src/mapper.test.ts @@ -181,6 +181,202 @@ describe("mapFeatures", () => { expect(cli?.summary).toContain("source src/cli.ts"); }); + it("maps 
Ruby metadata, executables, source groups, and tests", async () => { + const root = await fixtureRoot("clawpatch-map-ruby-"); + await writeFixture( + root, + "Gemfile", + "source 'https://rubygems.org'\ngem 'rspec'\ngem 'rubocop'\n", + ); + await writeFixture( + root, + "fixture.gemspec", + "Gem::Specification.new do |spec|\n spec.name = 'fixture-ruby'\n spec.add_dependency 'redis'\nend\n", + ); + await writeFixture(root, "Rakefile", "task :default\n"); + await writeFixture(root, "exe/fixture", "#!/usr/bin/env ruby\nputs 'ok'\n"); + await writeFixture(root, "script/helper.rb", "#!/usr/bin/env ruby\nputs 'helper'\n"); + await writeFixture(root, "lib/fixture.rb", "module Fixture\nend\n"); + await writeFixture( + root, + "lib/fixture/client.rb", + "module Fixture\n class Client\n end\nend\n", + ); + for (let index = 0; index < 12; index += 1) { + await writeFixture( + root, + `lib/fixture/type/type${String(index).padStart(2, "0")}.rb`, + "module Fixture\nend\n", + ); + } + await writeFixture(root, "spec/fixture/client_spec.rb", "RSpec.describe Fixture::Client\n"); + await writeFixture(root, "vendor/bundle/ignored.rb", "module Ignored\nend\n"); + + const project = await detectProject(root); + const result = await mapFeatures(root, project, []); + const titles = result.features.map((feature) => feature.title); + const rubyProject = result.features.find( + (feature) => feature.title === "Ruby project fixture-ruby", + ); + const cli = result.features.find((feature) => feature.title === "Ruby CLI command fixture"); + const source = result.features.find((feature) => feature.title === "Ruby source lib/fixture"); + + expect(project.detected.languages).toContain("ruby"); + expect(project.detected.packageManagers).toContain("bundler"); + expect(project.detected.commands).toMatchObject({ + lint: "bundle exec rubocop", + test: "bundle exec rspec", + }); + expect(titles).toContain("Ruby project fixture-ruby"); + expect(titles).toContain("Ruby CLI command fixture"); + 
expect(titles).toContain("Ruby CLI command helper.rb"); + expect(titles).toContain("Ruby Rake tasks"); + expect(titles).toContain("Ruby source lib"); + expect(titles).toContain("Ruby source lib/fixture"); + expect(titles).toContain("Ruby source lib/fixture/type"); + expect(titles).toContain("Ruby test suite spec"); + expect(rubyProject?.ownedFiles).toContainEqual({ + path: "fixture.gemspec", + reason: "ruby project metadata", + }); + expect(rubyProject?.trustBoundaries).toEqual( + expect.arrayContaining(["database", "network", "serialization"]), + ); + expect(cli?.entrypoints[0]?.path).toBe("exe/fixture"); + expect(source?.ownedFiles.map((ref) => ref.path)).toContain("lib/fixture/client.rb"); + expect(source?.tests).toEqual([ + { path: "spec/fixture/client_spec.rb", command: "bundle exec rspec" }, + ]); + expect( + result.features.flatMap((feature) => feature.ownedFiles.map((ref) => ref.path)), + ).not.toContain("vendor/bundle/ignored.rb"); + }); + + it("maps Gemfile-only Jekyll sites without mistaking dependencies for project names", async () => { + const root = await fixtureRoot("clawpatch-map-jekyll-"); + await writeFixture( + root, + "Gemfile", + "source 'https://rubygems.org'\ngem 'jekyll'\ngem 'jekyll-feed'\ngem 'hive-ruby'\n", + ); + await writeFixture(root, "_config.yml", "title: Docs\n"); + await writeFixture(root, "index.md", "---\nlayout: home\n---\n"); + await writeFixture(root, "_layouts/default.html", "{{ content }}\n"); + await writeFixture(root, "_includes/header.html", "
\n"); + await writeFixture(root, "_sass/site.scss", "body { color: black; }\n"); + await writeFixture(root, "assets/main.scss", "---\n---\n@import 'site';\n"); + await writeFixture(root, "_posts/2021-01-01-one.md", "---\ntitle: One\n---\n"); + await writeFixture(root, "_posts/2022-01-01-two.md", "---\ntitle: Two\n---\n"); + await writeFixture(root, "_topics/ruby.md", "---\ntitle: Ruby\n---\n"); + + const project = await detectProject(root); + const result = await mapFeatures(root, project, []); + const titles = result.features.map((feature) => feature.title); + const rubyProject = result.features.find( + (feature) => feature.title === `Ruby project ${root.split("/").at(-1)}`, + ); + const siteConfig = result.features.find( + (feature) => feature.title === "Jekyll site configuration", + ); + + expect(project.detected.frameworks).toContain("jekyll"); + expect(titles).toContain(`Ruby project ${root.split("/").at(-1)}`); + expect(titles).not.toContain("Ruby project jekyll"); + expect(titles).toContain("Jekyll site configuration"); + expect(titles).toContain("Jekyll theme _layouts"); + expect(titles).toContain("Jekyll theme _includes"); + expect(titles).toContain("Jekyll theme _sass"); + expect(titles).toContain("Jekyll content _posts/2021"); + expect(titles).toContain("Jekyll content _posts/2022"); + expect(titles).toContain("Jekyll content _topics"); + expect(rubyProject?.entrypoints[0]?.symbol).toBeNull(); + expect(siteConfig?.ownedFiles.map((ref) => ref.path)).toContain("index.md"); + }); + + it("maps Rails app structure and skips common Rails binstubs", async () => { + const root = await fixtureRoot("clawpatch-map-rails-"); + await writeFixture( + root, + "package.json", + JSON.stringify({ name: "rails-webpacker-shell", dependencies: { "@rails/ujs": "1.0.0" } }), + ); + await writeFixture(root, "Gemfile", "source 'https://rubygems.org'\ngem 'rails'\ngem 'pg'\n"); + await writeFixture(root, "config/application.rb", "module FixtureRails\nend\n"); + await 
writeFixture(root, "config/routes.rb", "Rails.application.routes.draw do\nend\n"); + await writeFixture(root, "config/secrets.yml", "redacted: placeholder\n"); + await writeFixture( + root, + "config/environments/test.rb", + "Rails.application.configure do\nend\n", + ); + await writeFixture( + root, + "config/initializers/filter.rb", + "Rails.application.config.filter_parameters += [:password]\n", + ); + await writeFixture(root, "db/schema.rb", "ActiveRecord::Schema.define do\nend\n"); + await writeFixture( + root, + "db/migrate/20200101000000_create_widgets.rb", + "class CreateWidgets < ActiveRecord::Migration[6.1]\nend\n", + ); + await writeFixture( + root, + "bin/rails", + "#!/usr/bin/env ruby\nAPP_PATH = '../config/application'\n", + ); + await writeFixture( + root, + "app/controllers/widgets_controller.rb", + "class WidgetsController < ApplicationController\nend\n", + ); + await writeFixture(root, "app/models/widget.rb", "class Widget < ApplicationRecord\nend\n"); + await writeFixture(root, "app/views/widgets/index.html.haml", "%h1 Widgets\n"); + await writeFixture(root, "app/views/widgets/index.json.jbuilder", "json.widgets []\n"); + await writeFixture(root, "app/assets/javascripts/widgets.coffee", "console.log 'widgets'\n"); + await writeFixture(root, "app/assets/stylesheets/widgets.scss", ".widgets { color: black; }\n"); + await writeFixture(root, "src/client.ts", "export function client() {}\n"); + await writeFixture(root, "lib/client.ts", "export function libClient() {}\n"); + await writeFixture(root, "pages/home.tsx", "export function Home() { return null; }\n"); + await writeFixture( + root, + "test/controllers/widgets_controller_test.rb", + "class WidgetsControllerTest\nend\n", + ); + + const project = await detectProject(root); + const result = await mapFeatures(root, project, []); + const titles = result.features.map((feature) => feature.title); + const referencedFiles = result.features.flatMap((feature) => [ + ...feature.ownedFiles.map((ref) => 
ref.path), + ...feature.contextFiles.map((ref) => ref.path), + ]); + const rubyProject = result.features.find( + (feature) => feature.title === `Ruby project ${root.split("/").at(-1)}`, + ); + const railsConfig = result.features.find( + (feature) => feature.title === "Rails application configuration", + ); + + expect(project.detected.frameworks).toContain("rails"); + expect(titles).not.toContain("Ruby CLI command rails"); + expect(titles).not.toContain("Node source app"); + expect(titles).not.toContain("Node source app/assets"); + expect(titles).toContain("Node source src"); + expect(titles).toContain("Node source lib"); + expect(titles).toContain("Node source pages"); + expect(titles).toContain("Rails application configuration"); + expect(titles).toContain("Rails database schema and migrations"); + expect(titles).toContain("Rails views app/views"); + expect(titles).toContain("Rails assets app/assets"); + expect(rubyProject?.trustBoundaries).toEqual( + expect.arrayContaining(["database", "network", "serialization"]), + ); + expect(railsConfig?.ownedFiles.map((ref) => ref.path)).toContain("config/routes.rb"); + expect(railsConfig?.ownedFiles.map((ref) => ref.path)).not.toContain("config/secrets.yml"); + expect(referencedFiles).not.toContain("config/secrets.yml"); + }); + it("maps workspace packages and splits large Node source groups", async () => { const root = await fixtureRoot("clawpatch-node-workspace-map-"); await writeFixture( diff --git a/src/mapper.ts b/src/mapper.ts index 897089f..677be7e 100644 --- a/src/mapper.ts +++ b/src/mapper.ts @@ -7,6 +7,7 @@ import { gradleSeeds } from "./mappers/gradle.js"; import { nextSeeds } from "./mappers/next.js"; import { nodeSeeds } from "./mappers/node.js"; import { pythonSeeds } from "./mappers/python.js"; +import { rubySeeds } from "./mappers/ruby.js"; import { rustSeeds } from "./mappers/rust.js"; import { nearbyTests } from "./mappers/shared.js"; import { swiftSeeds } from "./mappers/swift.js"; @@ -25,6 +26,7 @@ const 
featureMappers: FeatureMapper[] = [ { name: "next", map: nextSeeds }, { name: "go", map: goSeeds }, { name: "python", map: pythonSeeds }, + { name: "ruby", map: rubySeeds }, { name: "rust", map: rustSeeds }, { name: "swift", map: swiftSeeds }, { name: "apple", map: appleSeeds }, diff --git a/src/mappers/node.ts b/src/mappers/node.ts index 9cddfca..7d53939 100644 --- a/src/mappers/node.ts +++ b/src/mappers/node.ts @@ -146,11 +146,13 @@ async function sourceGroupSeeds( const testFiles = await packageTestFiles(root, info); const seeds: FeatureSeed[] = []; - for (const sourceRoot of packageSourceRoots(info.root)) { + for (const sourceRoot of await packageSourceRoots(root, info)) { if (!(await pathExists(join(root, sourceRoot)))) { continue; } - const files = (await walk(root, [sourceRoot])).filter(isReviewableNodeSourceFile); + const files = (await walk(root, [sourceRoot])).filter( + (path) => isReviewableNodeSourceFile(path) && !isRailsExcludedNodeSourcePath(info, path), + ); if (files.length === 0) { continue; } @@ -493,18 +495,39 @@ async function packageContextFiles(root: string, info: PackageInfo): Promise packageRelativePath(packageRoot, dir)); +async function packageSourceRoots(root: string, info: PackageInfo): Promise { + if (await isRailsPackage(root, info.root)) { + return [ + ...new Set( + [...sourceDirectories, "app/javascript", "app/packs", "app/frontend"].map((dir) => + packageRelativePath(info.root, dir), + ), + ), + ].filter((path) => !pathMatchesPrefix(path, packageRelativePath(info.root, "app/assets"))); + } + return sourceDirectories.map((dir) => packageRelativePath(info.root, dir)); +} + +function isRailsExcludedNodeSourcePath(info: PackageInfo, path: string): boolean { + return pathMatchesPrefix(path, packageRelativePath(info.root, "app/assets")); } async function packageTestFiles(root: string, info: PackageInfo): Promise { const prefixes = [ - ...packageSourceRoots(info.root), + ...(await packageSourceRoots(root, info)), 
...testDirectories.map((dir) => packageRelativePath(info.root, dir)), ]; return (await walk(root, prefixes)).filter(isNodeTestPath).slice(0, 200); } +async function isRailsPackage(root: string, packageRoot: string): Promise { + return ( + packageRoot === "." && + (await pathExists(join(root, "Gemfile"))) && + (await pathExists(join(root, "config/application.rb"))) + ); +} + function partitionSourceFiles( sourceRoot: string, files: string[], diff --git a/src/mappers/ruby.ts b/src/mappers/ruby.ts new file mode 100644 index 0000000..6e848e4 --- /dev/null +++ b/src/mappers/ruby.ts @@ -0,0 +1,805 @@ +import { readFile, readdir } from "node:fs/promises"; +import { basename, dirname, join } from "node:path"; +import { pathExists } from "../fs.js"; +import { + isSafeDirectory, + isSafeFile, + packageKind, + packageTrustBoundaries, + pathMatchesPrefix, + shouldSkip, + walk, +} from "./shared.js"; +import { FeatureSeed, SeedFileRef, SeedTestRef } from "./types.js"; +import { TrustBoundary } from "../types.js"; + +type SourceGroup = { + label: string; + files: string[]; +}; + +type RubyProjectInfo = { + name: string | null; + dependencies: Set; + hasRspec: boolean; + hasMinitest: boolean; +}; + +const metadataFiles = ["Gemfile", "gems.rb", "Rakefile", "config.ru"] as const; +const sourceRoots = ["app", "lib", "scripts"] as const; +const executableRoots = ["exe", "bin", "script"] as const; +const railsBinstubs = new Set(["bundle", "rails", "rake", "setup", "spring", "yarn"]); +const sourceGroupMaxOwnedFiles = 12; +const sourceGroupMaxTests = 8; +const jekyllContentMaxOwnedFiles = 24; + +export async function rubySeeds(root: string): Promise { + if (!(await isRubyProject(root))) { + return []; + } + const info = await rubyProjectInfo(root); + const projectFiles = await rubyMetadataFiles(root); + const testFiles = await rubyTestFiles(root); + const testCommand = await rubyTestCommand(root, info, testFiles); + const railsApp = await isRailsApp(root, info); + const seeds: 
FeatureSeed[] = []; + + if (projectFiles.length > 0) { + seeds.push({ + title: `Ruby project ${info.name ?? basename(root)}`, + summary: `Ruby project metadata in ${projectFiles.join(", ")}.`, + kind: packageKind(info.name ?? basename(root)), + source: "ruby-project", + confidence: "medium", + entryPath: projectFiles[0] ?? "Gemfile", + symbol: info.name, + route: null, + command: null, + ownedFiles: projectFiles.map((path) => ({ path, reason: "ruby project metadata" })), + contextFiles: await rubyProjectContextFiles(root, projectFiles), + tags: ["ruby", "package"], + trustBoundaries: rubyTrustBoundaries(info.name ?? basename(root), info.dependencies), + skipNearbyTests: true, + }); + } + + for (const executable of await rubyExecutables(root, railsApp)) { + const tests = associatedTests([executable], testFiles, testCommand); + seeds.push({ + title: `Ruby CLI command ${basename(executable)}`, + summary: `Ruby executable ${executable}.`, + kind: "cli-command", + source: "ruby-executable", + confidence: "high", + entryPath: executable, + symbol: null, + route: null, + command: basename(executable), + ownedFiles: [{ path: executable, reason: "ruby executable" }], + contextFiles: tests.map((test) => ({ path: test.path, reason: "associated test" })), + tests, + tags: ["ruby", "cli"], + trustBoundaries: ["user-input", "filesystem", "process-exec"], + testCommand, + skipNearbyTests: true, + }); + } + + if (projectFiles.includes("Rakefile")) { + seeds.push({ + title: "Ruby Rake tasks", + summary: "Ruby Rakefile task definitions.", + kind: "release", + source: "ruby-rakefile", + confidence: "medium", + entryPath: "Rakefile", + symbol: null, + route: null, + command: "rake", + ownedFiles: [{ path: "Rakefile", reason: "rake task definitions" }], + contextFiles: [], + tests: [], + tags: ["ruby", "rake"], + trustBoundaries: ["filesystem", "process-exec"], + skipNearbyTests: true, + }); + } + + for (const group of await rubySourceGroups(root)) { + const tests = 
associatedTests(group.files, testFiles, testCommand); + seeds.push({ + title: `Ruby source ${group.label}`, + summary: + group.files.length === 1 + ? `Ruby source file ${group.files[0]}.` + : `Ruby source group ${group.label} with ${group.files.length} files.`, + kind: packageKind(group.label), + source: "ruby-source-group", + confidence: "medium", + entryPath: group.label, + symbol: group.label, + route: null, + command: null, + ownedFiles: group.files.map((path) => ({ path, reason: `source group ${group.label}` })), + contextFiles: tests.map((test) => ({ path: test.path, reason: "associated test" })), + tests, + tags: ["ruby", "source-group"], + trustBoundaries: rubyTrustBoundaries(group.label, info.dependencies), + testCommand, + skipNearbyTests: true, + }); + } + + seeds.push(...(await jekyllSeeds(root, info))); + seeds.push(...(await railsSeeds(root, info))); + + for (const testSuite of standaloneTestSuites(testFiles, testCommand)) { + seeds.push(testSuite); + } + + return seeds; +} + +async function isRubyProject(root: string): Promise { + return ( + (await pathExists(join(root, "Gemfile"))) || + (await pathExists(join(root, "gems.rb"))) || + (await pathExists(join(root, "Rakefile"))) || + (await pathExists(join(root, "config.ru"))) || + (await rubyGemspecs(root)).length > 0 || + (await containsReviewableRubySource(root)) + ); +} + +async function rubyProjectInfo(root: string): Promise { + const source = await rubyDependencySource(root); + return { + name: rubyProjectName(source), + dependencies: rubyDependencyNames(source), + hasRspec: /\brspec\b/iu.test(source), + hasMinitest: /\bminitest\b/iu.test(source), + }; +} + +async function rubyMetadataFiles(root: string): Promise { + const files: string[] = []; + for (const path of metadataFiles) { + if (await pathExists(join(root, path))) { + files.push(path); + } + } + files.push(...(await rubyGemspecs(root))); + return files.toSorted(); +} + +async function rubyGemspecs(root: string): Promise { + const entries 
= await readdir(root, { withFileTypes: true }).catch(() => []); + return entries + .filter((entry) => entry.isFile() && entry.name.endsWith(".gemspec")) + .map((entry) => entry.name) + .toSorted(); +} + +async function rubyDependencySource(root: string): Promise { + const chunks: string[] = []; + for (const path of [...metadataFiles, ...(await rubyGemspecs(root))]) { + if (await pathExists(join(root, path))) { + chunks.push(await readFile(join(root, path), "utf8")); + } + } + return chunks.join("\n"); +} + +function rubyProjectName(source: string): string | null { + return /^\s*(?:spec|s)\.name\s*=\s*["']([^"']+)["']/mu.exec(source)?.[1] ?? null; +} + +function rubyDependencyNames(source: string): Set { + const names = new Set(); + for (const line of source.split("\n")) { + const match = + /^\s*(?:gem|s\.add_dependency|s\.add_development_dependency|spec\.add_dependency|spec\.add_development_dependency)\s*\(?\s*["']([^"']+)["']/u.exec( + line, + ); + if (match?.[1] !== undefined) { + names.add(match[1].toLowerCase()); + } + } + return names; +} + +function rubyTrustBoundaries(name: string, dependencies: Set): TrustBoundary[] { + const boundaries = new Set(packageTrustBoundaries(name)); + const text = `${name} ${[...dependencies].join(" ")}`; + if (/\b(redis|sequel|pg|mysql|sqlite|activerecord)\b/iu.test(text)) { + boundaries.add("database"); + boundaries.add("network"); + boundaries.add("serialization"); + } + if (/\b(faraday|http|net-http|mechanize|rest-client|hive|steem|rpc|api)\b/iu.test(text)) { + boundaries.add("network"); + boundaries.add("external-api"); + boundaries.add("serialization"); + } + if (/\b(json|oj|msgpack|yaml|xml)\b/iu.test(text)) { + boundaries.add("serialization"); + } + return [...boundaries]; +} + +function uniqueTrustBoundaries(values: TrustBoundary[]): TrustBoundary[] { + return [...new Set(values)]; +} + +async function rubyProjectContextFiles( + root: string, + ownedMetadataFiles: readonly string[], +): Promise { + const refs: 
SeedFileRef[] = []; + const owned = new Set(ownedMetadataFiles); + for (const path of ["Gemfile.lock", "gems.locked", ".rubocop.yml", "README.md"]) { + if (!owned.has(path) && (await pathExists(join(root, path)))) { + refs.push({ path, reason: "ruby project context" }); + } + } + return refs; +} + /** Builds the feature seeds for a Jekyll site: one "config" seed (existing `_config.yml` / `CNAME` / `authors.json` plus top-level pages), one "ui-flow" seed per theme file group, and one "route" seed per content group. Returns an empty array when `isJekyllSite` is false. NOTE(review): generic type arguments (e.g. on `Promise`) appear to be missing throughout this excerpt - confirm against the real file. */ +async function jekyllSeeds(root: string, info: RubyProjectInfo): Promise { + if (!(await isJekyllSite(root, info))) { + return []; + } + const seeds: FeatureSeed[] = []; + const trustBoundaries = rubyTrustBoundaries("jekyll site", info.dependencies); + const rootPages = await jekyllRootPages(root); + const configFiles = await existingFiles(root, ["_config.yml", "CNAME", "authors.json"]); + /* the config seed is emitted only when at least one config file or top-level page exists */ if (configFiles.length > 0 || rootPages.length > 0) { + seeds.push({ + title: "Jekyll site configuration", + summary: "Jekyll configuration and top-level site pages.", + kind: "config", + source: "jekyll-site-config", + confidence: "high", + entryPath: configFiles[0] ?? rootPages[0] ?? "_config.yml", + symbol: null, + route: null, + command: null, + ownedFiles: [ + ...configFiles.map((path) => ({ path, reason: "jekyll site configuration" })), + ...rootPages.map((path) => ({ path, reason: "top-level jekyll page" })), + ], + contextFiles: [], + tags: ["ruby", "jekyll", "site"], + trustBoundaries, + skipNearbyTests: true, + }); + } + + const themeFiles = await jekyllThemeFiles(root); + /* one seed per theme group; the site config files are attached as context only */ for (const group of jekyllThemeGroups(themeFiles)) { + seeds.push({ + title: `Jekyll theme ${group.label}`, + summary: `Jekyll layouts, includes, Sass, or static assets with ${group.files.length} file(s).`, + kind: "ui-flow", + source: "jekyll-theme", + confidence: "high", + entryPath: group.label, + symbol: group.label, + route: null, + command: null, + ownedFiles: group.files.map((path) => ({ path, reason: "jekyll theme file" })), + contextFiles: configFiles.map((path) => ({ path, reason: "jekyll site configuration" })), + tags: ["ruby", "jekyll", "theme"], + trustBoundaries, + skipNearbyTests: true, + 
}); + } + + /* one seed per content group (posts by year, then `_topics`) */ for (const group of await jekyllContentGroups(root)) { + seeds.push({ + title: `Jekyll content ${group.label}`, + summary: `Jekyll Markdown content group ${group.label} with ${group.files.length} file(s).`, + kind: "route", + source: "jekyll-content", + confidence: "high", + entryPath: group.label, + symbol: group.label, + route: null, + command: null, + ownedFiles: group.files.map((path) => ({ path, reason: `jekyll content ${group.label}` })), + contextFiles: configFiles.map((path) => ({ path, reason: "jekyll site configuration" })), + tags: ["ruby", "jekyll", "content"], + trustBoundaries, + skipNearbyTests: true, + }); + } + + return seeds; +} + /** Resolves to true when `_config.yml` exists under `root` and `info.dependencies` contains "jekyll". */ +async function isJekyllSite(root: string, info: RubyProjectInfo): Promise { + return (await pathExists(join(root, "_config.yml"))) && info.dependencies.has("jekyll"); +} + /** Builds the feature seeds for a Rails app: a "config" seed, a "service" seed for database files (with "database" added to its trust boundaries), and one "ui-flow" seed per view group and per asset group. Returns an empty array when `isRailsApp` is false. */ +async function railsSeeds(root: string, info: RubyProjectInfo): Promise { + if (!(await isRailsApp(root, info))) { + return []; + } + const trustBoundaries = rubyTrustBoundaries("rails app", info.dependencies); + const seeds: FeatureSeed[] = []; + const configFiles = await railsConfigFiles(root); + if (configFiles.length > 0) { + seeds.push({ + title: "Rails application configuration", + summary: "Rails routes, environments, initializers, and application configuration.", + kind: "config", + source: "rails-config", + confidence: "high", + entryPath: "config/application.rb", + symbol: null, + route: null, + command: null, + ownedFiles: configFiles.map((path) => ({ path, reason: "rails configuration" })), + contextFiles: [], + tags: ["ruby", "rails", "config"], + trustBoundaries, + skipNearbyTests: true, + }); + } + + const dbFiles = await railsDatabaseFiles(root); + if (dbFiles.length > 0) { + seeds.push({ + title: "Rails database schema and migrations", + summary: `Rails database files with ${dbFiles.length} migration/schema file(s).`, + kind: "service", + source: "rails-database", + confidence: "high", + entryPath: dbFiles[0] ?? 
"db", + symbol: null, + route: null, + command: null, + ownedFiles: dbFiles.map((path) => ({ path, reason: "rails database file" })), + contextFiles: [], + tags: ["ruby", "rails", "database"], + trustBoundaries: uniqueTrustBoundaries([...trustBoundaries, "database"]), + skipNearbyTests: true, + }); + } + + for (const group of await railsViewGroups(root)) { + seeds.push({ + title: `Rails views ${group.label}`, + summary: `Rails view/template group ${group.label} with ${group.files.length} file(s).`, + kind: "ui-flow", + source: "rails-views", + confidence: "high", + entryPath: group.label, + symbol: group.label, + route: null, + command: null, + ownedFiles: group.files.map((path) => ({ path, reason: `rails views ${group.label}` })), + /* only `config/routes.rb` (when present among the config files) is attached as view context */ contextFiles: configFiles + .filter((path) => path === "config/routes.rb") + .map((path) => ({ path, reason: "rails routes" })), + tags: ["ruby", "rails", "views"], + trustBoundaries, + skipNearbyTests: true, + }); + } + + for (const group of await railsAssetGroups(root)) { + seeds.push({ + title: `Rails assets ${group.label}`, + summary: `Rails asset group ${group.label} with ${group.files.length} file(s).`, + kind: "ui-flow", + source: "rails-assets", + confidence: "high", + entryPath: group.label, + symbol: group.label, + route: null, + command: null, + ownedFiles: group.files.map((path) => ({ path, reason: `rails assets ${group.label}` })), + contextFiles: [], + tags: ["ruby", "rails", "assets"], + trustBoundaries, + skipNearbyTests: true, + }); + } + + return seeds; +} + /** Resolves to true when `info.dependencies` contains "rails" and `config/application.rb` exists under `root`. */ +async function isRailsApp(root: string, info: RubyProjectInfo): Promise { + return info.dependencies.has("rails") && (await pathExists(join(root, "config/application.rb"))); +} + /** Collects Rails configuration paths: the listed core files that exist, plus `.rb` / `.yml` / `.yaml` files found under `config/environments`, `config/initializers`, and `config/locales`. Directories that fail `isSafeDirectory` are skipped. The result is de-duplicated and sorted by `uniquePaths`. `config/secrets.yml` is not in the explicit candidate list. */ +async function railsConfigFiles(root: string): Promise { + const files = await existingFiles(root, [ + "config/application.rb", + "config/routes.rb", + "config/environment.rb", + "config/database.yml", + "config/boot.rb", + ]); + for (const prefix of ["config/environments", 
"config/initializers", "config/locales"]) { + if (!(await isSafeDirectory(root, join(root, prefix)))) { + continue; + } + files.push( + ...(await walk(root, [prefix])).filter( + (path) => /\.(rb|ya?ml)$/u.test(path) && !rubyShouldSkip(path), + ), + ); + } + return uniquePaths(files); +} + /** Returns the sorted `.rb` / `.yml` / `.yaml` paths found under `db`, or an empty array when `db` fails `isSafeDirectory`. */ +async function railsDatabaseFiles(root: string): Promise { + if (!(await isSafeDirectory(root, join(root, "db")))) { + return []; + } + return (await walk(root, ["db"])) + .filter((path) => /\.(rb|ya?ml)$/u.test(path) && !rubyShouldSkip(path)) + .toSorted(); +} + /** Partitions template files (`.erb`, `.haml`, `.slim`, `.builder`, `.jbuilder`, `.coffee`) under `app/views` into source groups; empty when the directory fails `isSafeDirectory`. NOTE(review): the group size cap reuses `jekyllContentMaxOwnedFiles` - confirm that sharing the Jekyll constant is intended. */ +async function railsViewGroups(root: string): Promise { + if (!(await isSafeDirectory(root, join(root, "app/views")))) { + return []; + } + const files = (await walk(root, ["app/views"])).filter( + (path) => /\.(erb|haml|slim|builder|jbuilder|coffee)$/u.test(path) && !rubyShouldSkip(path), + ); + return partitionSourceFiles("app/views", files, jekyllContentMaxOwnedFiles); +} + /** Partitions `.js` / `.coffee` / `.css` / `.scss` / `.sass` files under `app/assets` into source groups, dropping any path that contains `/images/`; empty when the directory fails `isSafeDirectory`. */ +async function railsAssetGroups(root: string): Promise { + if (!(await isSafeDirectory(root, join(root, "app/assets")))) { + return []; + } + const files = (await walk(root, ["app/assets"])).filter( + (path) => + /\.(js|coffee|css|scss|sass)$/u.test(path) && + !rubyShouldSkip(path) && + !path.includes("/images/"), + ); + return partitionSourceFiles("app/assets", files, jekyllContentMaxOwnedFiles); +} + /** Returns the entries of `candidates` for which `pathExists(join(root, candidate))` is true, preserving candidate order. Existence is checked one candidate at a time. */ +async function existingFiles(root: string, candidates: string[]): Promise { + const files: string[] = []; + for (const candidate of candidates) { + if (await pathExists(join(root, candidate))) { + files.push(candidate); + } + } + return files; +} + /** Lists the names of regular files directly inside `root` that end in `.md`, `.html`, or `.json`, excluding `README.md`, sorted. A failing `readdir` is treated as an empty directory. */ +async function jekyllRootPages(root: string): Promise { + const entries = await readdir(root, { withFileTypes: true }).catch(() => []); + return entries + .filter((entry) => entry.isFile()) + .map((entry) => entry.name) + .filter((path) => /\.(md|html|json)$/u.test(path)) + /* `Gemfile.lock` cannot pass the extension filter above, so only the `README.md` exclusion has an effect here */ .filter((path) => !["README.md", "Gemfile.lock"].includes(path)) + .toSorted(); +} + /** Collects `.html` / `.liquid` / `.scss` / `.sass` / `.css` / `.js` files under `_layouts`, `_includes`, `_sass`, and `assets`, skipping paths that start with `assets/images/` and directories that fail `isSafeDirectory`. The result is de-duplicated and sorted by `uniquePaths`. */ +async function jekyllThemeFiles(root: 
string): Promise { + const files: string[] = []; + for (const prefix of ["_layouts", "_includes", "_sass", "assets"]) { + if (!(await isSafeDirectory(root, join(root, prefix)))) { + continue; + } + files.push( + ...(await walk(root, [prefix])).filter( + (path) => + /\.(html|liquid|scss|sass|css|js)$/u.test(path) && + !rubyShouldSkip(path) && + !path.startsWith("assets/images/"), + ), + ); + } + return uniquePaths(files); +} + /** Groups theme files by their first path segment, orders the groups by label, and splits each (sorted) group into chunks of at most `jekyllContentMaxOwnedFiles` files via `chunkFiles`. */ +function jekyllThemeGroups(files: string[]): SourceGroup[] { + const groups = new Map(); + for (const file of files) { + const label = file.split("/")[0] ?? "assets"; + groups.set(label, [...(groups.get(label) ?? []), file]); + } + return [...groups.entries()] + .toSorted(([left], [right]) => left.localeCompare(right)) + .flatMap(([label, groupedFiles]) => + chunkFiles(label, groupedFiles.toSorted(), jekyllContentMaxOwnedFiles), + ); +} + /** Builds the Jekyll content groups: posts grouped per year (labelled `_posts/<year>`, chunked), followed by partitioned `.md` / `.html` files under `_topics` when that directory passes `isSafeDirectory`. */ +async function jekyllContentGroups(root: string): Promise { + const groups: SourceGroup[] = []; + const posts = await jekyllPosts(root); + for (const [year, files] of groupByPostYear(posts)) { + groups.push(...chunkFiles(`_posts/${year}`, files, jekyllContentMaxOwnedFiles)); + } + for (const prefix of ["_topics"]) { + if (!(await isSafeDirectory(root, join(root, prefix)))) { + continue; + } + const files = (await walk(root, [prefix])).filter( + (path) => /\.(md|html)$/u.test(path) && !rubyShouldSkip(path), + ); + groups.push(...partitionSourceFiles(prefix, files, jekyllContentMaxOwnedFiles)); + } + return groups; +} + /** Returns the sorted paths under `_posts` whose path starts with `_posts/YYYY-MM-DD-` and ends in `.md`; empty when `_posts` fails `isSafeDirectory`. */ +async function jekyllPosts(root: string): Promise { + if (!(await isSafeDirectory(root, join(root, "_posts")))) { + return []; + } + return (await walk(root, ["_posts"])) + .filter((path) => /^_posts\/\d{4}-\d{2}-\d{2}-.+\.md$/u.test(path)) + .toSorted(); +} + /** Groups post paths by the four-digit year that follows `_posts/` (falling back to the key "unknown" when the pattern does not match) and returns a new Map whose entries are ordered by year key. */ +function groupByPostYear(posts: string[]): Map { + const groups = new Map(); + for (const post of posts) { + const year = /^_posts\/(\d{4})-/u.exec(post)?.[1] ?? "unknown"; + groups.set(year, [...(groups.get(year) ?? 
[]), post]); + } + return new Map([...groups.entries()].toSorted(([left], [right]) => left.localeCompare(right))); +} + /** Walks every directory in `executableRoots` that passes `isSafeDirectory` and returns, sorted, the paths that are not skipped by `rubyShouldSkip` and either end in `.rb` or have a Ruby shebang. When `skipRailsBinstubs` is true, files under the `bin` root whose basename is in `railsBinstubs` are ignored. */ +async function rubyExecutables(root: string, skipRailsBinstubs: boolean): Promise { + const executables: string[] = []; + for (const executableRoot of executableRoots) { + if (!(await isSafeDirectory(root, join(root, executableRoot)))) { + continue; + } + for (const path of await walk(root, [executableRoot])) { + if (skipRailsBinstubs && executableRoot === "bin" && railsBinstubs.has(basename(path))) { + continue; + } + if (!rubyShouldSkip(path) && (path.endsWith(".rb") || (await hasRubyShebang(root, path)))) { + executables.push(path); + } + } + } + return executables.toSorted(); +} + /** Resolves to true when the file passes `isSafeFile` and its content starts with a `#!` line containing the word `ruby`. A failed read is treated as empty content, which yields false. */ +async function hasRubyShebang(root: string, path: string): Promise { + if (!(await isSafeFile(root, join(root, path)))) { + return false; + } + /* the whole file is read as UTF-8, but only its first 160 characters are tested */ const head = (await readFile(join(root, path), "utf8").catch(() => "")).slice(0, 160); + return /^#!.*\bruby\b/u.test(head); +} + /** For each directory in `sourceRoots` that passes `isSafeDirectory`, partitions its reviewable Ruby source files into groups of at most `sourceGroupMaxOwnedFiles` files. */ +async function rubySourceGroups(root: string): Promise { + const groups: SourceGroup[] = []; + for (const sourceRoot of sourceRoots) { + if (!(await isSafeDirectory(root, join(root, sourceRoot)))) { + continue; + } + const files = (await walk(root, [sourceRoot])).filter(isReviewableRubySourceFile); + groups.push(...partitionSourceFiles(sourceRoot, files, sourceGroupMaxOwnedFiles)); + } + return groups; +} + /** Walks `spec`, `test`, and every entry of `sourceRoots`, keeping `*_spec.rb` / `*_test.rb` paths that are neither skipped nor inside a fixture directory. The result is de-duplicated, sorted, and capped at the first 200 paths. NOTE(review): unlike the sibling helpers, no `isSafeDirectory` check is made before `walk` here - confirm `walk` tolerates missing roots. */ +async function rubyTestFiles(root: string): Promise { + const files = (await walk(root, ["spec", "test", ...sourceRoots])) + .filter(isRubyTestPath) + .filter((path) => !rubyShouldSkip(path) && !isRubyFixturePath(path)); + return uniquePaths(files).slice(0, 200); +} + /** Chooses the test command: `rspec` when `info.hasRspec` is set or any test file ends in `_spec.rb`; otherwise `rake test` when `info.hasMinitest` is set or any test file ends in `_test.rb`; otherwise null. The command is prefixed with `bundle exec ` when a `Gemfile` exists under `root`. */ +async function rubyTestCommand( + root: string, + info: RubyProjectInfo, + testFiles: string[], +): Promise { + const run = (await pathExists(join(root, "Gemfile"))) ? 
"bundle exec " : ""; + if (info.hasRspec || testFiles.some((path) => path.endsWith("_spec.rb"))) { + return `${run}rspec`; + } + if (info.hasMinitest || testFiles.some((path) => path.endsWith("_test.rb"))) { + return `${run}rake test`; + } + return null; +} + /** Groups test files by suite root (`spec`, `test`, or the file's own directory for anything else) and returns one "test-suite" seed per group, ordered by label. Each seed owns its test files and carries `command` both per test and as `testCommand`. */ +function standaloneTestSuites(testFiles: string[], command: string | null): FeatureSeed[] { + const groups = new Map(); + for (const path of testFiles) { + const root = path.startsWith("spec/") + ? "spec" + : path.startsWith("test/") + ? "test" + : dirname(path); + groups.set(root, [...(groups.get(root) ?? []), path]); + } + return [...groups.entries()] + .toSorted(([left], [right]) => left.localeCompare(right)) + .map(([label, files]) => ({ + title: `Ruby test suite ${label}`, + summary: `Ruby test files in ${label}.`, + kind: "test-suite", + source: "ruby-test-suite", + confidence: "medium", + entryPath: label, + symbol: label, + route: null, + command: null, + ownedFiles: files.map((path) => ({ path, reason: "ruby test file" })), + contextFiles: [], + tests: files.map((path) => ({ path, command })), + tags: ["ruby", "test"], + trustBoundaries: [], + testCommand: command, + skipNearbyTests: true, + })); +} + /** Sorts a copy of `files` and partitions it under `sourceRoot` into groups of at most `maxFiles` files, starting at directory depth 0. */ +function partitionSourceFiles( + sourceRoot: string, + files: string[], + maxFiles: number, +): SourceGroup[] { + return partitionAt(sourceRoot, files.toSorted(), maxFiles, 0); +} + /** Recursive partition step. No files yields no groups; `maxFiles` or fewer files become a single group labelled by `commonLabel`. Otherwise files with no directory segment at `depth` (relative to `sourceRoot`) are chunked under `currentLabel`, and the rest are bucketed by their segment at `depth`: a bucket that fits becomes one group, a larger bucket is partitioned again at `depth + 1`. Paths are assumed to start with `sourceRoot` followed by a separator, since the prefix is removed by slicing. */ +function partitionAt( + sourceRoot: string, + files: string[], + maxFiles: number, + depth: number, +): SourceGroup[] { + if (files.length === 0) { + return []; + } + if (files.length <= maxFiles) { + return [{ label: commonLabel(sourceRoot, files, depth), files }]; + } + const directFiles: string[] = []; + const buckets = new Map(); + for (const file of files) { + const relativePath = file.slice(sourceRoot.length + 1); + const parts = relativePath.split("/"); + /* a file with at most depth + 1 segments has no sub-directory at this depth, so it stays at the current level */ if (parts.length <= depth + 1) { + directFiles.push(file); + continue; + } + const segment = parts[depth]; + if (segment === undefined) { + directFiles.push(file); 
+ continue; + } + buckets.set(segment, [...(buckets.get(segment) ?? []), file]); + } + const groups = chunkFiles(currentLabel(sourceRoot, files, depth), directFiles, maxFiles); + for (const [segment, bucketFiles] of [...buckets.entries()].toSorted(([left], [right]) => + left.localeCompare(right), + )) { + if (bucketFiles.length <= maxFiles) { + groups.push({ + label: `${sourceRoot}/${bucketPrefix(bucketFiles, sourceRoot, depth, segment)}`, + files: bucketFiles, + }); + } else { + groups.push(...partitionAt(sourceRoot, bucketFiles, maxFiles, depth + 1)); + } + } + return groups; +} + /** Splits `files` into consecutive slices of at most `maxFiles` entries. A single slice keeps `label` unchanged; multiple slices are labelled `label#1`, `label#2`, and so on. An empty `files` yields no groups. NOTE(review): the loop advances by `maxFiles`, so it presumably expects a positive value - confirm callers never pass 0. */ +function chunkFiles(label: string, files: string[], maxFiles: number): SourceGroup[] { + const groups: SourceGroup[] = []; + for (let index = 0; index < files.length; index += maxFiles) { + const part = Math.floor(index / maxFiles) + 1; + groups.push({ + label: files.length <= maxFiles ? label : `${label}#${part}`, + files: files.slice(index, index + maxFiles), + }); + } + return groups; +} + /** Returns the label of the directory level being partitioned: `sourceRoot` at depth 0 (or when `files` is empty), otherwise `sourceRoot` joined with the first `depth` path segments of the first file. */ +function currentLabel(sourceRoot: string, files: string[], depth: number): string { + if (depth === 0) { + return sourceRoot; + } + const first = files[0]; + if (first === undefined) { + return sourceRoot; + } + const parts = first + .slice(sourceRoot.length + 1) + .split("/") + .slice(0, depth); + return parts.length === 0 ? sourceRoot : `${sourceRoot}/${parts.join("/")}`; +} + /** Returns the label for a group that fits in one chunk. At depth 0 this is `sourceRoot`, unless the group is a single file that does not live under `sourceRoot/`, in which case the file path itself is used. Below depth 0 a single file is labelled by its own path and anything else by `currentLabel`. */ +function commonLabel(sourceRoot: string, files: string[], depth: number): string { + if (depth === 0) { + const first = files[0]; + return files.length === 1 && first !== undefined && !first.startsWith(`${sourceRoot}/`) + ? first + : sourceRoot; + } + if (files.length === 1) { + return files[0] ?? 
sourceRoot; + } + return currentLabel(sourceRoot, files, depth); +} + /** Returns the bucket label relative to `sourceRoot`: just `segment` at depth 0 (or when `files` is empty), otherwise the first `depth` path segments of the first file followed by `segment`, joined with `/`. */ +function bucketPrefix(files: string[], sourceRoot: string, depth: number, segment: string): string { + const first = files[0]; + if (first === undefined || depth === 0) { + return segment; + } + const parts = first + .slice(sourceRoot.length + 1) + .split("/") + .slice(0, depth); + return [...parts, segment].join("/"); +} + /** Selects the tests associated with a group of source files: a test is kept when `pathMatchesPrefix` accepts it for any source file's directory, or when its basename with `_spec.rb` / `_test.rb` / `.rb` removed equals the basename (without `.rb`) of a source file. At most `sourceGroupMaxTests` tests are returned, each paired with `command`. */ +function associatedTests(files: string[], tests: string[], command: string | null): SeedTestRef[] { + const fileStems = new Set(files.map((file) => basename(file).replace(/\.rb$/u, ""))); + const dirs = new Set(files.map((file) => dirname(file))); + return tests + .filter((test) => { + const testStem = basename(test) + .replace(/_spec\.rb$/u, "") + .replace(/_test\.rb$/u, "") + .replace(/\.rb$/u, ""); + return [...dirs].some((dir) => pathMatchesPrefix(test, dir)) || fileStems.has(testStem); + }) + .slice(0, sourceGroupMaxTests) + .map((path) => ({ path, command })); +} + /** True for a `.rb` path that is not a test file, not skipped by `rubyShouldSkip`, not inside a fixture directory, and whose file name does not end in `generated.rb` or `.gen.rb` (case-insensitive). */ +function isReviewableRubySourceFile(path: string): boolean { + return ( + path.endsWith(".rb") && + !isRubyTestPath(path) && + !rubyShouldSkip(path) && + !isRubyFixturePath(path) && + !/(^|\/)[^/]*(?:generated|\.gen)\.rb$/iu.test(path) + ); +} + /** True when the path's basename ends in `_spec.rb` or `_test.rb`. */ +function isRubyTestPath(path: string): boolean { + const name = basename(path); + return path.endsWith(".rb") && (name.endsWith("_spec.rb") || name.endsWith("_test.rb")); +} + /** True when any path segment is exactly `__fixtures__`, `fixtures`, or `testdata`. */ +function isRubyFixturePath(path: string): boolean { + return /(^|\/)(__fixtures__|fixtures|testdata)(\/|$)/u.test(path); +} + /** True when the shared `shouldSkip` rejects the path, or when the path contains a `.bundle`, `vendor/bundle`, `tmp`, or `log` segment. */ +function rubyShouldSkip(path: string): boolean { + return shouldSkip(path) || /(^|\/)(\.bundle|vendor\/bundle|tmp|log)(\/|$)/u.test(path); +} + /** Resolves to true as soon as any directory in `sourceRoots` or `executableRoots` that passes `isSafeDirectory` contains at least one reviewable Ruby source file; false otherwise. */ +async function containsReviewableRubySource(root: string): Promise { + for (const sourceRoot of [...sourceRoots, ...executableRoots]) { + if (!(await isSafeDirectory(root, join(root, sourceRoot)))) { + continue; + } + if ((await walk(root, [sourceRoot])).some(isReviewableRubySourceFile)) { + return true; + } + } + return 
false; +} + /** Returns the distinct entries of `paths` in sorted order, without mutating the input. */ +function uniquePaths(paths: string[]): string[] { + return [...new Set(paths)].toSorted(); +}