From 0d46b241bc03e6f83b1cb0e02b038ae2b68844ad Mon Sep 17 00:00:00 2001 From: Harlan Wilton Date: Mon, 1 Dec 2025 20:50:27 +1100 Subject: [PATCH 1/2] chore: build errors --- client/composables/shiki.ts | 4 ++-- src/devtools.ts | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/client/composables/shiki.ts b/client/composables/shiki.ts index 8d89f31c..e528fa44 100644 --- a/client/composables/shiki.ts +++ b/client/composables/shiki.ts @@ -1,5 +1,5 @@ -import type { MaybeRef } from '@vueuse/core' import type { HighlighterCore } from 'shiki' +import type { Ref } from 'vue' import { createHighlighterCore } from 'shiki/core' import { createJavaScriptRegexEngine } from 'shiki/engine/javascript' import { computed, ref, toValue } from 'vue' @@ -76,7 +76,7 @@ export async function loadShiki() { return shiki.value } -export function renderCodeHighlight(code: MaybeRef, lang: 'json' | 'html' | 'bash' | 'robots-txt') { +export function renderCodeHighlight(code: string | Ref, lang: 'json' | 'html' | 'bash' | 'robots-txt') { return computed(() => { const colorMode = devtools.value?.colorMode || 'light' return shiki.value!.codeToHtml(toValue(code), { diff --git a/src/devtools.ts b/src/devtools.ts index 330983f8..84fcfe21 100644 --- a/src/devtools.ts +++ b/src/devtools.ts @@ -24,8 +24,9 @@ export function setupDevToolsUI(options: ModuleOptions, resolve: Resolver['resol // In local development, start a separate Nuxt Server and proxy to serve the client else { nuxt.hook('vite:extendConfig', (config) => { - config.server = config.server || {} - config.server.proxy = config.server.proxy || {} + // @ts-expect-error - config.server may be readonly but we need to set it + config.server ??= {} + config.server.proxy ??= {} config.server.proxy[DEVTOOLS_UI_ROUTE] = { target: `http://localhost:${DEVTOOLS_UI_LOCAL_PORT}${DEVTOOLS_UI_ROUTE}`, changeOrigin: true, From b352cd18074c776e377724ae2e77653859073f10 Mon Sep 17 00:00:00 2001 From: Harlan Wilton Date: Mon, 1 Dec 2025 20:54:20 +1100 Subject: [PATCH 2/2] feat: improved `Content-Signal` support --- docs/content/2.guides/1.robots-txt.md | 30 +- docs/content/2.guides/2.ai-directives.md | 360 +++++++++++++++++ docs/content/2.guides/3.nuxt-config.md | 54 +-- docs/content/3.api/1.config.md | 6 +- src/runtime/types.ts | 47 ++- src/util.ts | 152 +++++++- test/unit/contentSignal.test.ts | 477 +++++++++++++++++++++++ test/unit/generateRobotsTxt.test.ts | 72 +++- test/unit/robotsTxtParser.test.ts | 34 +- test/unit/robotsTxtValidator.test.ts | 43 +- 10 files changed, 1207 insertions(+), 68 deletions(-) create mode 100644 docs/content/2.guides/2.ai-directives.md create mode 100644 test/unit/contentSignal.test.ts diff --git a/docs/content/2.guides/1.robots-txt.md b/docs/content/2.guides/1.robots-txt.md index 369be002..251616ba 100644 --- a/docs/content/2.guides/1.robots-txt.md +++ b/docs/content/2.guides/1.robots-txt.md @@ -60,37 +60,29 @@ The following rules are parsed from your `robots.txt` file: - `Disallow` - An array of paths to disallow for the user-agent. - `Allow` - An array of paths to allow for the user-agent. - `Sitemap` - An array of sitemap URLs to include in the generated sitemap. -- `Content-Usage` / `Content-Signal` - Directives for expressing AI usage preferences (see [Content Signals](#content-signals) below). +- `Content-Usage` / `Content-Signal` - Directives for expressing AI usage preferences (see [AI Directives](#ai-directives) below). This parsed data will be shown for environments that are `indexable`. 
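As a rough illustration, parsing a robots.txt that uses each of these rule types yields a structure like the following (input and values are hypothetical; the group shape mirrors the inline snapshots in this patch's `test/unit/robotsTxtParser.test.ts`):

```ts
import { parseRobotsTxt } from '../../src/runtime/util' // import path as used by the patch's unit tests

// Hypothetical input covering each parsed rule type.
const parsed = parseRobotsTxt(`
User-agent: *
Allow: /
Disallow: /admin
Content-Signal: ai-train=no, search=yes
Sitemap: https://example.com/sitemap.xml
`)
// parsed.groups[0] → { userAgent: ['*'], allow: ['/'], disallow: ['/admin'],
//                      contentUsage: [], contentSignal: ['ai-train=no, search=yes'], comment: [] }
// parsed.sitemaps  → ['https://example.com/sitemap.xml']
```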
-## Content Signals +## AI Directives -Content Signals allow you to express preferences about how AI systems should interact with your content. Both `Content-Usage` and `Content-Signal` directives are supported: +AI Directives allow you to control how AI systems, search engines, and automated tools interact with your content. Two standards are supported: -### Content-Usage (IETF Standard) +- **Content-Usage** - IETF standard (`bots`, `train-ai`, `ai-output`, `search`) with `y`/`n` values +- **Content-Signal** - Cloudflare implementation (`search`, `ai-input`, `ai-train`) with `yes`/`no` values -The `Content-Usage` directive follows the [IETF AI Preferences specification](https://datatracker.ietf.org/doc/draft-ietf-aipref-attach/): +### Quick Example ```txt [robots.txt] User-agent: * Allow: / -Content-Usage: ai=n -Content-Usage: /public/ train-ai=y -Content-Usage: /restricted/ ai=n train-ai=n +Content-Usage: bots=y, train-ai=n +Content-Signal: ai-train=no, search=yes ``` -### Content-Signal (Cloudflare Implementation) - -The `Content-Signal` directive is [Cloudflare's implementation](https://blog.cloudflare.com/content-signals-policy/), widely deployed across millions of domains: - -```txt [robots.txt] -User-agent: * -Allow: / -Content-Signal: ai-train=no, search=yes, ai-input=yes -``` - -Both directives are parsed identically and output as `Content-Usage` in the generated robots.txt. Use whichever format matches your preferences or existing tooling. +::alert{type="info"} +See the [**AI Directives Guide**](/docs/robots/guides/ai-directives) for complete documentation, examples, validation rules, and best practices. +:: ## Conflicting `public/robots.txt` diff --git a/docs/content/2.guides/2.ai-directives.md b/docs/content/2.guides/2.ai-directives.md new file mode 100644 index 00000000..90c0098a --- /dev/null +++ b/docs/content/2.guides/2.ai-directives.md @@ -0,0 +1,360 @@ +--- +title: AI Directives +description: Control how AI systems interact with your content using Content-Usage and Content-Signal directives. +--- + +AI Directives allow you to express preferences about how AI systems, search engines, and automated tools should interact with your content. Two standards are supported: + +- **Content-Usage** - IETF standard with broader automation categories +- **Content-Signal** - Cloudflare's widely-deployed implementation focused on AI use cases + +Both can be used together in your robots.txt file and are enforced through the robots.txt protocol. + +::alert{type="warning"} +**Important:** AI directives rely on voluntary compliance by crawlers and AI systems. They are not enforced by web servers and should be combined with other protection methods for sensitive content. +:: + +## Content-Usage (IETF aipref-vocab) + +The Content-Usage directive follows the [IETF AI Preferences specification](https://ietf-wg-aipref.github.io/drafts/draft-ietf-aipref-vocab.html), providing a standardized way to express automation preferences. 
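Both a string form and an object form are accepted in `nuxt.config` (see Programmatic Configuration below); the object form is flattened into a single comma-separated assignment list before the directive is written. The `contentUsageToString` helper this patch adds in `src/util.ts` does essentially the following (simplified sketch):

```ts
// Simplified sketch of the serialization helper added in src/util.ts.
function contentUsageToString(prefs: Record<string, string | undefined>): string {
  return Object.entries(prefs)
    .filter(([, value]) => value !== undefined)
    .map(([category, value]) => `${category}=${value}`)
    .join(', ')
}

// contentUsageToString({ 'bots': 'y', 'train-ai': 'n' }) returns 'bots=y, train-ai=n',
// which is emitted as "Content-Usage: bots=y, train-ai=n" in the generated robots.txt.
```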
+ +### Categories + +| Category | Description | Example Use Case | +|----------|-------------|------------------| +| `bots` | Automated Processing | General bot crawling and indexing | +| `train-ai` | Foundation Model Production | Training large language models | +| `ai-output` | AI Output | Content used in AI-generated responses | +| `search` | Search | Search engine indexing and results | + +### Values + +- `y` - Allow this category of use +- `n` - Disallow this category of use + +### Syntax + +```txt [robots.txt] +User-agent: * +Content-Usage: =[, =] +Content-Usage: /path/ =[, =] +``` + +### Examples + +#### Block AI Training Globally + +```txt [robots.txt] +User-agent: * +Allow: / +Content-Usage: train-ai=n +``` + +#### Allow Bots, Block AI Training + +```txt [robots.txt] +User-agent: * +Allow: / +Content-Usage: bots=y, train-ai=n +``` + +#### Path-Specific Rules + +```txt [robots.txt] +User-agent: * +Allow: / +Content-Usage: train-ai=n +Content-Usage: /docs/ train-ai=y, ai-output=y +Content-Usage: /api/ bots=n, train-ai=n, ai-output=n +``` + +### Programmatic Configuration + +**Object Format (Recommended)** - Type-safe with autocomplete: + +```ts [nuxt.config.ts] +export default defineNuxtConfig({ + robots: { + groups: [ + { + userAgent: '*', + allow: '/', + contentUsage: { + bots: 'y', + 'train-ai': 'n' + } + } + ] + } +}) +``` + +**String Format** - Backwards compatible, supports path-specific rules: + +```ts [nuxt.config.ts] +export default defineNuxtConfig({ + robots: { + groups: [ + { + userAgent: '*', + allow: '/', + contentUsage: [ + 'bots=y, train-ai=n', + '/docs/ train-ai=y', + '/api/ bots=n, train-ai=n' + ] + } + ] + } +}) +``` + +## Content-Signal (Cloudflare/IETF aipref-contentsignals) + +Content-Signal is [Cloudflare's implementation](https://blog.cloudflare.com/content-signals-policy/) based on [IETF aipref-contentsignals](https://www.ietf.org/archive/id/draft-romm-aipref-contentsignals-00.html), deployed across millions of domains. 
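Content-Signal lines are validated against a fixed set of categories and values (listed below); the check added to `validateGroupRules` in `src/util.ts` amounts to roughly this per assignment list (simplified sketch):

```ts
// Simplified sketch of the Content-Signal validation added in src/util.ts.
function validateContentSignalAssignments(list: string, errors: string[]): void {
  const validCategories = ['search', 'ai-input', 'ai-train']
  const validValues = ['yes', 'no']
  for (const pref of list.split(',').map(p => p.trim())) {
    const [category, value] = pref.split('=').map(s => s.trim())
    if (!validCategories.includes(category || ''))
      errors.push(`Content-Signal category "${category}" is invalid. Valid categories: ${validCategories.join(', ')}.`)
    if (!validValues.includes(value || ''))
      errors.push(`Content-Signal value "${value}" for "${category}" is invalid. Valid values: yes, no.`)
  }
}
```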
+ +### Categories + +| Category | Description | Example Use Case | +|----------|-------------|------------------| +| `search` | Search Applications | Indexing for search results and snippets | +| `ai-input` | AI Input | RAG, grounding, generative AI search answers | +| `ai-train` | AI Training | Training or fine-tuning AI models | + +### Values + +- `yes` - Allow this category of use +- `no` - Disallow this category of use + +### Syntax + +```txt [robots.txt] +User-agent: * +Content-Signal: =[, =] +Content-Signal: /path/ =[, =] +``` + +### Examples + +#### Block AI Training, Allow Search + +```txt [robots.txt] +User-agent: * +Allow: / +Content-Signal: ai-train=no, search=yes +``` + +#### Block All AI Usage + +```txt [robots.txt] +User-agent: * +Allow: / +Content-Signal: ai-train=no, ai-input=no, search=yes +``` + +#### Path-Specific Rules + +```txt [robots.txt] +User-agent: * +Allow: / +Content-Signal: ai-train=no, search=yes +Content-Signal: /docs/ ai-input=yes +Content-Signal: /api/ ai-train=no, ai-input=no, search=no +``` + +### Programmatic Configuration + +**Object Format (Recommended)** - Type-safe with autocomplete: + +```ts [nuxt.config.ts] +export default defineNuxtConfig({ + robots: { + groups: [ + { + userAgent: '*', + allow: '/', + contentSignal: { + 'ai-train': 'no', + search: 'yes' + } + } + ] + } +}) +``` + +**String Format** - Backwards compatible, supports path-specific rules: + +```ts [nuxt.config.ts] +export default defineNuxtConfig({ + robots: { + groups: [ + { + userAgent: '*', + allow: '/', + contentSignal: [ + 'ai-train=no, search=yes', + '/docs/ ai-input=yes', + '/api/ ai-train=no, ai-input=no' + ] + } + ] + } +}) +``` + +## Using Both Together + +You can use both Content-Usage and Content-Signal in the same robots.txt for comprehensive coverage: + +```txt [robots.txt] +User-agent: * +Allow: / +Content-Usage: bots=y, train-ai=n +Content-Signal: ai-train=no, search=yes +``` + +::code-group + +```ts [Object Format (Recommended)] +export default defineNuxtConfig({ + robots: { + groups: [ + { + userAgent: '*', + allow: '/', + contentUsage: { + bots: 'y', + 'train-ai': 'n' + }, + contentSignal: { + 'ai-train': 'no', + search: 'yes' + } + } + ] + } +}) +``` + +```ts [String Format] +export default defineNuxtConfig({ + robots: { + groups: [ + { + userAgent: '*', + allow: '/', + contentUsage: ['bots=y, train-ai=n'], + contentSignal: ['ai-train=no, search=yes'] + } + ] + } +}) +``` + +:: + +## Common Patterns + +### Block All AI Training + +::code-group + +```txt [Content-Usage] +User-agent: * +Allow: / +Content-Usage: train-ai=n +``` + +```txt [Content-Signal] +User-agent: * +Allow: / +Content-Signal: ai-train=no +``` + +:: + +### Allow Search, Block AI + +::code-group + +```txt [Content-Usage] +User-agent: * +Allow: / +Content-Usage: search=y, train-ai=n, ai-output=n +``` + +```txt [Content-Signal] +User-agent: * +Allow: / +Content-Signal: search=yes, ai-train=no, ai-input=no +``` + +:: + +### Documentation-Only Training + +::code-group + +```txt [Content-Usage] +User-agent: * +Allow: / +Content-Usage: train-ai=n +Content-Usage: /docs/ train-ai=y +``` + +```txt [Content-Signal] +User-agent: * +Allow: / +Content-Signal: ai-train=no +Content-Signal: /docs/ ai-train=yes +``` + +:: + +## Bot-Specific Rules + +You can target specific AI bots with user-agent rules: + +```txt [robots.txt] +# Block GPTBot from training +User-agent: GPTBot +Disallow: / +Content-Signal: ai-train=no + +# Block Claude from training +User-agent: Claude-Web +Disallow: / +Content-Usage: train-ai=n + +# 
Allow all other bots with restrictions +User-agent: * +Allow: / +Content-Usage: train-ai=n +Content-Signal: ai-train=no, search=yes +``` + +```ts [nuxt.config.ts] +export default defineNuxtConfig({ + robots: { + groups: [ + { + userAgent: 'GPTBot', + disallow: '/', + contentSignal: ['ai-train=no'] + }, + { + userAgent: 'Claude-Web', + disallow: '/', + contentUsage: ['train-ai=n'] + }, + { + userAgent: '*', + allow: '/', + contentUsage: ['train-ai=n'], + contentSignal: ['ai-train=no, search=yes'] + } + ] + } +}) +``` diff --git a/docs/content/2.guides/3.nuxt-config.md b/docs/content/2.guides/3.nuxt-config.md index a25db50c..b6e8baea 100644 --- a/docs/content/2.guides/3.nuxt-config.md +++ b/docs/content/2.guides/3.nuxt-config.md @@ -66,43 +66,53 @@ Disallow: /admin Allow: /admin/login ``` -## Content Signals Configuration +## AI Directives Configuration -You can configure Content Signals (AI usage preferences) programmatically using the `contentUsage` option in your groups: +Configure AI usage preferences using `contentUsage` and `contentSignal`: -```ts [nuxt.config.ts] +::code-group + +```ts [Object Format (Recommended)] export default defineNuxtConfig({ robots: { groups: [ { userAgent: '*', allow: '/', - contentUsage: [ - 'ai=n', // Disable AI usage globally - '/docs/ train-ai=y', // Allow AI training on docs - '/api/ ai=n train-ai=n' // Disable all AI usage for API - ] + contentUsage: { + bots: 'y', + 'train-ai': 'n' + }, + contentSignal: { + 'ai-train': 'no', + search: 'yes' + } } ] } }) ``` -This will generate: - -```robots-txt [robots.txt] -User-agent: * -Allow: / -Content-Usage: ai=n -Content-Usage: /docs/ train-ai=y -Content-Usage: /api/ ai=n train-ai=n +```ts [String Format] +export default defineNuxtConfig({ + robots: { + groups: [ + { + userAgent: '*', + allow: '/', + contentUsage: ['bots=y, train-ai=n'], + contentSignal: ['ai-train=no, search=yes'] + } + ] + } +}) ``` -### Content-Usage Options - -The `contentUsage` field accepts an array of strings with the following formats: +:: -- **Global preferences**: `'ai=n'`, `'train-ai=y'` -- **Path-specific preferences**: `'/path/ ai=n'`, `'/docs/ train-ai=y'` +- **`contentUsage`** - IETF standard: `bots`, `train-ai`, `ai-output`, `search` with `y`/`n` +- **`contentSignal`** - Cloudflare: `search`, `ai-input`, `ai-train` with `yes`/`no` -See the [Content Signals guide](/docs/robots/guides/robots-txt#content-signals) for more detailed information about Content Signals and supported AI preferences. +::alert{type="info"} +**Object format** provides type safety and autocomplete. **String format** supports path-specific rules. See the [**AI Directives Guide**](/docs/robots/guides/ai-directives) for complete documentation. +:: diff --git a/docs/content/3.api/1.config.md b/docs/content/3.api/1.config.md index 0547c813..08be41d9 100644 --- a/docs/content/3.api/1.config.md +++ b/docs/content/3.api/1.config.md @@ -41,7 +41,8 @@ export default defineNuxtConfig({ userAgent: ['AdsBot-Google-Mobile', 'AdsBot-Google-Mobile-Apps'], disallow: ['/admin'], allow: ['/admin/login'], - contentUsage: ['ai=n', '/docs/ train-ai=y'], + contentUsage: { bots: 'y', 'train-ai': 'n' }, + contentSignal: { 'ai-train': 'no', search: 'yes' }, comments: 'Allow Google AdsBot to index the login page but no-admin pages' }, ] @@ -56,7 +57,8 @@ Each group object supports the following properties: - `userAgent?: string | string[]` - The user agent(s) to apply rules to. 
Defaults to `['*']` - `disallow?: string | string[]` - Paths to disallow for the user agent(s) - `allow?: string | string[]` - Paths to allow for the user agent(s) -- `contentUsage?: string | string[]` - Content Signals for AI usage preferences (see [Content Signals guide](/docs/robots/guides/robots-txt#content-signals)) +- `contentUsage?: string | string[] | Partial` - IETF Content-Usage directives for AI preferences. Valid categories: `bots`, `train-ai`, `ai-output`, `search`. Values: `y`/`n`. Use object format for type safety (see [AI Directives guide](/docs/robots/guides/ai-directives)) +- `contentSignal?: string | string[] | Partial` - Cloudflare Content-Signal directives for AI preferences. Valid categories: `search`, `ai-input`, `ai-train`. Values: `yes`/`no`. Use object format for type safety (see [AI Directives guide](/docs/robots/guides/ai-directives)) - `comment?: string | string[]` - Comments to include in the robots.txt file ## `sitemap: MaybeArray`{lang="ts"} diff --git a/src/runtime/types.ts b/src/runtime/types.ts index c56c7f7c..2470f815 100644 --- a/src/runtime/types.ts +++ b/src/runtime/types.ts @@ -41,6 +41,46 @@ export interface RobotDirectives { */ export type RobotsValue = boolean | string | Partial +/** + * Content-Usage preference value (y = allow, n = disallow) + * @see https://datatracker.ietf.org/doc/draft-ietf-aipref-vocab/ + */ +export type ContentUsageValue = 'y' | 'n' + +/** + * Content-Signal preference value (yes = allow, no = disallow) + * @see https://www.ietf.org/archive/id/draft-romm-aipref-contentsignals-00.html + */ +export type ContentSignalValue = 'yes' | 'no' + +/** + * Content-Usage categories (IETF aipref-vocab) + * @see https://ietf-wg-aipref.github.io/drafts/draft-ietf-aipref-vocab.html + */ +export interface ContentUsagePreferences { + /** Automated Processing */ + 'bots'?: ContentUsageValue + /** Foundation Model Production */ + 'train-ai'?: ContentUsageValue + /** AI Output */ + 'ai-output'?: ContentUsageValue + /** Search */ + 'search'?: ContentUsageValue +} + +/** + * Content-Signal categories (IETF aipref-contentsignals) + * @see https://www.ietf.org/archive/id/draft-romm-aipref-contentsignals-00.html + */ +export interface ContentSignalPreferences { + /** Search */ + 'search'?: ContentSignalValue + /** AI Input (RAG, grounding, generative AI search) */ + 'ai-input'?: ContentSignalValue + /** AI Training (training or fine-tuning models) */ + 'ai-train'?: ContentSignalValue +} + export interface ParsedRobotsTxt { groups: RobotsGroupResolved[] sitemaps: string[] @@ -55,7 +95,8 @@ export interface GoogleInput { disallow?: Arrayable allow?: Arrayable userAgent?: Arrayable - contentUsage?: Arrayable + contentUsage?: Arrayable | Partial + contentSignal?: Arrayable | Partial // nuxt-simple-robots internals _skipI18n?: boolean } @@ -72,8 +113,10 @@ export interface RobotsGroupResolved { host?: string // yandex only cleanParam?: string[] - // content signals / AI preferences + // https://datatracker.ietf.org/doc/draft-ietf-aipref-attach/ contentUsage?: string[] + // https://contentsignals.org/ + contentSignal?: string[] // nuxt-simple-robots internals _skipI18n?: boolean // runtime optimization diff --git a/src/util.ts b/src/util.ts index b924d175..5647f90b 100644 --- a/src/util.ts +++ b/src/util.ts @@ -47,6 +47,7 @@ export function parseRobotsTxt(s: string): ParsedRobotsTxt { allow: [], userAgent: [], contentUsage: [], + contentSignal: [], } let ln = -1 // read the contents @@ -76,6 +77,7 @@ export function parseRobotsTxt(s: string): 
ParsedRobotsTxt { allow: [], userAgent: [], contentUsage: [], + contentSignal: [], } createNewGroup = false } @@ -111,10 +113,13 @@ export function parseRobotsTxt(s: string): ParsedRobotsTxt { } break case 'content-usage': - case 'content-signal': currentGroup.contentUsage = currentGroup.contentUsage || [] currentGroup.contentUsage.push(val) break + case 'content-signal': + currentGroup.contentSignal = currentGroup.contentSignal || [] + currentGroup.contentSignal.push(val) + break default: errors.push(`L${ln}: Unknown directive ${rule} `) break @@ -148,24 +153,39 @@ function validateGroupRules(group: ParsedRobotsTxt['groups'][number], errors: st // Validate Content-Usage directives if (group.contentUsage) { + const validCategories = ['bots', 'train-ai', 'ai-output', 'search'] + const validValues = ['y', 'n'] + group.contentUsage.forEach((rule) => { if (rule === '') { errors.push(`Content-Usage rule cannot be empty.`) return } - // Basic validation for Content-Usage format // Format can be: "preference" or "/path preference" const parts = rule.trim().split(/\s+/) if (parts.length === 1) { - // Global preference like "ai=n" or "train-ai=n" + // Global preference like "bots=y" or "train-ai=n" if (!parts[0]?.includes('=')) { - errors.push(`Content-Usage rule "${rule}" must contain a preference assignment (e.g., "ai=n").`) + errors.push(`Content-Usage rule "${rule}" must contain a preference assignment (e.g., "train-ai=n").`) + return } + + // Validate category and value + const preferences = parts[0].split(',').map(p => p.trim()) + preferences.forEach((pref) => { + const [category, value] = pref.split('=').map(s => s.trim()) + if (!validCategories.includes(category || '')) { + errors.push(`Content-Usage category "${category}" is invalid. Valid categories: ${validCategories.join(', ')}.`) + } + if (!validValues.includes(value || '')) { + errors.push(`Content-Usage value "${value}" for "${category}" is invalid. Valid values: y, n.`) + } + }) } else if (parts.length >= 2) { - // Path-specific preference like "/path ai=n" + // Path-specific preference like "/path train-ai=n" const path = parts[0] const preference = parts.slice(1).join(' ') @@ -173,7 +193,81 @@ function validateGroupRules(group: ParsedRobotsTxt['groups'][number], errors: st errors.push(`Content-Usage path "${path}" must start with a \`/\`.`) } if (!preference.includes('=')) { - errors.push(`Content-Usage preference "${preference}" must contain an assignment (e.g., "ai=n").`) + errors.push(`Content-Usage preference "${preference}" must contain an assignment (e.g., "train-ai=n").`) + } + else { + // Validate category and value in path-specific rules + const preferences = preference.split(',').map(p => p.trim()) + preferences.forEach((pref) => { + const [category, value] = pref.split('=').map(s => s.trim()) + if (!validCategories.includes(category || '')) { + errors.push(`Content-Usage category "${category}" is invalid. Valid categories: ${validCategories.join(', ')}.`) + } + if (!validValues.includes(value || '')) { + errors.push(`Content-Usage value "${value}" for "${category}" is invalid. 
Valid values: y, n.`) + } + }) + } + } + }) + } + + // Validate Content-Signal directives + if (group.contentSignal) { + const validCategories = ['search', 'ai-input', 'ai-train'] + const validValues = ['yes', 'no'] + + group.contentSignal.forEach((rule) => { + if (rule === '') { + errors.push(`Content-Signal rule cannot be empty.`) + return + } + + // Format can be: "preference" or "/path preference" + const parts = rule.trim().split(/\s+/) + + if (parts.length === 1) { + // Global preference like "ai-train=no" or "search=yes" + if (!parts[0]?.includes('=')) { + errors.push(`Content-Signal rule "${rule}" must contain a preference assignment (e.g., "ai-train=no").`) + return + } + + // Validate category and value + const preferences = parts[0].split(',').map(p => p.trim()) + preferences.forEach((pref) => { + const [category, value] = pref.split('=').map(s => s.trim()) + if (!validCategories.includes(category || '')) { + errors.push(`Content-Signal category "${category}" is invalid. Valid categories: ${validCategories.join(', ')}.`) + } + if (!validValues.includes(value || '')) { + errors.push(`Content-Signal value "${value}" for "${category}" is invalid. Valid values: yes, no.`) + } + }) + } + else if (parts.length >= 2) { + // Path-specific preference like "/path ai-train=no" + const path = parts[0] + const preference = parts.slice(1).join(' ') + + if (!path?.startsWith('/')) { + errors.push(`Content-Signal path "${path}" must start with a \`/\`.`) + } + if (!preference.includes('=')) { + errors.push(`Content-Signal preference "${preference}" must contain an assignment (e.g., "ai-train=no").`) + } + else { + // Validate category and value in path-specific rules + const preferences = preference.split(',').map(p => p.trim()) + preferences.forEach((pref) => { + const [category, value] = pref.split('=').map(s => s.trim()) + if (!validCategories.includes(category || '')) { + errors.push(`Content-Signal category "${category}" is invalid. Valid categories: ${validCategories.join(', ')}.`) + } + if (!validValues.includes(value || '')) { + errors.push(`Content-Signal value "${value}" for "${category}" is invalid. Valid values: yes, no.`) + } + }) } } }) @@ -264,6 +358,40 @@ export function asArray(v: any) { return typeof v === 'undefined' ? [] : (Array.isArray(v) ? v : [v]) } +/** + * Convert ContentUsagePreferences object to string format + */ +function contentUsageToString(prefs: Record): string { + return Object.entries(prefs) + .filter(([_, value]) => value !== undefined) + .map(([key, value]) => `${key}=${value}`) + .join(', ') +} + +/** + * Normalize contentUsage/contentSignal to string array format + */ +function normalizeContentPreferences(value: any): string[] { + if (!value) + return [] + + // If it's already an array of strings, filter and return + if (Array.isArray(value)) + return value.filter(rule => Boolean(rule)) + + // If it's an object (ContentUsagePreferences or ContentSignalPreferences) + if (typeof value === 'object' && !Array.isArray(value)) { + const str = contentUsageToString(value) + return str ? [str] : [] + } + + // If it's a single string + if (typeof value === 'string') + return value ? 
[value] : [] + + return [] +} + export function normalizeGroup(group: RobotsGroupInput | RobotsGroupResolved): RobotsGroupResolved { // quick renormalization check if ((group as RobotsGroupResolved)._normalized) { @@ -278,13 +406,15 @@ export function normalizeGroup(group: RobotsGroupInput | RobotsGroupResolved): R } const disallow = asArray(group.disallow) // we can have empty disallow const allow = asArray(group.allow).filter(rule => Boolean(rule)) - const contentUsage = asArray(group.contentUsage).filter(rule => Boolean(rule)) + const contentUsage = normalizeContentPreferences(group.contentUsage) + const contentSignal = normalizeContentPreferences(group.contentSignal) return { ...group, userAgent: group.userAgent ? asArray(group.userAgent) : ['*'], disallow, allow, contentUsage, + contentSignal, _indexable: !disallow.includes('/'), _rules: [ ...disallow.filter(Boolean).map(r => ({ pattern: r, allow: false })), @@ -317,12 +447,16 @@ export function generateRobotsTxt({ groups, sitemaps }: { groups: RobotsGroupRes for (const cleanParam of group.cleanParam || []) lines.push(`Clean-param: ${cleanParam}`) - // content signals / AI preferences - // Both Content-Usage (IETF) and Content-Signal (Cloudflare) are accepted as input, output as Content-Usage + // AI preferences - IETF aipref-vocab // See: https://datatracker.ietf.org/doc/draft-ietf-aipref-attach/ for (const contentUsage of group.contentUsage || []) lines.push(`Content-Usage: ${contentUsage}`) + // AI preferences - IETF aipref-contentsignals + // See: https://www.ietf.org/archive/id/draft-romm-aipref-contentsignals-00.html + for (const contentSignal of group.contentSignal || []) + lines.push(`Content-Signal: ${contentSignal}`) + lines.push('') // seperator } // add sitemaps diff --git a/test/unit/contentSignal.test.ts b/test/unit/contentSignal.test.ts new file mode 100644 index 00000000..a67c7c14 --- /dev/null +++ b/test/unit/contentSignal.test.ts @@ -0,0 +1,477 @@ +import type { ContentSignalPreferences, ContentUsagePreferences } from '../../src/runtime/types' +import { describe, expect, it } from 'vitest' +import { generateRobotsTxt, normalizeGroup, parseRobotsTxt, validateRobots } from '../../src/runtime/util' + +describe('contentSignal', () => { + describe('type safety', () => { + it('contentUsagePreferences type', () => { + const prefs: ContentUsagePreferences = { + 'bots': 'y', + 'train-ai': 'n', + 'ai-output': 'y', + 'search': 'n', + } + expect(prefs).toBeDefined() + }) + + it('contentSignalPreferences type', () => { + const prefs: ContentSignalPreferences = { + 'search': 'yes', + 'ai-input': 'no', + 'ai-train': 'no', + } + expect(prefs).toBeDefined() + }) + }) + + describe('parsing', () => { + it('parses Content-Signal separately from Content-Usage', () => { + const robotsTxt = ` +User-Agent: * +Allow: / +Content-Usage: bots=y, train-ai=n +Content-Signal: ai-train=no, search=yes + ` + const parsed = parseRobotsTxt(robotsTxt) + expect(parsed.groups[0]?.contentUsage).toEqual(['bots=y, train-ai=n']) + expect(parsed.groups[0]?.contentSignal).toEqual(['ai-train=no, search=yes']) + }) + + it('handles path-specific Content-Signal rules', () => { + const robotsTxt = ` +User-Agent: * +Allow: / +Content-Signal: /api/ ai-train=no +Content-Signal: /public/ search=yes, ai-input=yes + ` + const parsed = parseRobotsTxt(robotsTxt) + expect(parsed.groups[0]?.contentSignal).toEqual([ + '/api/ ai-train=no', + '/public/ search=yes, ai-input=yes', + ]) + }) + + it('handles multiple Content-Signal directives', () => { + const robotsTxt = ` 
+User-Agent: * +Allow: / +Content-Signal: ai-train=no +Content-Signal: search=yes +Content-Signal: ai-input=no + ` + const parsed = parseRobotsTxt(robotsTxt) + expect(parsed.groups[0]?.contentSignal).toEqual([ + 'ai-train=no', + 'search=yes', + 'ai-input=no', + ]) + }) + + it('handles mixed case directive names', () => { + const robotsTxt = ` +User-Agent: * +Allow: / +content-signal: ai-train=no +Content-Signal: search=yes +CONTENT-SIGNAL: ai-input=no + ` + const parsed = parseRobotsTxt(robotsTxt) + expect(parsed.groups[0]?.contentSignal).toEqual([ + 'ai-train=no', + 'search=yes', + 'ai-input=no', + ]) + }) + }) + + describe('validation', () => { + it('validates Content-Signal categories', () => { + const { errors } = validateRobots({ + errors: [], + sitemaps: [], + groups: [ + { + allow: ['/'], + comment: [], + disallow: [], + userAgent: ['*'], + contentSignal: [ + 'invalid-category=yes', + ], + }, + ], + }) + expect(errors).toContain('Content-Signal category "invalid-category" is invalid. Valid categories: search, ai-input, ai-train.') + }) + + it('validates Content-Signal values', () => { + const { errors } = validateRobots({ + errors: [], + sitemaps: [], + groups: [ + { + allow: ['/'], + comment: [], + disallow: [], + userAgent: ['*'], + contentSignal: [ + 'ai-train=y', + 'search=n', + ], + }, + ], + }) + expect(errors).toContain('Content-Signal value "y" for "ai-train" is invalid. Valid values: yes, no.') + expect(errors).toContain('Content-Signal value "n" for "search" is invalid. Valid values: yes, no.') + }) + + it('validates Content-Signal must have equals sign', () => { + const { errors } = validateRobots({ + errors: [], + sitemaps: [], + groups: [ + { + allow: ['/'], + comment: [], + disallow: [], + userAgent: ['*'], + contentSignal: [ + 'ai-train', + ], + }, + ], + }) + expect(errors).toContain('Content-Signal rule "ai-train" must contain a preference assignment (e.g., "ai-train=no").') + }) + + it('validates path must start with slash', () => { + const { errors } = validateRobots({ + errors: [], + sitemaps: [], + groups: [ + { + allow: ['/'], + comment: [], + disallow: [], + userAgent: ['*'], + contentSignal: [ + 'api/ ai-train=no', + ], + }, + ], + }) + expect(errors).toContain('Content-Signal path "api/" must start with a `/`.') + }) + + it('validates empty Content-Signal rules', () => { + const { errors } = validateRobots({ + errors: [], + sitemaps: [], + groups: [ + { + allow: ['/'], + comment: [], + disallow: [], + userAgent: ['*'], + contentSignal: [ + '', + ], + }, + ], + }) + expect(errors).toContain('Content-Signal rule cannot be empty.') + }) + + it('validates multiple preferences in one line', () => { + const { errors } = validateRobots({ + errors: [], + sitemaps: [], + groups: [ + { + allow: ['/'], + comment: [], + disallow: [], + userAgent: ['*'], + contentSignal: [ + 'ai-train=no, search=yes, invalid=yes', + ], + }, + ], + }) + expect(errors).toContain('Content-Signal category "invalid" is invalid. 
Valid categories: search, ai-input, ai-train.') + }) + + it('accepts valid Content-Signal rules', () => { + const { errors } = validateRobots({ + errors: [], + sitemaps: [], + groups: [ + { + allow: ['/'], + comment: [], + disallow: [], + userAgent: ['*'], + contentSignal: [ + 'ai-train=no', + 'search=yes', + 'ai-input=no', + '/api/ ai-train=no', + '/public/ search=yes, ai-input=yes', + ], + }, + ], + }) + expect(errors).toHaveLength(0) + }) + }) + + describe('normalization', () => { + it('normalizes contentSignal from string to array', () => { + const group = { + userAgent: ['*'], + allow: ['/'], + disallow: [], + comment: [], + contentSignal: 'ai-train=no', + } + // @ts-expect-error - testing runtime normalization + const normalized = normalizeGroup(group) + expect(normalized.contentSignal).toEqual(['ai-train=no']) + }) + + it('normalizes contentSignal from object to array', () => { + const group = { + userAgent: ['*'], + allow: ['/'], + disallow: [], + comment: [], + contentSignal: { + 'ai-train': 'no', + 'search': 'yes', + 'ai-input': 'no', + }, + } + const normalized = normalizeGroup(group) + expect(normalized.contentSignal).toEqual(['ai-train=no, search=yes, ai-input=no']) + }) + + it('normalizes contentUsage from object to array', () => { + const group = { + userAgent: ['*'], + allow: ['/'], + disallow: [], + comment: [], + contentUsage: { + 'bots': 'y', + 'train-ai': 'n', + }, + } + const normalized = normalizeGroup(group) + expect(normalized.contentUsage).toEqual(['bots=y, train-ai=n']) + }) + + it('handles partial object preferences', () => { + const group = { + userAgent: ['*'], + allow: ['/'], + disallow: [], + comment: [], + contentSignal: { + 'ai-train': 'no', + }, + } + const normalized = normalizeGroup(group) + expect(normalized.contentSignal).toEqual(['ai-train=no']) + }) + + it('filters empty contentSignal rules', () => { + const group = { + userAgent: ['*'], + allow: ['/'], + disallow: [], + comment: [], + contentSignal: ['ai-train=no', '', 'search=yes'], + } + const normalized = normalizeGroup(group) + expect(normalized.contentSignal).toEqual(['ai-train=no', 'search=yes']) + }) + + it('handles undefined contentSignal', () => { + const group = { + userAgent: ['*'], + allow: ['/'], + disallow: [], + comment: [], + } + const normalized = normalizeGroup(group) + expect(normalized.contentSignal).toEqual([]) + }) + }) + + describe('generation', () => { + it('generates Content-Signal directives', () => { + const robotsData = { + groups: [ + { + userAgent: ['*'], + allow: ['/'], + disallow: [], + comment: [], + contentSignal: [ + 'ai-train=no', + 'search=yes', + 'ai-input=no', + ], + }, + ], + sitemaps: [], + } + + const generated = generateRobotsTxt(robotsData) + expect(generated).toContain('Content-Signal: ai-train=no') + expect(generated).toContain('Content-Signal: search=yes') + expect(generated).toContain('Content-Signal: ai-input=no') + }) + + it('generates both Content-Usage and Content-Signal', () => { + const robotsData = { + groups: [ + { + userAgent: ['*'], + allow: ['/'], + disallow: [], + comment: [], + contentUsage: [ + 'bots=y', + 'train-ai=n', + ], + contentSignal: [ + 'ai-train=no', + 'search=yes', + ], + }, + ], + sitemaps: [], + } + + const generated = generateRobotsTxt(robotsData) + expect(generated).toContain('Content-Usage: bots=y') + expect(generated).toContain('Content-Usage: train-ai=n') + expect(generated).toContain('Content-Signal: ai-train=no') + expect(generated).toContain('Content-Signal: search=yes') + }) + + it('maintains order of directives', () 
=> { + const robotsData = { + groups: [ + { + userAgent: ['*'], + allow: ['/'], + disallow: [], + comment: [], + contentUsage: ['bots=y'], + contentSignal: ['ai-train=no'], + }, + ], + sitemaps: [], + } + + const generated = generateRobotsTxt(robotsData) + const lines = generated.split('\n') + const contentUsageIndex = lines.findIndex(line => line.startsWith('Content-Usage:')) + const contentSignalIndex = lines.findIndex(line => line.startsWith('Content-Signal:')) + expect(contentUsageIndex).toBeLessThan(contentSignalIndex) + }) + + it('generates from object format', () => { + const robotsData = { + groups: [ + { + userAgent: ['*'], + allow: ['/'], + disallow: [], + comment: [], + contentUsage: { + 'bots': 'y', + 'train-ai': 'n', + }, + contentSignal: { + 'ai-train': 'no', + 'search': 'yes', + }, + }, + ], + sitemaps: [], + } + + const normalized = normalizeGroup(robotsData.groups[0]) + const generated = generateRobotsTxt({ groups: [normalized], sitemaps: [] }) + expect(generated).toContain('Content-Usage: bots=y, train-ai=n') + expect(generated).toContain('Content-Signal: ai-train=no, search=yes') + }) + }) + + describe('round-trip parsing and generation', () => { + it('parses and regenerates Content-Signal correctly', () => { + const original = `User-agent: * +Allow: / +Content-Signal: ai-train=no +Content-Signal: search=yes, ai-input=yes + +` + const parsed = parseRobotsTxt(original) + const generated = generateRobotsTxt(parsed) + expect(generated.trim()).toEqual(original.trim()) + }) + + it('parses and regenerates mixed Content-Usage and Content-Signal', () => { + const original = `User-agent: * +Allow: / +Content-Usage: bots=y +Content-Signal: ai-train=no + +` + const parsed = parseRobotsTxt(original) + const generated = generateRobotsTxt(parsed) + expect(generated.trim()).toEqual(original.trim()) + }) + }) + + describe('edge cases', () => { + it('handles Content-Signal with spaces in values', () => { + const robotsTxt = ` +User-Agent: * +Allow: / +Content-Signal: ai-train=no, search=yes + ` + const parsed = parseRobotsTxt(robotsTxt) + expect(parsed.groups[0]?.contentSignal).toEqual(['ai-train=no, search=yes']) + }) + + it('handles Content-Signal with comments', () => { + const robotsTxt = ` +User-Agent: * +Allow: / +Content-Signal: ai-train=no # no AI training + ` + const parsed = parseRobotsTxt(robotsTxt) + // Comments are stripped during parsing + expect(parsed.groups[0]?.contentSignal).toEqual(['ai-train=no']) + }) + + it('handles multiple user agents with different Content-Signal rules', () => { + const robotsTxt = ` +User-Agent: GPTBot +Disallow: /api/ +Content-Signal: ai-train=no + +User-Agent: * +Allow: / +Content-Signal: ai-train=yes + ` + const parsed = parseRobotsTxt(robotsTxt) + expect(parsed.groups[0]?.contentSignal).toEqual(['ai-train=no']) + expect(parsed.groups[1]?.contentSignal).toEqual(['ai-train=yes']) + }) + }) +}) diff --git a/test/unit/generateRobotsTxt.test.ts b/test/unit/generateRobotsTxt.test.ts index 643a5e60..46b11e68 100644 --- a/test/unit/generateRobotsTxt.test.ts +++ b/test/unit/generateRobotsTxt.test.ts @@ -53,9 +53,9 @@ describe('generateRobotsTxt', () => { disallow: [], comment: [], contentUsage: [ - 'ai=n', + 'bots=y', '/public/ train-ai=y', - '/restricted/ ai=n train-ai=n', + '/restricted/ train-ai=n, ai-output=n', ], }, ], @@ -66,9 +66,73 @@ describe('generateRobotsTxt', () => { expect(generated).toMatchInlineSnapshot(` "User-agent: * Allow: / - Content-Usage: ai=n + Content-Usage: bots=y Content-Usage: /public/ train-ai=y - Content-Usage: 
/restricted/ ai=n train-ai=n + Content-Usage: /restricted/ train-ai=n, ai-output=n + + Sitemap: https://example.com/sitemap.xml" + `) + }) + + it('content-signal generation', () => { + const robotsData = { + groups: [ + { + userAgent: ['*'], + allow: ['/'], + disallow: [], + comment: [], + contentSignal: [ + 'ai-train=no', + '/public/ search=yes, ai-input=yes', + '/restricted/ ai-train=no, ai-input=no', + ], + }, + ], + sitemaps: ['https://example.com/sitemap.xml'], + } + + const generated = generateRobotsTxt(robotsData) + expect(generated).toMatchInlineSnapshot(` + "User-agent: * + Allow: / + Content-Signal: ai-train=no + Content-Signal: /public/ search=yes, ai-input=yes + Content-Signal: /restricted/ ai-train=no, ai-input=no + + Sitemap: https://example.com/sitemap.xml" + `) + }) + + it('mixed content-usage and content-signal generation', () => { + const robotsData = { + groups: [ + { + userAgent: ['*'], + allow: ['/'], + disallow: [], + comment: [], + contentUsage: [ + 'bots=y', + 'train-ai=n', + ], + contentSignal: [ + 'ai-train=no', + 'search=yes', + ], + }, + ], + sitemaps: ['https://example.com/sitemap.xml'], + } + + const generated = generateRobotsTxt(robotsData) + expect(generated).toMatchInlineSnapshot(` + "User-agent: * + Allow: / + Content-Usage: bots=y + Content-Usage: train-ai=n + Content-Signal: ai-train=no + Content-Signal: search=yes Sitemap: https://example.com/sitemap.xml" `) diff --git a/test/unit/robotsTxtParser.test.ts b/test/unit/robotsTxtParser.test.ts index 1d3eaf34..4ee42be4 100644 --- a/test/unit/robotsTxtParser.test.ts +++ b/test/unit/robotsTxtParser.test.ts @@ -13,6 +13,7 @@ describe('robotsTxtParser', () => { { "allow": [], "comment": [], + "contentSignal": [], "contentUsage": [], "disallow": [ "/wp-json/", @@ -28,6 +29,7 @@ describe('robotsTxtParser', () => { { "allow": [], "comment": [], + "contentSignal": [], "contentUsage": [], "disallow": [ "/", @@ -68,6 +70,7 @@ describe('robotsTxtParser', () => { "/api/ui-extensions/", ], "comment": [], + "contentSignal": [], "contentUsage": [], "disallow": [ "/config", @@ -107,6 +110,7 @@ describe('robotsTxtParser', () => { "/api/ui-extensions/", ], "comment": [], + "contentSignal": [], "contentUsage": [], "disallow": [ "/config", @@ -146,6 +150,7 @@ describe('robotsTxtParser', () => { "/api/ui-extensions/", ], "comment": [], + "contentSignal": [], "contentUsage": [], "disallow": [ "/config", @@ -185,6 +190,7 @@ describe('robotsTxtParser', () => { "/api/ui-extensions/", ], "comment": [], + "contentSignal": [], "contentUsage": [], "disallow": [ "/config", @@ -237,6 +243,7 @@ describe('robotsTxtParser', () => { { "allow": [], "comment": [], + "contentSignal": [], "contentUsage": [], "disallow": [ "", @@ -264,6 +271,7 @@ describe('robotsTxtParser', () => { { "allow": [], "comment": [], + "contentSignal": [], "contentUsage": [], "disallow": [ "/cdn-cgi/challenge-platform/", @@ -279,6 +287,7 @@ describe('robotsTxtParser', () => { "s /forum/showthread.php", ], "comment": [], + "contentSignal": [], "contentUsage": [], "disallow": [ "", @@ -307,6 +316,7 @@ describe('robotsTxtParser', () => { "/bar", ], "comment": [], + "contentSignal": [], "contentUsage": [], "disallow": [ "/foo", @@ -320,6 +330,7 @@ describe('robotsTxtParser', () => { "/boo", ], "comment": [], + "contentSignal": [], "contentUsage": [], "disallow": [ "/baz", @@ -331,6 +342,7 @@ describe('robotsTxtParser', () => { { "allow": [], "comment": [], + "contentSignal": [], "contentUsage": [], "disallow": [ "/invalid", @@ -342,6 +354,7 @@ describe('robotsTxtParser', () 
=> { { "allow": [], "comment": [], + "contentSignal": [], "contentUsage": [], "disallow": [ "/star", @@ -380,6 +393,7 @@ Unknown: /bar "/", ], "comment": [], + "contentSignal": [], "contentUsage": [], "disallow": [], "userAgent": [ @@ -396,9 +410,9 @@ Unknown: /bar const robotsTxt = ` User-Agent: * Allow: / -Content-Usage: ai=n +Content-Usage: bots=y Content-Usage: /public/ train-ai=y -Content-Usage: /restricted/ ai=n train-ai=n +Content-Usage: /restricted/ train-ai=n, ai-output=n ` expect(parseRobotsTxt(robotsTxt)).toMatchInlineSnapshot(` { @@ -409,10 +423,11 @@ Content-Usage: /restricted/ ai=n train-ai=n "/", ], "comment": [], + "contentSignal": [], "contentUsage": [ - "ai=n", + "bots=y", "/public/ train-ai=y", - "/restricted/ ai=n train-ai=n", + "/restricted/ train-ai=n, ai-output=n", ], "disallow": [], "userAgent": [ @@ -439,7 +454,9 @@ Content-Usage: const robotsTxt = ` User-Agent: * Allow: / -Content-Signal: ai-train=no, search=yes, ai-input=yes +Content-Signal: ai-train=no +Content-Signal: /public/ search=yes, ai-input=yes +Content-Signal: /restricted/ ai-train=no, ai-input=no ` expect(parseRobotsTxt(robotsTxt)).toMatchInlineSnapshot(` { @@ -450,9 +467,12 @@ Content-Signal: ai-train=no, search=yes, ai-input=yes "/", ], "comment": [], - "contentUsage": [ - "ai-train=no, search=yes, ai-input=yes", + "contentSignal": [ + "ai-train=no", + "/public/ search=yes, ai-input=yes", + "/restricted/ ai-train=no, ai-input=no", ], + "contentUsage": [], "disallow": [], "userAgent": [ "*", diff --git a/test/unit/robotsTxtValidator.test.ts b/test/unit/robotsTxtValidator.test.ts index 5e9a437d..7a74b394 100644 --- a/test/unit/robotsTxtValidator.test.ts +++ b/test/unit/robotsTxtValidator.test.ts @@ -60,20 +60,57 @@ describe('robotsTxtValidator', () => { disallow: [], userAgent: ['*'], contentUsage: [ - 'ai=n', + 'bots=y', '/public/ train-ai=y', 'invalid-preference', - 'invalid-path ai=n', + 'invalid-path train-ai=n', '', + 'invalid-cat=y', + 'train-ai=maybe', ], }, ], }) expect(errors).toMatchInlineSnapshot(` [ - "Content-Usage rule "invalid-preference" must contain a preference assignment (e.g., "ai=n").", + "Content-Usage rule "invalid-preference" must contain a preference assignment (e.g., "train-ai=n").", "Content-Usage path "invalid-path" must start with a \`/\`.", "Content-Usage rule cannot be empty.", + "Content-Usage category "invalid-cat" is invalid. Valid categories: bots, train-ai, ai-output, search.", + "Content-Usage value "maybe" for "train-ai" is invalid. Valid values: y, n.", + ] + `) + }) + + it('content-signal validation', () => { + const { errors } = validateRobots({ + errors: [], + sitemaps: [], + groups: [ + { + allow: ['/'], + comment: [], + disallow: [], + userAgent: ['*'], + contentSignal: [ + 'ai-train=no', + '/public/ search=yes, ai-input=yes', + 'invalid-preference', + 'invalid-path ai-train=no', + '', + 'invalid-cat=yes', + 'ai-train=maybe', + ], + }, + ], + }) + expect(errors).toMatchInlineSnapshot(` + [ + "Content-Signal rule "invalid-preference" must contain a preference assignment (e.g., "ai-train=no").", + "Content-Signal path "invalid-path" must start with a \`/\`.", + "Content-Signal rule cannot be empty.", + "Content-Signal category "invalid-cat" is invalid. Valid categories: search, ai-input, ai-train.", + "Content-Signal value "maybe" for "ai-train" is invalid. Valid values: yes, no.", ] `) })