diff --git a/src/http/routes/object/createObject.ts b/src/http/routes/object/createObject.ts index 78a959981..b3c87e85e 100644 --- a/src/http/routes/object/createObject.ts +++ b/src/http/routes/object/createObject.ts @@ -32,6 +32,7 @@ interface createObjectRequestInterface extends RequestGenericInterface { 'content-type': string 'cache-control'?: string 'x-upsert'?: string + 'x-robots-tag'?: string } } diff --git a/src/http/routes/object/getObject.ts b/src/http/routes/object/getObject.ts index 4c4a96902..2e4b01080 100644 --- a/src/http/routes/object/getObject.ts +++ b/src/http/routes/object/getObject.ts @@ -71,10 +71,13 @@ async function requestHandler( if (bucket.public) { // request is authenticated but we still use the superUser as we don't need to check RLS - obj = await request.storage.asSuperUser().from(bucketName).findObject(objectName, 'id, version') + obj = await request.storage + .asSuperUser() + .from(bucketName) + .findObject(objectName, 'id, version, metadata') } else { // request is authenticated use RLS - obj = await request.storage.from(bucketName).findObject(objectName, 'id, version') + obj = await request.storage.from(bucketName).findObject(objectName, 'id, version, metadata') } return request.storage.renderer('asset').render(request, response, { @@ -82,6 +85,7 @@ async function requestHandler( key: s3Key, version: obj.version, download, + xRobotsTag: obj.metadata?.['xRobotsTag'] as string | undefined, signal: request.signals.disconnect.signal, }) } @@ -95,6 +99,7 @@ export default async function routes(fastify: FastifyInstance) { // @todo add success response schema here schema: { params: getObjectParamsSchema, + querystring: getObjectQuerySchema, headers: { $ref: 'authSchema#' }, summary, response: { '4xx': { $ref: 'errorSchema#', description: 'Error response' } }, diff --git a/src/http/routes/object/getPublicObject.ts b/src/http/routes/object/getPublicObject.ts index 7b4a63f30..efa626ebc 100644 --- a/src/http/routes/object/getPublicObject.ts 
+++ b/src/http/routes/object/getPublicObject.ts @@ -38,6 +38,7 @@ export default async function routes(fastify: FastifyInstance) { exposeHeadRoute: false, schema: { params: getPublicObjectParamsSchema, + querystring: getObjectQuerySchema, summary, response: { '4xx': { $ref: 'errorSchema#', description: 'Error response' } }, tags: ['object'], @@ -55,7 +56,10 @@ export default async function routes(fastify: FastifyInstance) { request.storage.asSuperUser().findBucket(bucketName, 'id,public', { isPublic: true, }), - request.storage.asSuperUser().from(bucketName).findObject(objectName, 'id,version'), + request.storage + .asSuperUser() + .from(bucketName) + .findObject(objectName, 'id,version,metadata'), ]) // send the object from s3 @@ -70,6 +74,7 @@ export default async function routes(fastify: FastifyInstance) { key: s3Key, version: obj.version, download, + xRobotsTag: obj.metadata?.['xRobotsTag'] as string | undefined, signal: request.signals.disconnect.signal, }) } diff --git a/src/http/routes/object/getSignedObject.ts b/src/http/routes/object/getSignedObject.ts index fec7f34e2..11ab950b5 100644 --- a/src/http/routes/object/getSignedObject.ts +++ b/src/http/routes/object/getSignedObject.ts @@ -83,7 +83,7 @@ export default async function routes(fastify: FastifyInstance) { const obj = await request.storage .asSuperUser() .from(bucketName) - .findObject(objParts.join('/'), 'id,version') + .findObject(objParts.join('/'), 'id,version,metadata') return request.storage.renderer('asset').render(request, response, { bucket: storageS3Bucket, @@ -91,6 +91,7 @@ export default async function routes(fastify: FastifyInstance) { version: obj.version, download, expires: new Date(exp * 1000).toUTCString(), + xRobotsTag: obj.metadata?.['xRobotsTag'] as string | undefined, signal: request.signals.disconnect.signal, }) } diff --git a/src/http/routes/object/updateObject.ts b/src/http/routes/object/updateObject.ts index d68d04669..4887bb189 100644 --- 
a/src/http/routes/object/updateObject.ts +++ b/src/http/routes/object/updateObject.ts @@ -29,6 +29,7 @@ interface updateObjectRequestInterface extends RequestGenericInterface { 'content-type': string 'cache-control'?: string 'x-upsert'?: string + 'x-robots-tag'?: string } } diff --git a/src/http/routes/render/renderAuthenticatedImage.ts b/src/http/routes/render/renderAuthenticatedImage.ts index e681ffcd2..3673d3f56 100644 --- a/src/http/routes/render/renderAuthenticatedImage.ts +++ b/src/http/routes/render/renderAuthenticatedImage.ts @@ -51,7 +51,9 @@ export default async function routes(fastify: FastifyInstance) { const { bucketName } = request.params const objectName = request.params['*'] - const obj = await request.storage.from(bucketName).findObject(objectName, 'id,version') + const obj = await request.storage + .from(bucketName) + .findObject(objectName, 'id,version,metadata') const s3Key = request.storage.location.getKeyLocation({ tenantId: request.tenantId, @@ -73,6 +75,7 @@ export default async function routes(fastify: FastifyInstance) { key: s3Key, version: obj.version, download, + xRobotsTag: obj.metadata?.['xRobotsTag'] as string | undefined, signal: request.signals.disconnect.signal, }) } diff --git a/src/http/routes/render/renderPublicImage.ts b/src/http/routes/render/renderPublicImage.ts index 64f175e18..10f82a2bc 100644 --- a/src/http/routes/render/renderPublicImage.ts +++ b/src/http/routes/render/renderPublicImage.ts @@ -55,7 +55,10 @@ export default async function routes(fastify: FastifyInstance) { request.storage.asSuperUser().findBucket(bucketName, 'id,public', { isPublic: true, }), - request.storage.asSuperUser().from(bucketName).findObject(objectName, 'id,version'), + request.storage + .asSuperUser() + .from(bucketName) + .findObject(objectName, 'id,version,metadata'), ]) const s3Key = `${request.tenantId}/${bucketName}/${objectName}` @@ -74,6 +77,7 @@ export default async function routes(fastify: FastifyInstance) { key: s3Key, version: 
obj.version, download, + xRobotsTag: obj.metadata?.['xRobotsTag'] as string | undefined, signal: request.signals.disconnect.signal, }) } diff --git a/src/http/routes/render/renderSignedImage.ts b/src/http/routes/render/renderSignedImage.ts index b451b7907..a2bb67e8e 100644 --- a/src/http/routes/render/renderSignedImage.ts +++ b/src/http/routes/render/renderSignedImage.ts @@ -83,7 +83,7 @@ export default async function routes(fastify: FastifyInstance) { const obj = await request.storage .asSuperUser() .from(bucketName) - .findObject(objParts.join('/'), 'id,version') + .findObject(objParts.join('/'), 'id,version,metadata') const renderer = request.storage.renderer('image') as ImageRenderer @@ -102,6 +102,7 @@ export default async function routes(fastify: FastifyInstance) { version: obj.version, download, expires: new Date(exp * 1000).toUTCString(), + xRobotsTag: obj.metadata?.['xRobotsTag'] as string | undefined, signal: request.signals.disconnect.signal, }) } diff --git a/src/internal/errors/codes.ts b/src/internal/errors/codes.ts index e1c63dd88..07f4177d8 100644 --- a/src/internal/errors/codes.ts +++ b/src/internal/errors/codes.ts @@ -235,6 +235,14 @@ export const ERRORS = { message: `mime type ${mimeType} is not supported`, }), + InvalidXRobotsTag: (message: string) => + new StorageBackendError({ + error: 'invalid_x_robots_tag', + code: ErrorCode.InvalidRequest, + httpStatusCode: 400, + message: `Invalid X-Robots-Tag header: ${message}`, + }), + InvalidRange: () => new StorageBackendError({ error: 'invalid_range', diff --git a/src/storage/backend/adapter.ts b/src/storage/backend/adapter.ts index 944662290..a897283be 100644 --- a/src/storage/backend/adapter.ts +++ b/src/storage/backend/adapter.ts @@ -31,6 +31,7 @@ export type ObjectMetadata = { eTag: string contentRange?: string httpStatusCode?: number + xRobotsTag?: string } export type UploadPart = { diff --git a/src/storage/renderer/renderer.ts b/src/storage/renderer/renderer.ts index bb6d0f38e..2a9c83b2b 
100644 --- a/src/storage/renderer/renderer.ts +++ b/src/storage/renderer/renderer.ts @@ -3,6 +3,7 @@ import { ObjectMetadata } from '../backend' import { Readable } from 'stream' import { getConfig } from '../../config' import { Obj } from '../schemas' +import { validateXRobotsTag } from '@storage/validators/x-robots-tag' export interface RenderOptions { bucket: string @@ -10,6 +11,7 @@ export interface RenderOptions { version: string | undefined download?: string expires?: string + xRobotsTag?: string object?: Obj signal?: AbortSignal } @@ -73,6 +75,15 @@ export abstract class Renderer { data: AssetResponse, options: RenderOptions ) { + let xRobotsTag = 'none' + if (options.xRobotsTag) { + try { + // allow overriding x-robots-tag header only with valid values + validateXRobotsTag(options.xRobotsTag) + xRobotsTag = options.xRobotsTag + } catch {} + } + response .status(data.metadata.httpStatusCode ?? 200) .header('Accept-Ranges', 'bytes') @@ -80,6 +91,7 @@ export abstract class Renderer { .header('ETag', data.metadata.eTag) .header('Content-Length', data.metadata.contentLength) .header('Last-Modified', data.metadata.lastModified?.toUTCString()) + .header('X-Robots-Tag', xRobotsTag) if (options.expires) { response.header('Expires', options.expires) diff --git a/src/storage/uploader.ts b/src/storage/uploader.ts index 4bad81cd3..efa3b8a7d 100644 --- a/src/storage/uploader.ts +++ b/src/storage/uploader.ts @@ -12,6 +12,7 @@ import { getConfig } from '../config' import { logger, logSchema } from '@internal/monitoring' import { Readable } from 'stream' import { StorageObjectLocator } from '@storage/locator' +import { validateXRobotsTag } from './validators/x-robots-tag' const { storageS3Bucket, uploadFileSizeLimitStandard } = getConfig() @@ -20,7 +21,8 @@ interface FileUpload { mimeType: string cacheControl: string isTruncated: () => boolean - userMetadata?: Record + xRobotsTag?: string + userMetadata?: Record } export interface UploadRequest { @@ -112,6 +114,10 @@ export 
// NOTE: `ERRORS` (used below) comes from the module-level import of '@internal/errors'.

/** Directives that stand alone, without an argument. */
const SIMPLE_RULES = [
  'all',
  'noindex',
  'nofollow',
  'none',
  'nosnippet',
  'indexifembedded',
  'notranslate',
  'noimageindex',
] as const

/** Directives written as `name: value`. */
const PARAMETRIC_RULES = [
  'max-snippet',
  'max-image-preview',
  'max-video-preview',
  'unavailable_after',
] as const

const simpleRulesPattern = SIMPLE_RULES.join('|')
const parametricRulesPattern = PARAMETRIC_RULES.join('|')
const SIMPLE_RULE_REGEX = new RegExp(`^(${simpleRulesPattern})$`)
const PARAMETRIC_RULE_REGEX = new RegExp(`^(${parametricRulesPattern}):\\s*(.*)$`)
const PARAMETRIC_RULE_START_REGEX = new RegExp(`^(${parametricRulesPattern}):`)
const VALID_IMAGE_PREVIEW_VALUES = new Set(['none', 'standard', 'large'])

// Matches a leading `unavailable_after:` rule whose date may itself contain commas
// (e.g. "Wed, 03 Dec 2025 13:09:53 GMT"). The date ends at the end of input or right
// before a comma that is followed by a known rule name or a `<user-agent>:` prefix.
// The terminator is a lookahead, so match[0] is exactly the text the rule occupies.
// Hoisted to module scope so it is compiled once rather than on every loop iteration.
const UNAVAILABLE_AFTER_RULE_REGEX = new RegExp(
  `^unavailable_after:\\s*(.+?)(?=,\\s*(?:${simpleRulesPattern}|${parametricRulesPattern}|[a-zA-Z0-9_-]+:)|$)`
)

// Whole-string integer check; parseInt alone would accept "10abc" or "1.5".
const INTEGER_REGEX = /^-?\d+$/

/**
 * Validates an X-Robots-Tag header value.
 *
 * Accepted input is a comma-separated list where each entry is one of:
 * - a simple rule (see SIMPLE_RULES),
 * - a parametric rule `name: value` (see PARAMETRIC_RULES),
 * - a `<user-agent>: <rules>` entry, whose rules are validated recursively.
 *
 * A trailing comma is tolerated; an empty entry between two commas is not.
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Robots-Tag
 *
 * @param value - The X-Robots-Tag header value to validate
 * @throws The error built by `ERRORS.InvalidXRobotsTag` when the value is empty,
 *   not a string, or contains an invalid rule
 */
export function validateXRobotsTag(value: string): void {
  if (!value || typeof value !== 'string') {
    throw ERRORS.InvalidXRobotsTag('X-Robots-Tag header value must be a non-empty string')
  }

  const trimmedValue = value.trim()
  if (!trimmedValue) {
    throw ERRORS.InvalidXRobotsTag('X-Robots-Tag header value must be a non-empty string')
  }

  for (const part of splitRules(trimmedValue)) {
    if (!part) {
      throw ERRORS.InvalidXRobotsTag('X-Robots-Tag header contains empty rule')
    }

    // Parametric rule: `name: value`
    const parametricMatch = part.match(PARAMETRIC_RULE_REGEX)
    if (parametricMatch) {
      const [, ruleName, ruleValue] = parametricMatch
      validateParametricRule(ruleName, ruleValue.trim(), VALID_IMAGE_PREVIEW_VALUES)
      continue
    }

    // Simple rule
    if (SIMPLE_RULE_REGEX.test(part)) {
      continue
    }

    // Anything else containing a colon is treated as `<user-agent>: <rules>`
    const colonIndex = part.indexOf(':')
    if (colonIndex !== -1) {
      const beforeColon = part.substring(0, colonIndex).trim()
      const afterColon = part.substring(colonIndex + 1).trim()

      if (!afterColon) {
        throw ERRORS.InvalidXRobotsTag(
          `X-Robots-Tag user agent "${beforeColon}" has no rules specified`
        )
      }

      // A prefix with no user-agent name (e.g. ": noindex") is malformed.
      if (!beforeColon) {
        throw ERRORS.InvalidXRobotsTag(`Invalid X-Robots-Tag rule: "${part}"`)
      }

      // Recursively validate the rules that follow the user agent
      validateXRobotsTag(afterColon)
      continue
    }

    throw ERRORS.InvalidXRobotsTag(`Invalid X-Robots-Tag rule: "${part}"`)
  }
}

/**
 * Splits a header value into individual rules.
 *
 * Rules are separated by commas, except that an `unavailable_after` date may
 * contain commas of its own and is extracted as a single rule. Each returned
 * rule is trimmed; an empty string is returned for an empty entry so the caller
 * can report it.
 */
function splitRules(value: string): string[] {
  const parts: string[] = []
  let remaining = value

  while (remaining) {
    remaining = remaining.trim()
    if (!remaining) break

    const startMatch = remaining.match(PARAMETRIC_RULE_START_REGEX)
    if (startMatch && startMatch[1] === 'unavailable_after') {
      const dateMatch = remaining.match(UNAVAILABLE_AFTER_RULE_REGEX)

      if (dateMatch) {
        parts.push(`unavailable_after: ${dateMatch[1].trim()}`)
        // Advance by the text actually matched, not by the length of the normalised
        // rule: the input may have zero or several spaces after the colon, or
        // whitespace before the separating comma.
        remaining = remaining.slice(dateMatch[0].length).replace(/^,\s*/, '').trim()
      } else {
        // No value after the colon; let the caller report the missing value
        parts.push(remaining)
        remaining = ''
      }
      continue
    }

    // Default: split at the next comma (simple rules and other parametric rules)
    const nextComma = remaining.indexOf(',')
    if (nextComma === -1) {
      parts.push(remaining)
      remaining = ''
    } else {
      parts.push(remaining.substring(0, nextComma).trim())
      remaining = remaining.substring(nextComma + 1).trim()
    }
  }

  return parts
}

/**
 * Parses a base-10 integer, returning NaN unless the whole string is an integer.
 */
function parseInteger(raw: string): number {
  return INTEGER_REGEX.test(raw) ? Number(raw) : NaN
}

/**
 * Validates the value of a parametric rule.
 *
 * @param ruleName - One of PARAMETRIC_RULES
 * @param ruleValue - The already-trimmed text after the colon
 * @param validImagePreviewValues - Allowed values for `max-image-preview`
 * @throws The error built by `ERRORS.InvalidXRobotsTag` when the value is missing
 *   or not valid for the rule
 */
function validateParametricRule(
  ruleName: string,
  ruleValue: string,
  validImagePreviewValues: Set<string>
): void {
  if (!ruleValue) {
    throw ERRORS.InvalidXRobotsTag(`X-Robots-Tag rule "${ruleName}" requires a value`)
  }

  switch (ruleName) {
    case 'max-snippet': {
      const num = parseInteger(ruleValue)
      if (Number.isNaN(num) || num < 0) {
        throw ERRORS.InvalidXRobotsTag(
          `X-Robots-Tag "max-snippet" value must be a non-negative number, got: "${ruleValue}"`
        )
      }
      break
    }

    case 'max-image-preview': {
      if (!validImagePreviewValues.has(ruleValue)) {
        throw ERRORS.InvalidXRobotsTag(
          `X-Robots-Tag "max-image-preview" value must be one of: none, standard, large, got: "${ruleValue}"`
        )
      }
      break
    }

    case 'max-video-preview': {
      const num = parseInteger(ruleValue)
      if (Number.isNaN(num) || num < -1) {
        throw ERRORS.InvalidXRobotsTag(
          `X-Robots-Tag "max-video-preview" value must be a number >= -1, got: "${ruleValue}"`
        )
      }
      break
    }

    case 'unavailable_after': {
      // Accept anything the Date constructor can parse
      const date = new Date(ruleValue)
      if (Number.isNaN(date.getTime())) {
        throw ERRORS.InvalidXRobotsTag(
          `X-Robots-Tag "unavailable_after" value must be a valid date, got: "${ruleValue}"`
        )
      }
      break
    }
  }
}
expect(responseJSON[1].name).toBe('sadcat-upload23.png') }) }) + +describe('x-robots-tag header', () => { + const X_ROBOTS_TEST_BUCKET = 'X_ROBOTS_TEST_BUCKET' + beforeAll(async () => { + appInstance = app() + await appInstance.inject({ + method: 'POST', + url: `/bucket`, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + payload: { + name: X_ROBOTS_TEST_BUCKET, + }, + }) + await appInstance.close() + }) + + afterAll(async () => { + appInstance = app() + await appInstance.inject({ + method: 'POST', + url: `/bucket/${X_ROBOTS_TEST_BUCKET}/empty`, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + await appInstance.inject({ + method: 'DELETE', + url: `/bucket/${X_ROBOTS_TEST_BUCKET}`, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + await appInstance.close() + }) + + test('defaults x-robots-tag header to none if not specified', async () => { + const objPath = `${X_ROBOTS_TEST_BUCKET}/test-file-1.txt` + + const createResponse = await appInstance.inject({ + method: 'POST', + url: `/object/${objPath}`, + payload: new File(['test'], 'file.txt'), + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + expect(createResponse.statusCode).toBe(200) + + const response = await appInstance.inject({ + method: 'GET', + url: `/object/authenticated/${objPath}`, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + expect(response.statusCode).toBe(200) + expect(response.headers['x-robots-tag']).toBe('none') + }) + + test('uses provided x-robots-tag header if set', async () => { + const objPath = `${X_ROBOTS_TEST_BUCKET}/test-file-2.txt` + + const createResponse = await appInstance.inject({ + method: 'POST', + url: `/object/${objPath}`, + payload: new File(['test'], 'file.txt'), + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + 'x-robots-tag': 'all', + }, + }) + expect(createResponse.statusCode).toBe(200) + + const response = await 
appInstance.inject({ + method: 'GET', + url: `/object/authenticated/${objPath}`, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + expect(response.statusCode).toBe(200) + expect(response.headers['x-robots-tag']).toBe('all') + }) + + test('updates x-robots-tag header on upsert', async () => { + const objPath = `${X_ROBOTS_TEST_BUCKET}/test-file-3.txt` + + const createResponse = await appInstance.inject({ + method: 'POST', + url: `/object/${objPath}`, + payload: new File(['test'], 'file.txt'), + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + 'x-robots-tag': 'max-snippet: 10, notranslate', + }, + }) + expect(createResponse.statusCode).toBe(200) + + const response = await appInstance.inject({ + method: 'GET', + url: `/object/authenticated/${objPath}`, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + expect(response.statusCode).toBe(200) + expect(response.headers['x-robots-tag']).toBe('max-snippet: 10, notranslate') + + const createResponse2 = await appInstance.inject({ + method: 'POST', + url: `/object/${objPath}`, + payload: new File(['test'], 'file.txt'), + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + 'x-upsert': 'true', + 'x-robots-tag': 'nofollow', + }, + }) + expect(createResponse2.statusCode).toBe(200) + + const response2 = await appInstance.inject({ + method: 'GET', + url: `/object/authenticated/${objPath}`, + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + }, + }) + expect(response2.statusCode).toBe(200) + expect(response2.headers['x-robots-tag']).toBe('nofollow') + }) + + test('rejects invalid x-robots-tag header with proper error', async () => { + const objPath = `${X_ROBOTS_TEST_BUCKET}/test-file-invalid.txt` + + const createResponse = await appInstance.inject({ + method: 'POST', + url: `/object/${objPath}`, + payload: new File(['test'], 'file.txt'), + headers: { + authorization: `Bearer ${await serviceKeyAsync}`, + 'x-robots-tag': 'invalidrule', + 
}, + }) + + expect(createResponse.statusCode).toBe(400) + expect(createResponse.json()).toMatchObject({ + statusCode: '400', + error: 'invalid_x_robots_tag', + message: 'Invalid X-Robots-Tag header: Invalid X-Robots-Tag rule: "invalidrule"', + }) + }) +}) diff --git a/src/test/validators.test.ts b/src/test/validators.test.ts new file mode 100644 index 000000000..cda5cb1c9 --- /dev/null +++ b/src/test/validators.test.ts @@ -0,0 +1,303 @@ +import { validateXRobotsTag } from '../storage/validators/x-robots-tag' + +describe('validateXRobotsTag', () => { + describe('invalid inputs', () => { + it('should throw error for empty string', () => { + expect(() => validateXRobotsTag('')).toThrow( + 'X-Robots-Tag header value must be a non-empty string' + ) + }) + + it('should throw error for whitespace-only string', () => { + expect(() => validateXRobotsTag(' ')).toThrow( + 'X-Robots-Tag header value must be a non-empty string' + ) + }) + + it('should throw error for non-string value', () => { + expect(() => validateXRobotsTag(null as unknown as string)).toThrow( + 'X-Robots-Tag header value must be a non-empty string' + ) + }) + + it('should throw error for undefined', () => { + expect(() => validateXRobotsTag(undefined as unknown as string)).toThrow( + 'X-Robots-Tag header value must be a non-empty string' + ) + }) + + it('should throw error for empty rule in comma-separated list', () => { + expect(() => validateXRobotsTag('noindex, , nofollow')).toThrow( + 'X-Robots-Tag header contains empty rule' + ) + }) + + it('should throw error for invalid rule', () => { + expect(() => validateXRobotsTag('invalidrule')).toThrow( + 'Invalid X-Robots-Tag rule: "invalidrule"' + ) + }) + }) + + describe('valid simple rules', () => { + it('should accept "all"', () => { + expect(() => validateXRobotsTag('all')).not.toThrow() + }) + + it('should accept "noindex"', () => { + expect(() => validateXRobotsTag('noindex')).not.toThrow() + }) + + it('should accept "nofollow"', () => { + expect(() => 
validateXRobotsTag('nofollow')).not.toThrow() + }) + + it('should accept "none"', () => { + expect(() => validateXRobotsTag('none')).not.toThrow() + }) + + it('should accept "nosnippet"', () => { + expect(() => validateXRobotsTag('nosnippet')).not.toThrow() + }) + + it('should accept "indexifembedded"', () => { + expect(() => validateXRobotsTag('indexifembedded')).not.toThrow() + }) + + it('should accept "notranslate"', () => { + expect(() => validateXRobotsTag('notranslate')).not.toThrow() + }) + + it('should accept "noimageindex"', () => { + expect(() => validateXRobotsTag('noimageindex')).not.toThrow() + }) + }) + + describe('multiple rules', () => { + it('should accept multiple valid rules separated by commas', () => { + expect(() => validateXRobotsTag('noindex, nofollow')).not.toThrow() + }) + + it('should accept multiple rules with extra whitespace', () => { + expect(() => validateXRobotsTag('noindex, nofollow, noimageindex')).not.toThrow() + }) + + it('should accept rules with trailing comma and whitespace', () => { + expect(() => validateXRobotsTag('noindex, nofollow, ')).not.toThrow() + }) + + it('should accept single rule with trailing comma', () => { + expect(() => validateXRobotsTag('noindex,')).not.toThrow() + }) + + it('should throw for invalid rule in multiple rules', () => { + expect(() => validateXRobotsTag('noindex, invalidrule, nofollow')).toThrow( + 'Invalid X-Robots-Tag rule: "invalidrule"' + ) + }) + }) + + describe('max-snippet parametric rule', () => { + it('should accept valid max-snippet with number', () => { + expect(() => validateXRobotsTag('max-snippet: 50')).not.toThrow() + }) + + it('should accept max-snippet with 0', () => { + expect(() => validateXRobotsTag('max-snippet: 0')).not.toThrow() + }) + + it('should throw for max-snippet with negative number', () => { + expect(() => validateXRobotsTag('max-snippet: -5')).toThrow( + 'X-Robots-Tag "max-snippet" value must be a non-negative number' + ) + }) + + it('should throw for 
max-snippet with non-numeric value', () => { + expect(() => validateXRobotsTag('max-snippet: abc')).toThrow( + 'X-Robots-Tag "max-snippet" value must be a non-negative number' + ) + }) + + it('should throw for max-snippet without value', () => { + expect(() => validateXRobotsTag('max-snippet:')).toThrow( + 'X-Robots-Tag rule "max-snippet" requires a value' + ) + }) + + it('should throw for max-snippet with whitespace-only value', () => { + expect(() => validateXRobotsTag('max-snippet: ')).toThrow( + 'X-Robots-Tag rule "max-snippet" requires a value' + ) + }) + }) + + describe('max-image-preview parametric rule', () => { + it('should accept "none"', () => { + expect(() => validateXRobotsTag('max-image-preview: none')).not.toThrow() + }) + + it('should accept "standard"', () => { + expect(() => validateXRobotsTag('max-image-preview: standard')).not.toThrow() + }) + + it('should accept "large"', () => { + expect(() => validateXRobotsTag('max-image-preview: large')).not.toThrow() + }) + + it('should throw for invalid value', () => { + expect(() => validateXRobotsTag('max-image-preview: invalid')).toThrow( + 'X-Robots-Tag "max-image-preview" value must be one of: none, standard, large' + ) + }) + + it('should throw for missing value', () => { + expect(() => validateXRobotsTag('max-image-preview:')).toThrow( + 'X-Robots-Tag rule "max-image-preview" requires a value' + ) + }) + }) + + describe('max-video-preview parametric rule', () => { + it('should accept positive number', () => { + expect(() => validateXRobotsTag('max-video-preview: 30')).not.toThrow() + }) + + it('should accept 0', () => { + expect(() => validateXRobotsTag('max-video-preview: 0')).not.toThrow() + }) + + it('should accept -1 (no limit)', () => { + expect(() => validateXRobotsTag('max-video-preview: -1')).not.toThrow() + }) + + it('should throw for number less than -1', () => { + expect(() => validateXRobotsTag('max-video-preview: -2')).toThrow( + 'X-Robots-Tag "max-video-preview" value must be a number 
>= -1' + ) + }) + + it('should throw for non-numeric value', () => { + expect(() => validateXRobotsTag('max-video-preview: abc')).toThrow( + 'X-Robots-Tag "max-video-preview" value must be a number >= -1' + ) + }) + + it('should throw for missing value', () => { + expect(() => validateXRobotsTag('max-video-preview:')).toThrow( + 'X-Robots-Tag rule "max-video-preview" requires a value' + ) + }) + }) + + describe('unavailable_after parametric rule', () => { + it('should accept valid RFC 822 date', () => { + expect(() => + validateXRobotsTag('unavailable_after: Wed, 03 Dec 2025 13:09:53 GMT') + ).not.toThrow() + }) + + it('should accept valid ISO 8601 date', () => { + expect(() => validateXRobotsTag('unavailable_after: 2025-12-03T13:09:53Z')).not.toThrow() + }) + + it('should accept other valid date format', () => { + expect(() => validateXRobotsTag('unavailable_after: 2025-12-03')).not.toThrow() + }) + + it('should accept RFC 822 date followed by another rule', () => { + expect(() => + validateXRobotsTag('unavailable_after: Wed, 03 Dec 2025 13:09:53 GMT, noindex') + ).not.toThrow() + }) + + it('should throw for invalid date', () => { + expect(() => validateXRobotsTag('unavailable_after: not-a-date')).toThrow( + 'X-Robots-Tag "unavailable_after" value must be a valid date' + ) + }) + + it('should throw for missing value', () => { + expect(() => validateXRobotsTag('unavailable_after:')).toThrow( + 'X-Robots-Tag rule "unavailable_after" requires a value' + ) + }) + }) + + describe('user agent specific rules', () => { + it('should accept single rule for specific user agent', () => { + expect(() => validateXRobotsTag('googlebot: noindex')).not.toThrow() + }) + + it('should accept multiple rules for specific user agent', () => { + expect(() => validateXRobotsTag('googlebot: noindex, nofollow')).not.toThrow() + }) + + it('should accept multiple user agents with different rules', () => { + expect(() => + validateXRobotsTag('BadBot: noindex, nofollow, googlebot: nofollow') + 
).not.toThrow() + }) + + it('should throw for user agent with no rules', () => { + expect(() => validateXRobotsTag('googlebot:')).toThrow( + 'X-Robots-Tag user agent "googlebot" has no rules specified' + ) + }) + + it('should throw for user agent with whitespace-only rules', () => { + expect(() => validateXRobotsTag('googlebot: ')).toThrow( + 'X-Robots-Tag user agent "googlebot" has no rules specified' + ) + }) + + it('should throw for invalid rule in user agent rules', () => { + expect(() => validateXRobotsTag('googlebot: invalidrule')).toThrow( + 'Invalid X-Robots-Tag rule: "invalidrule"' + ) + }) + }) + + describe('invalid parametric rule names', () => { + it('should throw for unknown parametric rule', () => { + // When an unknown parametric-looking rule is provided, it's treated as a user agent + // and the value is validated as a rule, which should fail + expect(() => validateXRobotsTag('unknown-rule: invalidvalue')).toThrow( + 'Invalid X-Robots-Tag rule: "invalidvalue"' + ) + }) + }) + + describe('complex mixed rules', () => { + it('should accept mix of simple and parametric rules', () => { + expect(() => validateXRobotsTag('noindex, max-snippet: 100')).not.toThrow() + }) + + it('should accept mix of user agent and parametric rules', () => { + expect(() => validateXRobotsTag('googlebot: noindex, max-snippet: 50')).not.toThrow() + }) + + it('should accept complex real-world example', () => { + expect(() => + validateXRobotsTag('noindex, nofollow, max-snippet: 100, max-image-preview: large') + ).not.toThrow() + }) + }) + + describe('whitespace handling', () => { + it('should accept rules with excessive whitespace between rules', () => { + expect(() => validateXRobotsTag('noindex, nofollow, noimageindex')).not.toThrow() + }) + + it('should accept parametric rules with whitespace after colon', () => { + expect(() => validateXRobotsTag('max-snippet: 50')).not.toThrow() + }) + + it('should reject rules with spaces in rule names', () => { + expect(() => 
validateXRobotsTag('no index')).toThrow('Invalid X-Robots-Tag rule: "no index"') + }) + + it('should accept rules with leading and trailing whitespace', () => { + expect(() => validateXRobotsTag(' noindex, nofollow ')).not.toThrow() + }) + }) +})