From 16641ea5117ab13ae0d9ebe3418169a484a54d4a Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Thu, 16 Oct 2025 12:22:14 -0400 Subject: [PATCH] Implement a schema for e-mail addresses --- schemas/ietf/email/address.json | 56 +++++ tests/ietf/email/address.test.json | 366 +++++++++++++++++++++++++++++ 2 files changed, 422 insertions(+) create mode 100644 schemas/ietf/email/address.json create mode 100644 tests/ietf/email/address.test.json diff --git a/schemas/ietf/email/address.json b/schemas/ietf/email/address.json new file mode 100644 index 00000000..b1ee608c --- /dev/null +++ b/schemas/ietf/email/address.json @@ -0,0 +1,56 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "RFC 5322 Email Address (Addr-Spec)", + "description": "A specific Internet identifier that represents an e-mail box to which messages are delivered", + "$comment": "https://www.rfc-editor.org/rfc/rfc5322#section-3.4.1", + "examples": [ + "simple@example.com", + "\"very.unusual.@.unusual.com\"@example.com", + "user@[IPv6:2001:db8::1]", + "(comment)john.smith@(comment)example.com(comment)", + "jo(comment)hn@example.com", + "user@[abc.def]" + ], + "x-license": "https://github.com/sourcemeta/std/blob/main/LICENSE", + "type": "string", + "anyOf": [ + { + "$comment": "Unquoted (dot-atom) local-part with dot-atom domain — both allow inline single-level comments", + "pattern": "^(?:(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*)(?:[A-Za-z0-9!#$%&'*+\\/=?^_`{|}~-]+(?:(?:[ \\t]*\\((?:[^()\\\\]|\\\\.)*\\)[ \\t]*[A-Za-z0-9!#$%&'*+\\/=?^_`{|}~-]+)|(?:\\.[A-Za-z0-9!#$%&'*+\\/=?^_`{|}~-]+))*)(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*@(?:(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?))(?:((?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*\\.(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?)))*(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*$" + }, + { + "$comment": "Quoted local-part (with escapes) + dot-atom domain. 
Comments allowed around the quoted string and around domain labels", + "pattern": "^(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*(?:\"(?:[^\"\\\\\\r]|\\\\.)*\")(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*@(?:(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?))(?:((?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*\\.(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?)))*(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*$" + }, + { + "$comment": "Unquoted local-part + domain-literal (arbitrary dtext / quoted-pairs inside brackets). Comments allowed around tokens", + "pattern": "^(?:(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*)(?:[A-Za-z0-9!#$%&'*+\\/=?^_`{|}~-]+(?:(?:[ \\t]*\\((?:[^()\\\\]|\\\\.)*\\)[ \\t]*[A-Za-z0-9!#$%&'*+\\/=?^_`{|}~-]+)|(?:\\.[A-Za-z0-9!#$%&'*+\\/=?^_`{|}~-]+))*)(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*@(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*\\[(?:[^\\[\\]\\\\\\r]|\\\\.)*\\](?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*$" + }, + { + "$comment": "Quoted local-part + domain-literal. 
Comments allowed around quoted local and around the literal", + "pattern": "^(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*(?:\"(?:[^\"\\\\\\r]|\\\\.)*\")(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*@(?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*\\[(?:[^\\[\\]\\\\\\r]|\\\\.)*\\](?:[ \\t]|\\((?:[^()\\\\]|\\\\.)*\\))*$" + } + ], + "not": { + "anyOf": [ + { + "$comment": "Disallow bare IPv4 address in domain (must be bracketed)", + "pattern": "@([0-9]{1,3}\\.){3}[0-9]{1,3}(?:$|\\s)" + }, + { + "$comment": "Disallow IPv6 literal without brackets", + "pattern": "@IPv6:" + }, + { + "$comment": "Disallow numeric-only final label (numeric TLDs like example.123)", + "pattern": "@[^@\\s]*\\.\\d+(?:$|\\s)" + }, + { + "$comment": "Reject addresses that start with a single-quote delimiter", + "pattern": "^'" + } + ] + }, + "format": "email", + "minLength": 1 +} diff --git a/tests/ietf/email/address.test.json b/tests/ietf/email/address.test.json new file mode 100644 index 00000000..f24e9426 --- /dev/null +++ b/tests/ietf/email/address.test.json @@ -0,0 +1,366 @@ +{ + "x-license": "https://github.com/sourcemeta/std/blob/main/LICENSE", + "target": "../../../schemas/ietf/email/address.json", + "tests": [ + { + "description": "Invalid type", + "data": 1, + "valid": false + }, + { + "description": "Basic email format", + "data": "simple@example.com", + "valid": true + }, + { + "description": "Local-part with dot", + "data": "very.common@example.com", + "valid": true + }, + { + "description": "Mixed case local and domain", + "data": "FirstName.LastName@EasierReading.org", + "valid": true + }, + { + "description": "Single character local part", + "data": "x@example.com", + "valid": true + }, + { + "description": "Local-part with hyphens and subdomains", + "data": "long.email-address-with-hyphens@and.subdomains.example.com", + "valid": true + }, + { + "description": "Local-part with plus sign and tags", + "data": "user.name+tag+sorting@example.com", + "valid": true + }, + { + "description": "Local-part with 
slash", + "data": "name/surname@example.com", + "valid": true + }, + { + "description": "Local-part with underscore", + "data": "user_name@example.com", + "valid": true + }, + { + "description": "Local-part and domain with hyphen", + "data": "user-name@sub-domain.example.co.uk", + "valid": true + }, + { + "description": "Quoted local-part with double dot inside", + "data": "\"john..doe\"@example.org", + "valid": true + }, + { + "description": "Quoted local-part with space", + "data": "\" \"@example.org", + "valid": true + }, + { + "description": "Quoted local-part ending with dot", + "data": "\"john.doe.\"@example.com", + "valid": true + }, + { + "description": "Quoted local-part containing @ symbol", + "data": "\"john@doe\"@example.com", + "valid": true + }, + { + "description": "Example with all allowed special characters in local-part", + "data": "test!#$%&'*+/=?^_`{|}~@example.com", + "valid": true + }, + { + "description": "Bang path style local-part", + "data": "mailhost!username@example.org", + "valid": true + }, + { + "description": "Percent hack routing", + "data": "user%example.com@example.org", + "valid": true + }, + { + "description": "Local-part ending with dash", + "data": "user-@example.org", + "valid": true + }, + { + "description": "IPv4 address literal domain", + "data": "postmaster@[123.123.123.123]", + "valid": true + }, + { + "description": "IPv6 address literal domain", + "data": "postmaster@[IPv6:2001:db8::85a3:0:0:8a2e:370:7334]", + "valid": true + }, + { + "description": "Local-part starts with underscore", + "data": "_test123@example.org", + "valid": true + }, + { + "description": "Plus in local-part with mixed-case domain", + "data": "test+filter@Example.COM", + "valid": true + }, + { + "description": "Dot in local-part", + "data": "test.filter@example.com", + "valid": true + }, + { + "description": "Underscore in local-part", + "data": "test_filter@example.com", + "valid": true + }, + { + "description": "Domain with no TLD (dotless 
domain)", + "data": "admin@example", + "valid": true + }, + { + "description": "Comments around local, domain, and address", + "data": "(comment)john.smith@(comment)example.com(comment)", + "valid": true + }, + { + "description": "Quoted local-part with space and letters", + "data": "\"Fred Bloggs\"@example.com", + "valid": true + }, + { + "description": "Domain literal with letters and dot", + "data": "user@[abc.def]", + "valid": true + }, + { + "description": "All uppercase local and domain", + "data": "USER@EXAMPLE.COM", + "valid": true + }, + { + "description": "Local-part with tilde and domain with hyphen", + "data": "a~b@c-domain.com", + "valid": true + }, + { + "description": "Local-part with slash and plus", + "data": "first.last+category/department@domain.com", + "valid": true + }, + { + "description": "Local-part with apostrophe", + "data": "o'reilly@example.com", + "valid": true + }, + { + "description": "Local-part with multiple route characters", + "data": "dizzy%example.com+tag@example.org", + "valid": true + }, + { + "description": "Email with comment in domain", + "data": "john.smith@(comment)example.com", + "valid": true + }, + { + "description": "Email with comment in local part", + "data": "jo(comment)hn@example.com", + "valid": true + }, + { + "description": "Complex quoted local-part with many allowed symbols", + "data": "\"very.(),:;<>[]\\\".VERY.\\\"very@\\\\ \\\"very\\\".unusual\"@strange.example.com", + "valid": true + }, + { + "description": "Missing @ symbol", + "data": "plainaddress", + "valid": false + }, + { + "description": "Missing local part", + "data": "@no-local-part.com", + "valid": false + }, + { + "description": "Missing domain", + "data": "user@", + "valid": false + }, + { + "description": "Missing both local and domain parts", + "data": "@", + "valid": false + }, + { + "description": "Leading dot in local-part", + "data": ".user@example.com", + "valid": false + }, + { + "description": "Trailing dot in local-part", + "data": 
"user.@example.com", + "valid": false + }, + { + "description": "Consecutive dots in local-part", + "data": "user..user@example.com", + "valid": false + }, + { + "description": "Leading dot in domain", + "data": "user@.example.com", + "valid": false + }, + { + "description": "Trailing dot in domain", + "data": "user@example.com.", + "valid": false + }, + { + "description": "Consecutive dots in domain", + "data": "user@example..com", + "valid": false + }, + { + "description": "Domain label starts with hyphen", + "data": "user@-example.com", + "valid": false + }, + { + "description": "Domain label ends with hyphen", + "data": "user@example-.com", + "valid": false + }, + { + "description": "Underscore in domain part", + "data": "user@exam_ple.com", + "valid": false + }, + { + "description": "Numeric TLD in domain", + "data": "user@example.123", + "valid": false + }, + { + "description": "Multiple @ symbols", + "data": "A@b@c@example.com", + "valid": false + }, + { + "description": "Invalid characters in local-part (unquoted specials)", + "data": "a\"b(c)d,e:f;gi[j\\k]l@example.com", + "valid": false + }, + { + "description": "Incorrect quoted string placement in local-part", + "data": "just\"not\"right@example.com", + "valid": false + }, + { + "description": "Unquoted space in local-part", + "data": "this is\"not\\allowed@example.com", + "valid": false + }, + { + "description": "Escaped characters outside quotes in local-part", + "data": "this\\ still\\\"not\\\\allowed@example.com", + "valid": false + }, + { + "description": "Just a quoted string with no @ and domain", + "data": "\"test\"", + "valid": false + }, + { + "description": "Unclosed comment in local-part", + "data": "john(dôe@example.com", + "valid": false + }, + { + "description": "Unmatched parenthesis in domain", + "data": "john@example.com)", + "valid": false + }, + { + "description": "Non-ASCII character in local-part", + "data": "mañana@example.com", + "valid": false + }, + { + "description": 
"Non-ASCII character in domain", + "data": "user@exámple.com", + "valid": false + }, + { + "description": "Missing closing bracket in domain literal", + "data": "test@[127.0.0.1", + "valid": false + }, + { + "description": "Missing opening bracket in domain literal", + "data": "test@127.0.0.1]", + "valid": false + }, + { + "description": "Trailing dot after domain literal", + "data": "test@[127.0.0.1].", + "valid": false + }, + { + "description": "Unclosed IPv6 literal", + "data": "test@[IPv6:2001:db8::1", + "valid": false + }, + { + "description": "Extra characters after domain literal", + "data": "test@[IPv6:2001:db8::1]extra", + "valid": false + }, + { + "description": "Uppercase numeric TLD (invalid as TLD numeric only)", + "data": "user@EXAMPLE.123", + "valid": false + }, + { + "description": "IP address without brackets in domain", + "data": "user@123.123.123.123", + "valid": false + }, + { + "description": "IPv6 address without brackets", + "data": "user@IPv6:2001:db8::1", + "valid": false + }, + { + "description": "Single quote used as delimiter (invalid)", + "data": "'test'@example.com", + "valid": false + }, + { + "description": "Double quotes misplacement", + "data": "\"\"test@example.com", + "valid": false + }, + { + "description": "Escaped @ outside quoted string (invalid)", + "data": "test\\@example.com", + "valid": false + }, + { + "description": "Empty string", + "data": "", + "valid": false + } + ] +}