Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion packages/script/src/registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,10 @@ export async function registry(resolve?: (path: string) => Promise<string>): Pro
return `${proxyPrefix}/${host}`
},
},
// PostHog supports `apiHost` for self-hosted instances and custom
// reverse proxies. Without this, custom-host users are 403'd through
// the proxy because only the SaaS US/EU hosts are allowlisted.
configDomainFields: ['apiHost'],
},
}),
def('fathomAnalytics', {
Expand Down Expand Up @@ -575,7 +579,13 @@ export async function registry(resolve?: (path: string) => Promise<string>): Pro
},
},
proxy: {
domains: ['www.clarity.ms', 'scripts.clarity.ms', 'd.clarity.ms', 'e.clarity.ms', 'k.clarity.ms', 'c.clarity.ms', 'a.clarity.ms', 'b.clarity.ms'],
// Clarity buckets visitors across letter/hash-prefixed shards (a/b/c/d/e/k/...).
// Microsoft adds shards over time, so an enumerated list silently 403s
// through the proxy when an unlisted letter is rolled out (#728-class bug).
// `*.clarity.ms` covers the full surface at runtime; `www.clarity.ms` is
// kept literal so the build-time URL rewrite (which filters wildcards)
// can still rewrite `https://www.clarity.ms/tag/<id>` in bundled SDKs.
domains: ['www.clarity.ms', '*.clarity.ms'],
privacy: PRIVACY_HEATMAP,
},
partytown: { forwards: ['clarity'] },
Expand Down
45 changes: 32 additions & 13 deletions packages/script/src/runtime/server/utils/match-domain.ts
Original file line number Diff line number Diff line change
@@ -1,30 +1,49 @@
/**
* Match a hostname against an allowlist pattern.
*
* Patterns may include `*` as a TLD wildcard that matches a top-level domain
* suffix shaped like a real ccTLD or gTLD:
* - `com` (the canonical gTLD we care about)
* - any 2-letter ccTLD (`tw`, `jp`, `de`, ...)
* - regional `com.<cc>` or `co.<cc>` (e.g. `com.tw`, `co.jp`, `com.hk`)
* Two wildcard shapes are supported:
*
* Used for geo-localized Google ccTLDs:
* `www.google.*` matches `www.google.com`, `www.google.com.tw`, `www.google.co.jp`.
* 1. Trailing TLD wildcard `host.*` — matches a top-level domain suffix
* shaped like a real ccTLD or gTLD:
* - `com` (the canonical gTLD we care about)
* - any 2-letter ccTLD (`tw`, `jp`, `de`, ...)
* - regional `com.<cc>` or `co.<cc>` (e.g. `com.tw`, `co.jp`, `com.hk`)
*
* The pattern is intentionally narrow: it rejects attacker-controlled suffixes
* like `www.google.foo.bar` (two arbitrary 3-letter labels) or
* `www.google.attacker.com` (long second-level label).
* Used for geo-localized Google ccTLDs:
* `www.google.*` matches `www.google.com`, `www.google.com.tw`, `www.google.co.jp`.
*
* The pattern is intentionally narrow: it rejects attacker-controlled
* suffixes like `www.google.foo.bar` (two arbitrary 3-letter labels) or
* `www.google.attacker.com` (long second-level label).
*
* 2. Leading subdomain wildcard `*.host.tld` — matches exactly one DNS label
* in front of the suffix. The wildcard label must be one or more non-dot
* chars; it does not match the bare suffix or multi-label prefixes.
*
* Used for vendors that bucket clients across letter/hash-prefixed shards:
* `*.clarity.ms` matches `a.clarity.ms`, `www.clarity.ms`, `scripts.clarity.ms`.
* It does NOT match `clarity.ms` (no prefix) or `a.b.clarity.ms` (multi-label).
*
* Bare patterns also match subdomains, e.g. `google.com` matches `mail.google.com`.
*/
const TLD_WILDCARD_RE = /^(?:com|[a-z]{2}|(?:com|co)\.[a-z]{2})$/i
const SUBDOMAIN_LABEL_RE = /^[^.]+$/

export function matchDomain(domain: string, pattern: string): boolean {
if (!pattern.includes('*'))
return domain === pattern || domain.endsWith(`.${pattern}`)

// Only support a trailing single `*` wildcard for TLD matching (the only
// shape we use in practice). Reject any other pattern shape rather than
// silently allowing it.
// Leading subdomain wildcard: `*.host.tld` matches exactly one non-dot label.
if (pattern.startsWith('*.') && pattern.indexOf('*') === 0) {
const suffix = pattern.slice(2) // drop leading "*."
if (!domain.endsWith(`.${suffix}`))
return false
const label = domain.slice(0, -(suffix.length + 1))
return SUBDOMAIN_LABEL_RE.test(label)
}

// Trailing TLD wildcard: only support a single trailing `*`. Any other
// wildcard shape is rejected rather than silently allowed.
if (!pattern.endsWith('*') || pattern.indexOf('*') !== pattern.length - 1)
return false

Expand Down
7 changes: 7 additions & 0 deletions test/unit/first-party.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,13 @@ describe('first-party mode', () => {
apiUrl: 'https://events.analytics.example.com',
}, configs.databuddyAnalytics)).toEqual(['cdn.analytics.example.com', 'events.analytics.example.com'])
})

it('derives extra allowlist domains for self-hosted PostHog apiHost', async () => {
const configs = await getProxyConfigs()
expect(resolveConfiguredProxyDomains({
apiHost: 'https://posthog.example.com',
}, configs.posthog)).toEqual(['posthog.example.com'])
})
})

describe('full chain: capabilities β†’ proxy config β†’ domains', () => {
Expand Down
21 changes: 21 additions & 0 deletions test/unit/proxy-handler-match-domain.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,25 @@ describe('matchDomain', () => {
expect(matchDomain('foo.bar.com', 'foo+bar.com')).toBe(false)
expect(matchDomain('foo+bar.com', 'foo+bar.com')).toBe(true)
})

// Microsoft Clarity buckets visitors across letter-prefixed shards
// (a/b/c/d/e/k/scripts/www/...); enumerating them silently 403s through
// the proxy when a new shard is rolled out.
it('matches single-label subdomain via leading wildcard', () => {
expect(matchDomain('a.clarity.ms', '*.clarity.ms')).toBe(true)
expect(matchDomain('z.clarity.ms', '*.clarity.ms')).toBe(true)
expect(matchDomain('whatever.clarity.ms', '*.clarity.ms')).toBe(true)
expect(matchDomain('www.clarity.ms', '*.clarity.ms')).toBe(true)
expect(matchDomain('scripts.clarity.ms', '*.clarity.ms')).toBe(true)
})

it('leading wildcard requires exactly one non-dot label', () => {
// bare suffix has no prefix label
expect(matchDomain('clarity.ms', '*.clarity.ms')).toBe(false)
// multi-label prefix would be too permissive (e.g. a.b.clarity.ms); the wildcard covers exactly one label
expect(matchDomain('a.b.clarity.ms', '*.clarity.ms')).toBe(false)
// different host root must not match
expect(matchDomain('a.evil.ms', '*.clarity.ms')).toBe(false)
expect(matchDomain('clarity.ms.attacker.com', '*.clarity.ms')).toBe(false)
})
})
Loading