shellscape · shellscape · May 15, 2026 · May 14, 2026 · May 14, 2026 · May 14, 2026
diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,7 @@
 .moon/cache
 .eslintcache
 .compiled
+.pnpm-store
 .rendered
 .test
 node_modules

diff --git a/AGENTS.md b/AGENTS.md
@@ -2,6 +2,7 @@ This repo is a pnpm workspace and uses Moon as the task runner.
 
 - Repo tasks are run via Moon (not `package.json` scripts). The only root script is `prepare` (Husky).
 - `pnpm` is for dependency management only. Never use it as a prefix to run another Node binary (no `pnpm exec`, no `pnpm <tool>`, etc.). The only exception is inside Husky hooks (for example `.husky/pre-commit`).
+- Do not bypass pnpm's `minimumReleaseAge` policy. When adding or updating dependencies, choose versions that satisfy the policy in `pnpm-workspace.yaml`.
 - Bun is BANNED in this repository. NEVER use it (use `tsx` for running TypeScript scripts).
 
 ## Commits and PR titles

diff --git a/packages/canispam/THIRD_PARTY_NOTICES.md b/packages/canispam/THIRD_PARTY_NOTICES.md
@@ -0,0 +1,26 @@
+# Third-Party Notices
+
+## Spamscanner Classifier Data
+
+`src/classifier/spamscanner-classifier.json` is derived from the `classifier.json`
+file published in `spamscanner@6.1.5`.
+
+- Source: https://github.com/spamscanner/spamscanner
+- Package: https://www.npmjs.com/package/spamscanner/v/6.1.5
+- Copyright: Niftylettuce, LLC
+- Original license: Business Source License 1.1
+- Change date: 2025-04-20
+- Change license: Apache License, Version 2.0
+
+The original BSL 1.1 license text is available at
+https://github.com/spamscanner/spamscanner/blob/master/LICENSE.
+
+Relevant upstream license parameters:
+
+```text
+Licensor:             Niftylettuce, LLC.
+Licensed Work:        Spam Scanner
+                      The Licensed Work is (c) 2020 Niftylettuce, LLC.
+Change Date:          2025-04-20
+Change License:       Apache License, Version 2.0
+```
diff --git a/packages/canispam/moon.yml b/packages/canispam/moon.yml
@@ -0,0 +1,17 @@
+# https://moonrepo.dev/docs/config/tasks
+# Moon project configuration for the canispam package; defines a single `test` task.
+$schema: 'https://moonrepo.dev/schemas/project.json'
+
+tasks:
+  # Runs the package's test suite with Vitest using the local vitest.config.ts.
+  test:
+    command: vitest --config vitest.config.ts
+    # NOTE(review): `~:build` presumably refers to this project's own `build` task,
+    # which is not defined in this file — confirm where it is inherited from.
+    deps:
+      - ~:build
+    # Files and directories declared as inputs of the task.
+    inputs:
+      - src
+      - test
+      - package.json
+    # Environment passed to the command; presumably forces colored output — confirm.
+    env:
+      FORCE_COLOR: '1'
+    options:
+      # Caching is disabled for this task.
+      cache: false
+      outputStyle: 'stream'
diff --git a/packages/canispam/package.json b/packages/canispam/package.json
@@ -0,0 +1,47 @@
+{
+  "name": "canispam",
+  "version": "0.0.0",
+  "publishConfig": {
+    "access": "public"
+  },
+  "description": "Browser-safe spam analysis for generated email messages",
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/shellscape/jsx-email.git",
+    "directory": "packages/canispam"
+  },
+  "author": "Andrew Powell <andrew@shellscape.org>",
+  "homepage": "https://jsx.email/",
+  "type": "module",
+  "exports": {
+    "./package.json": "./package.json",
+    ".": {
+      "types": "./dist/index.d.ts",
+      "default": "./dist/index.js"
+    }
+  },
+  "engines": {
+    "node": ">=22.0.0"
+  },
+  "files": [
+    "dist/**",
+    "THIRD_PARTY_NOTICES.md"
+  ],
+  "keywords": [
+    "browser",
+    "email",
+    "eml",
+    "jsx-email",
+    "spam"
+  ],
+  "types": "./dist/index.d.ts",
+  "funding": {
+    "type": "github",
+    "url": "https://github.com/sponsors/shellscape"
+  },
+  "sideEffects": false,
+  "dependencies": {
+    "@ladjs/naivebayes": "^0.1.0"
+  }
+}
diff --git a/packages/canispam/src/classifier.d.ts b/packages/canispam/src/classifier.d.ts
@@ -0,0 +1,20 @@
+/**
+ * Hand-written ambient typings for the `@ladjs/naivebayes` package, covering
+ * only the members declared below.
+ * NOTE(review): these shapes are not checked against the library's
+ * implementation — confirm against the installed version when upgrading.
+ */
+declare module '@ladjs/naivebayes' {
+  /** A category name paired with the numeric value the classifier reports for it. */
+  export interface NaiveBayesProbability {
+    category: string;
+    probability: number;
+  }
+
+  /** Instance surface of a classifier as used by this package. */
+  export interface NaiveBayesClassifier {
+    /** Without the flag (or with `false`) the result is typed as a category name. */
+    categorize(text: string, probability?: false): string;
+    /** With `probability: true` the result is typed as a category/probability pair. */
+    categorize(text: string, probability: true): NaiveBayesProbability;
+    /** Typed as returning one entry per category for the given text. */
+    probabilities(text: string): NaiveBayesProbability[];
+    /** Assignable tokenizer: maps input text to a list of tokens. */
+    tokenizer: (text: string) => string[];
+  }
+
+  /** Static surface of the default export. */
+  export interface NaiveBayesConstructor {
+    /**
+     * Builds a classifier from serialized data.
+     * NOTE(review): the meaning of `limit` is not visible here — see the library docs.
+     */
+    fromJson(json: unknown, limit?: number): NaiveBayesClassifier;
+  }
+
+  const NaiveBayes: NaiveBayesConstructor;
+  export default NaiveBayes;
+}
diff --git a/packages/canispam/src/classifier/classifier.ts b/packages/canispam/src/classifier/classifier.ts
@@ -0,0 +1,37 @@
+import NaiveBayes from '@ladjs/naivebayes';
+
+import classifierData from './spamscanner-classifier.json' with { type: 'json' };
+import { getClassifierTokens } from './tokenize.js';
+import type { CanispamClassifierResult, CanispamFinding, ParsedEml } from '../types.js';
+
+/** Splits already-prepared input on whitespace runs and drops empty pieces. */
+const splitOnWhitespace = (text: string) => text.split(/\s+/).filter(Boolean);
+
+// Classifier restored from the bundled JSON data; its tokenizer is replaced so
+// that the space-joined tokens produced by `getClassifierTokens` are used as-is.
+const classifier = NaiveBayes.fromJson(classifierData);
+classifier.tokenizer = splitOnWhitespace;
+
+/** Score attached to the finding emitted when the classifier predicts spam. */
+const classifierScore = 6;
+
+/**
+ * Runs the Naive Bayes classifier over a parsed email.
+ *
+ * @param parsed - Parsed email handed to `getClassifierTokens`.
+ * @returns The chosen category, whether it equals `'spam'`, the raw
+ * probabilities reported by the classifier, and the number of tokens used.
+ */
+export const classifyEmail = async (parsed: ParsedEml): Promise<CanispamClassifierResult> => {
+  const tokens = await getClassifierTokens(parsed);
+  const probabilities = classifier.probabilities(tokens.join(' '));
+
+  // The first entry is taken as the prediction; 'ham' is the fallback when it is
+  // missing or has an empty category.
+  // NOTE(review): presumably the library returns entries best-first — confirm.
+  const [top] = probabilities;
+  const category = top?.category || 'ham';
+  const isSpam = category === 'spam';
+
+  return { category, isSpam, probabilities, tokenCount: tokens.length };
+};
+
+/**
+ * Converts a classifier result into findings.
+ *
+ * @param result - Result produced by `classifyEmail`.
+ * @returns A single `naive-bayes` finding scored with `classifierScore` when
+ * the result is flagged as spam, otherwise an empty list.
+ */
+export const getClassifierFindings = (result: CanispamClassifierResult): CanispamFinding[] =>
+  result.isSpam
+    ? [
+        {
+          evidence: `category=${result.category}; tokens=${result.tokenCount}`,
+          message: 'Naive Bayes classifier predicts spam.',
+          rule: 'naive-bayes',
+          score: classifierScore
+        }
+      ]
+    : [];
diff --git a/packages/canispam/src/classifier/spamscanner-classifier.json b/packages/canispam/src/classifier/spamscanner-classifier.json
diff --git a/packages/canispam/src/classifier/tokenize.ts b/packages/canispam/src/classifier/tokenize.ts
@@ -0,0 +1,64 @@
+import { stripHtml } from '../html.js';
+
+// Separator pattern used to split text into tokens: one or more characters that
+// are NOT in the listed set of letters, digits and `-` (case-insensitive).
+const genericTokenizer =
+  /[^a-zá-úÁ-Úà-úÀ-Úñü\dа-яёæøåàáảãạăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệíìỉĩịóòỏõọôốồổỗộơớờởỡợúùủũụưứừửữựýỳỷỹỵđäöëïîûœçążśźęćńł-]+/i;
+// Shared encoder that turns tokens into bytes before they are hashed.
+const encoder = new TextEncoder();
+
+// Lower-case tokens that are discarded before hashing.
+// NOTE(review): presumably mirrors the stopword list the upstream classifier
+// data was trained with — confirm before adding or removing entries.
+const spamScannerStopwords = new Set([
+  'a',
+  'an',
+  'and',
+  'are',
+  'as',
+  'at',
+  'be',
+  'by',
+  'for',
+  'from',
+  'has',
+  'in',
+  'is',
+  'it',
+  'of',
+  'on',
+  'or',
+  'that',
+  'the',
+  'to',
+  'was',
+  'were',
+  'will',
+  'with'
+]);
+
+/** Renders a buffer as lower-case hexadecimal, two digits per byte. */
+const toHex = (bytes: ArrayBuffer) =>
+  Array.from(new Uint8Array(bytes), (octet) => octet.toString(16).padStart(2, '0')).join('');
+
+/**
+ * Hashes a token with SHA-256 via Web Crypto and keeps the first 16 hex
+ * characters of the digest.
+ */
+const hashToken = async (token: string) => {
+  const payload = encoder.encode(token);
+  const digest = await globalThis.crypto.subtle.digest('SHA-256', payload);
+  const hex = toHex(digest);
+
+  return hex.slice(0, 16);
+};
+
+// Ordered substitutions applied by `preprocessText`: dates, URLs, email
+// addresses and IPv4-looking values are each replaced by a padded placeholder.
+const placeholderRules: ReadonlyArray<readonly [RegExp, string]> = [
+  [/\b(?:\d{1,2}[/-]){2}\d{2,4}\b/g, ' DATE_PATTERN '],
+  [/\bhttps?:\/\/[^\s"'<>]+/gi, ' URL_LINK '],
+  [/\b[\w.+-]+@[\w.-]+\.[a-z]{2,}\b/gi, ' EMAIL_ADDRESS '],
+  [/\b(?:\d{1,3}\.){3}\d{1,3}\b/g, ' IP_ADDRESS ']
+];
+
+/** Replaces every match of each rule, in order, with its placeholder. */
+const preprocessText = (value: string) =>
+  placeholderRules.reduce(
+    (current, [pattern, placeholder]) => current.replace(pattern, placeholder),
+    value
+  );
+
+/**
+ * Builds the hashed token list fed to the classifier.
+ *
+ * The plain text, the stripped HTML and the subject are joined, run through
+ * `preprocessText`, NFKC-normalized and split with `genericTokenizer`. Tokens
+ * are lower-cased and trimmed; empty tokens, tokens longer than 50 characters
+ * and stopwords are dropped. At most the first 2000 remaining tokens are hashed.
+ *
+ * @param params - The `html`, `subject` and `text` of the message.
+ * @returns A promise of the hashed tokens, in their original order.
+ */
+export const getClassifierTokens = async (params: {
+  html: string;
+  subject: string;
+  text: string;
+}) => {
+  const content = preprocessText(`${params.text} ${stripHtml(params.html)} ${params.subject}`);
+  const kept: string[] = [];
+
+  for (const piece of content.normalize('NFKC').split(genericTokenizer)) {
+    // Stop as soon as the 2000-token budget is filled.
+    if (kept.length === 2000) break;
+
+    const token = piece.toLowerCase().trim();
+    if (token.length === 0 || token.length > 50) continue;
+    if (spamScannerStopwords.has(token)) continue;
+
+    kept.push(token);
+  }
+
+  return Promise.all(kept.map(hashToken));
+};
diff --git a/packages/canispam/src/content-rules.ts b/packages/canispam/src/content-rules.ts
@@ -0,0 +1,75 @@
+import { stripHtml } from './html.js';
+import type { CanispamFinding } from './types.js';
+
+// Keyword or phrase -> score added to a `spam-keyword` finding when it is found.
+// Entries are written in lower case because the scanned content is lower-cased
+// before it is searched.
+const spamKeywords = new Map([
+  ['free', 1],
+  ['winner', 2],
+  ['prize', 2],
+  ['lottery', 3],
+  ['act now', 2],
+  ['click here', 1],
+  ['verify your account', 2],
+  ['suspended', 2],
+  ['wire transfer', 3],
+  ['western union', 3],
+  ['viagra', 3],
+  ['cialis', 3],
+  ['bitcoin', 1],
+  ['guaranteed', 1],
+  ['risk free', 2]
+]);
+
+// Phrase patterns; each one that matches produces a `spam-phrase` finding with
+// a fixed score of 2 and the matched text as evidence.
+const phrasePatterns = [
+  /\bcongratulations.*won\b/i,
+  /\bconfirm your identity\b/i,
+  /\bupdate your account\b/i,
+  /\blimited time offer\b/i,
+  /\binvestment opportunity\b/i
+];
+
+/** Escapes regular-expression metacharacters so a keyword is matched literally. */
+const escapeRegExp = (value: string): string => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+
+/**
+ * Whole-word matchers compiled once from `spamKeywords`.
+ *
+ * A plain substring check flags innocent words that merely contain a keyword
+ * (for example "specialist" contains "cialis" and "freedom" contains "free"),
+ * so every keyword is anchored with a word boundary on both sides.
+ */
+const keywordMatchers = [...spamKeywords].map(([keyword, score]) => ({
+  keyword,
+  pattern: new RegExp(`\\b${escapeRegExp(keyword)}\\b`),
+  score
+}));
+
+/**
+ * Scans the text and HTML parts of a message with the content rules.
+ *
+ * Findings are emitted in this order: the GTUBE test pattern, keyword matches
+ * (in `spamKeywords` order), phrase pattern matches (in `phrasePatterns`
+ * order), and finally a note when HTML is present without a plain text part.
+ *
+ * @param text - Plain text part of the message; may be empty.
+ * @param html - HTML part of the message; may be empty.
+ * @returns The findings produced by the content rules, possibly empty.
+ */
+export const scanContent = (text: string, html: string): CanispamFinding[] => {
+  const findings: CanispamFinding[] = [];
+  // Collapse whitespace runs (including line breaks) to single spaces so that
+  // multi-word keywords and phrases still match hard-wrapped text.
+  const content = `${text} ${stripHtml(html)}`.toLowerCase().replace(/\s+/g, ' ');
+  // The GTUBE marker is case-sensitive and is looked up in the untouched input.
+  const rawContent = `${text} ${html}`;
+
+  if (rawContent.includes('XJS*C4JDBQADN1.NSBN3*2IDNEN*GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X')) {
+    findings.push({
+      message: 'GTUBE spam test pattern detected.',
+      rule: 'gtube',
+      score: 10
+    });
+  }
+
+  for (const { keyword, pattern, score } of keywordMatchers) {
+    if (pattern.test(content)) {
+      findings.push({
+        evidence: keyword,
+        message: `Spam keyword detected: ${keyword}.`,
+        rule: 'spam-keyword',
+        score
+      });
+    }
+  }
+
+  for (const pattern of phrasePatterns) {
+    const match = content.match(pattern);
+    if (match) {
+      findings.push({
+        evidence: match[0],
+        message: 'Spam phrase pattern detected.',
+        rule: 'spam-phrase',
+        score: 2
+      });
+    }
+  }
+
+  if (html && !text.trim()) {
+    findings.push({
+      message: 'HTML email is missing a plain text part.',
+      rule: 'missing-plain-text',
+      score: 1
+    });
+  }
+
+  return findings;
+};