diff --git a/src/__tests__/unit/checks/pii.test.ts b/src/__tests__/unit/checks/pii.test.ts index b63abf0..24ec246 100644 --- a/src/__tests__/unit/checks/pii.test.ts +++ b/src/__tests__/unit/checks/pii.test.ts @@ -224,19 +224,54 @@ describe('pii guardrail', () => { expect(result.info?.checked_text).toBe('cvv='); }); - it('detects BIC/SWIFT codes', async () => { + it('detects BIC/SWIFT codes with explicit prefixes', async () => { const config = PIIConfig.parse({ entities: [PIIEntity.BIC_SWIFT], block: false, }); - const text = 'Transfer to BIC DEXXDEXX tomorrow.'; + const text = 'Transfer to BIC DEUTDEFF500 tomorrow.'; const result = await pii({}, text, config); - expect((result.info?.detected_entities as Record)?.BIC_SWIFT).toEqual(['DEXXDEXX']); + expect((result.info?.detected_entities as Record)?.BIC_SWIFT).toEqual([ + 'DEUTDEFF500', + ]); expect(result.info?.checked_text).toBe('Transfer to BIC tomorrow.'); }); + it('detects BIC/SWIFT codes from known bank prefixes', async () => { + const config = PIIConfig.parse({ + entities: [PIIEntity.BIC_SWIFT], + block: false, + }); + const text = 'Send funds to CHASUS33 by Friday.'; + + const result = await pii({}, text, config); + + expect((result.info?.detected_entities as Record)?.BIC_SWIFT).toEqual(['CHASUS33']); + expect(result.info?.checked_text).toBe('Send funds to by Friday.'); + }); + + it('does not flag common words as BIC/SWIFT codes', async () => { + const config = PIIConfig.parse({ + entities: [PIIEntity.BIC_SWIFT], + block: false, + }); + const texts = [ + 'The CUSTOMER ordered a product.', + 'We will REGISTER your account.', + 'Please CONSIDER this option.', + 'The DOCUMENT is ready.', + 'This is ABSTRACT art.', + ]; + + for (const text of texts) { + const result = await pii({}, text, config); + expect((result.info?.detected_entities as Record)?.BIC_SWIFT).toBeUndefined(); + expect(result.info?.pii_detected).toBe(false); + } + }); + it('detects precise street addresses as location', async () => { const config = PIIConfig.parse({ entities: [PIIEntity.LOCATION], diff --git a/src/checks/pii.ts b/src/checks/pii.ts index e91273b..5527d63 100644 --- a/src/checks/pii.ts +++ b/src/checks/pii.ts @@ -205,6 +205,88 @@ interface PiiDetectionResult { spans: ReplacementSpan[]; } +const BIC_CONTEXT_PREFIX_PATTERN = [ + '(?:[sS][wW][iI][fF][tT])', + '(?:[bB][iI][cC])', + '(?:[bB][aA][nN][kK][\\s-]?[cC][oO][dD][eE])', + '(?:[sS][wW][iI][fF][tT][\\s-]?[cC][oO][dD][eE])', + '(?:[bB][iI][cC][\\s-]?[cC][oO][dD][eE])', +].join('|'); + +const BIC_WITH_CONTEXT_REGEX = new RegExp( + `(?:${BIC_CONTEXT_PREFIX_PATTERN})[:\\s=]+([A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?)\\b`, + 'g' +); + +const KNOWN_BIC_PREFIXES = [ + 'DEUT', + 'CHAS', + 'BARC', + 'HSBC', + 'BNPA', + 'CITI', + 'WELL', + 'BOFA', + 'JPMC', + 'GSCC', + 'MSNY', + 'COBA', + 'DRSD', + 'BYLA', + 'MALA', + 'HYVE', + 'WFBI', + 'USBC', + 'LOYD', + 'MIDL', + 'NWBK', + 'RBOS', + 'CRLY', + 'SOGE', + 'AGRI', + 'UBSW', + 'CRES', + 'SANB', + 'BBVA', + 'UNCR', + 'BCIT', + 'INGB', + 'ABNA', + 'RABO', + 'ROYA', + 'TDOM', + 'BNSC', + 'ANZB', + 'NATA', + 'WPAC', + 'CTBA', + 'BKCH', + 'MHCB', + 'BOTK', + 'ICBK', + 'ABOC', + 'PCBC', + 'HSBC', + 'SCBL', + 'DBSS', + 'OCBC', + 'UOVB', + 'CZNB', + 'SHBK', + 'KOEX', + 'HVBK', + 'NACF', + 'IBKO', + 'KODB', + 'HNBN', + 'CITI', +]; + +const KNOWN_BIC_REGEX = new RegExp( + `\\b(?:${KNOWN_BIC_PREFIXES.join('|')})[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?\\b`, + 'g' +); + /** * Default regex patterns for PII entity types. */ @@ -245,7 +327,10 @@ const DEFAULT_PII_PATTERNS: Record = { group: 1, }, ], - [PIIEntity.BIC_SWIFT]: [{ regex: /\b[A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?\b/g }], + [PIIEntity.BIC_SWIFT]: [ + { regex: BIC_WITH_CONTEXT_REGEX, group: 1 }, + { regex: KNOWN_BIC_REGEX }, + ], // USA [PIIEntity.US_BANK_NUMBER]: [{ regex: /\b\d{8,17}\b/g }],