From 7a0a13f38af9d803ee6638485c1046f09388c4b5 Mon Sep 17 00:00:00 2001 From: Steven C Date: Fri, 31 Oct 2025 09:48:48 -0400 Subject: [PATCH 1/2] Adding Korean RRN --- .gitignore | 3 +++ src/__tests__/unit/checks/pii.test.ts | 27 +++++++++++++++++++++++++++ src/checks/pii.ts | 6 ++++++ 3 files changed, 36 insertions(+) diff --git a/.gitignore b/.gitignore index cd263d3..43a6083 100644 --- a/.gitignore +++ b/.gitignore @@ -101,3 +101,6 @@ site/ __pycache__/ *.pyc .pytest_cache/ + +# internal examples +internal_examples/ \ No newline at end of file diff --git a/src/__tests__/unit/checks/pii.test.ts b/src/__tests__/unit/checks/pii.test.ts index 198d662..52da9cf 100644 --- a/src/__tests__/unit/checks/pii.test.ts +++ b/src/__tests__/unit/checks/pii.test.ts @@ -42,4 +42,31 @@ describe('pii guardrail', () => { await expect(pii({}, '', config)).rejects.toThrow('Text cannot be empty or null'); }); + + it('detects Korean Resident Registration Number (KR_RRN)', async () => { + const config = PIIConfig.parse({ + entities: [PIIEntity.KR_RRN], + block: false, + }); + const text = 'Korean RRN: 123456-1234567'; + + const result = await pii({}, text, config); + + expect(result.tripwireTriggered).toBe(false); + expect((result.info?.detected_entities as Record)?.KR_RRN).toEqual(['123456-1234567']); + expect(result.info?.checked_text).toBe('Korean RRN: '); + }); + + it('triggers tripwire for KR_RRN when block=true', async () => { + const config = PIIConfig.parse({ + entities: [PIIEntity.KR_RRN], + block: true, + }); + const text = 'Korean RRN: 123456-1234567'; + + const result = await pii({}, text, config); + + expect(result.tripwireTriggered).toBe(true); + expect((result.info?.detected_entities as Record)?.KR_RRN).toEqual(['123456-1234567']); + }); }); diff --git a/src/checks/pii.ts b/src/checks/pii.ts index da9e9d0..6c3e46f 100644 --- a/src/checks/pii.ts +++ b/src/checks/pii.ts @@ -125,6 +125,9 @@ export enum PIIEntity { // Finland FI_PERSONAL_IDENTITY_CODE = 
'FI_PERSONAL_IDENTITY_CODE', + + // Korea + KR_RRN = 'KR_RRN', } /** @@ -236,6 +239,9 @@ const DEFAULT_PII_PATTERNS: Record = { // Finland [PIIEntity.FI_PERSONAL_IDENTITY_CODE]: /\b\d{6}[+-A]\d{3}[A-Z0-9]\b/g, + + // Korea + [PIIEntity.KR_RRN]: /\b\d{6}-\d{7}\b/g, }; /** From fb542173d84f99121f8e090d064bb2753a8ba0e7 Mon Sep 17 00:00:00 2001 From: Steven C Date: Fri, 31 Oct 2025 09:56:36 -0400 Subject: [PATCH 2/2] Make regex more specific --- src/__tests__/unit/checks/pii.test.ts | 47 ++++++++++++++++++++++++--- src/checks/pii.ts | 3 +- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/__tests__/unit/checks/pii.test.ts b/src/__tests__/unit/checks/pii.test.ts index 52da9cf..e96e383 100644 --- a/src/__tests__/unit/checks/pii.test.ts +++ b/src/__tests__/unit/checks/pii.test.ts @@ -43,30 +43,67 @@ describe('pii guardrail', () => { await expect(pii({}, '', config)).rejects.toThrow('Text cannot be empty or null'); }); - it('detects Korean Resident Registration Number (KR_RRN)', async () => { + it('detects valid Korean Resident Registration Number (KR_RRN)', async () => { const config = PIIConfig.parse({ entities: [PIIEntity.KR_RRN], block: false, }); - const text = 'Korean RRN: 123456-1234567'; + // Valid format: YYMMDD-GNNNNNN (900101 = Jan 1, 1990, gender digit 1) + const text = 'Korean RRN: 900101-1234567'; const result = await pii({}, text, config); expect(result.tripwireTriggered).toBe(false); - expect((result.info?.detected_entities as Record)?.KR_RRN).toEqual(['123456-1234567']); + expect((result.info?.detected_entities as Record)?.KR_RRN).toEqual(['900101-1234567']); expect(result.info?.checked_text).toBe('Korean RRN: '); }); + it('detects multiple valid KR_RRN formats', async () => { + const config = PIIConfig.parse({ + entities: [PIIEntity.KR_RRN], + block: false, + }); + // Testing different valid date ranges and gender digits (1-4) + const text = 'RRNs: 850315-2345678, 001231-3456789, 750628-4123456'; + + const result = await pii({}, text, 
config); + + expect(result.tripwireTriggered).toBe(false); + expect((result.info?.detected_entities as Record)?.KR_RRN).toHaveLength(3); + expect((result.info?.detected_entities as Record)?.KR_RRN).toContain('850315-2345678'); + expect((result.info?.detected_entities as Record)?.KR_RRN).toContain('001231-3456789'); + expect((result.info?.detected_entities as Record)?.KR_RRN).toContain('750628-4123456'); + }); + + it('does not detect invalid KR_RRN patterns (false positives)', async () => { + const config = PIIConfig.parse({ + entities: [PIIEntity.KR_RRN], + block: false, + }); + // Invalid patterns that should NOT be detected: + // - Invalid month (13) + // - Invalid day (00, 32) + // - Invalid gender digit (0, 5, 9) + // - Random tracking numbers + const text = 'Invalid: 901301-1234567, 900100-1234567, 900132-1234567, 900101-0234567, 900101-5234567, 123456-7890123'; + + const result = await pii({}, text, config); + + expect(result.tripwireTriggered).toBe(false); + expect(result.info?.detected_entities).toEqual({}); + expect(result.info?.checked_text).toBe(text); // No masking should occur + }); + it('triggers tripwire for KR_RRN when block=true', async () => { const config = PIIConfig.parse({ entities: [PIIEntity.KR_RRN], block: true, }); - const text = 'Korean RRN: 123456-1234567'; + const text = 'Korean RRN: 900101-1234567'; const result = await pii({}, text, config); expect(result.tripwireTriggered).toBe(true); - expect((result.info?.detected_entities as Record)?.KR_RRN).toEqual(['123456-1234567']); + expect((result.info?.detected_entities as Record)?.KR_RRN).toEqual(['900101-1234567']); }); }); diff --git a/src/checks/pii.ts b/src/checks/pii.ts index 6c3e46f..c593ef9 100644 --- a/src/checks/pii.ts +++ b/src/checks/pii.ts @@ -241,7 +241,8 @@ const DEFAULT_PII_PATTERNS: Record = { [PIIEntity.FI_PERSONAL_IDENTITY_CODE]: /\b\d{6}[+-A]\d{3}[A-Z0-9]\b/g, // Korea - [PIIEntity.KR_RRN]: /\b\d{6}-\d{7}\b/g, + // Format: YYMMDD-GNNNNNN where YY=year, MM=month(01-12), 
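DD=day(01-31), G=gender/century digit (only 1-4 are matched). NOTE(review): real RRNs also use 5-8 (foreign residents) and 9/0 (births before 1900), so those are not detected; confirm this exclusion is intended + [PIIEntity.KR_RRN]: /\b\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])-[1-4]\d{6}\b/g, }; /**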