From 8e5e86d8c960d2189163e619d829ec6eb1aa2cfc Mon Sep 17 00:00:00 2001 From: Karan Thakkar Date: Sun, 14 Dec 2025 13:58:52 +0000 Subject: [PATCH 1/6] docs: add verified real-world examples for CLI and Docker API --- README.md | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/README.md b/README.md index 3141dff..9885382 100644 --- a/README.md +++ b/README.md @@ -82,3 +82,90 @@ src/ ## 📄 License GNU General Public License v3.0 + +## 📖 Real-World Recipes + +### 🛠️ CLI Power Usage + +#### 1. Batch Process a Directory +The CLI processes one file at a time. Use a shell loop to process entire folders: +```bash +# Process all JPGs in 'input' dir and save to 'output' dir +mkdir -p output +for f in input/*.jpg; do + npm run cli -- "$f" -o "output/$(basename "$f")" +done +``` + +#### 2. Strict Redaction for Finance/Invoices +Enable strict blocking for sensitive documents: +```bash +npm run cli -- invoice.jpg \ + --block-words "Confidential,SSN,Account" \ + --custom-regex "(?i)account\s*#?\s*\d+" \ + --no-ips # Disable IP scanner if irrelevant to boost speed +``` + +#### 3. Allowlist for Internal Docs +Prevent redaction of known internal terms or headers: +```bash +npm run cli -- internal-doc.jpg \ + --allowlist "CorpCorp,192.168.1.1,ProjectX" +``` + +--- + +### 🐳 Docker API Examples + +The Docker API runs on port 3000 by default. It currently uses standard detection settings (Emails, IPs, Keys, PII). + +#### 1. Quick Test via Curl +```bash +curl -X POST http://localhost:3000/redact \ + -F "image=@/path/to/doc.jpg" \ + -o redacted.png +``` +*Check the `X-Redacted-Stats` header in the response for detection counts.* + +#### 2. 
Python Integration +Process images programmatically in your Python apps: + +```python +import requests + +url = 'http://localhost:3000/redact' +files = {'image': open('contract.jpg', 'rb')} + +response = requests.post(url, files=files) + +if response.status_code == 200: + with open('redacted_contract.png', 'wb') as f: + f.write(response.content) + print("Stats:", response.headers.get('X-Redacted-Stats')) +else: + print("Error:", response.text) +``` + +#### 3. Node.js Integration (Native Fetch) +Requires Node.js 18+. No extra libraries needed! + +```javascript +import fs from 'fs'; + +const fileBuffer = fs.readFileSync('id_card.jpg'); +const blob = new Blob([fileBuffer], { type: 'image/jpeg' }); + +const formData = new FormData(); +formData.append('image', blob, 'id_card.jpg'); + +const response = await fetch('http://localhost:3000/redact', { + method: 'POST', + body: formData +}); + +if (response.ok) { + const buffer = Buffer.from(await response.arrayBuffer()); + fs.writeFileSync('redacted_id.png', buffer); + console.log('Stats:', response.headers.get('x-redacted-stats')); +} +``` From ace2024a08a58442b2b7fab6e6b32611d88891e3 Mon Sep 17 00:00:00 2001 From: Karan Thakkar Date: Sun, 14 Dec 2025 15:01:39 +0000 Subject: [PATCH 2/6] fix(api): use DEFAULT_ALLOWLIST in server settings --- src/server.ts | 76 ++++++++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 34 deletions(-) diff --git a/src/server.ts b/src/server.ts index c86579d..6adae32 100644 --- a/src/server.ts +++ b/src/server.ts @@ -4,6 +4,8 @@ import { processImage } from './core/processor'; import { NodeCanvasAdapter } from './adapters/NodeCanvasAdapter'; import type { DetectionSettings } from './types'; +import { DEFAULT_ALLOWLIST } from './constants/config'; + const server = fastify({ logger: true }); // Register multipart support for file uploads @@ -21,40 +23,44 @@ server.get('/health', async () => { }); server.post('/redact', async (req, reply) => { - const data = await 
req.file(); - - if (!data) { - return reply.code(400).send({ error: 'No image file uploaded' }); - } - - if (!['image/jpeg', 'image/png'].includes(data.mimetype)) { - return reply.code(400).send({ error: 'Only .jpg and .png files are supported' }); - } + const parts = req.parts(); + let imageBuffer: Buffer | undefined; + let settings: DetectionSettings = { + email: true, + ip: true, + creditCard: true, + secret: true, + pii: true, + allowlist: [...DEFAULT_ALLOWLIST], + blockWords: [], + customDates: [], + customRegex: [] + }; try { - const buffer = await data.toBuffer(); - - // Parse settings (optional) - // Note: multipart fields come as streams or values. - // For simplicity, we can inspect other fields if parsed, - // but @fastify/multipart with req.file() iterates. - // Let's attach settings via a specific header or query param for mvp, - // or parse fields if needed. - // Better: simple default settings for now, allow enhancement later. - const settings: DetectionSettings = { - email: true, - ip: true, - creditCard: true, - secret: true, - pii: true, - // Default empty limits - allowlist: [], - blockWords: [], - customDates: [], - customRegex: [] - }; - - const result = await processImage(buffer, { + for await (const part of parts) { + if (part.type === 'file' && part.fieldname === 'image') { + if (!['image/jpeg', 'image/png'].includes(part.mimetype)) { + // We continue to consume the stream to avoid hanging, but note the error + // Or just throw immediately if we want to fail fast + throw new Error('Only .jpg and .png files are supported'); + } + imageBuffer = await part.toBuffer(); + } else if (part.type === 'field' && part.fieldname === 'settings') { + try { + const parsed = JSON.parse(part.value as string); + settings = { ...settings, ...parsed }; + } catch { + req.log.warn('Failed to parse settings JSON'); + } + } + } + + if (!imageBuffer) { + return reply.code(400).send({ error: 'No image file uploaded' }); + } + + const result = await 
processImage(imageBuffer, { canvasFactory: adapter, detectionSettings: settings, }); @@ -70,9 +76,11 @@ server.post('/redact', async (req, reply) => { reply.type('image/png'); return outputBuffer; - } catch (err) { + } catch (err: unknown) { req.log.error(err); - return reply.code(500).send({ error: 'Redaction processing failed' }); + const errorMessage = err instanceof Error ? err.message : 'Unknown error'; + const status = errorMessage.includes('supported') ? 400 : 500; + return reply.code(status).send({ error: errorMessage || 'Redaction processing failed' }); } }); From 19942ec631e93d55ef22dc4095f5ea34d5af9b6e Mon Sep 17 00:00:00 2001 From: Karan Thakkar Date: Sun, 14 Dec 2025 15:06:16 +0000 Subject: [PATCH 3/6] docs: add API configuration example --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index 9885382..4732f59 100644 --- a/README.md +++ b/README.md @@ -169,3 +169,14 @@ if (response.ok) { console.log('Stats:', response.headers.get('x-redacted-stats')); } ``` + +#### 4. Custom Configuration (JSON) +You can fine-tune the API detection by passing a `settings` JSON field: + +```bash +# Disable Email redaction and add custom block words +curl -X POST http://localhost:3000/redact \ + -F "image=@doc.jpg" \ + -F 'settings={"email":false, "blockWords":["CONFIDENTIAL"]}' \ + -o redacted.png +``` From c66ad975d5db3698d15dda6fa4a38729f6053a85 Mon Sep 17 00:00:00 2001 From: Karan Thakkar Date: Sun, 14 Dec 2025 15:09:54 +0000 Subject: [PATCH 4/6] docs: clarify API configurability in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4732f59..b640b66 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,7 @@ npm run cli -- internal-doc.jpg \ ### 🐳 Docker API Examples -The Docker API runs on port 3000 by default. It currently uses standard detection settings (Emails, IPs, Keys, PII). +The Docker API runs on port 3000 by default. 
It uses standard detection settings (Emails, IPs, Credit Cards, Keys, PII) by default, but is **fully configurable** via the `settings` parameter. #### 1. Quick Test via Curl ```bash From c3859a01f875b5df535ddae9eea29a121026b146 Mon Sep 17 00:00:00 2001 From: Karan Thakkar Date: Sun, 14 Dec 2025 15:12:21 +0000 Subject: [PATCH 5/6] docs: create exhaustive API documentation --- README.md | 61 ++---------------------- docs/API.md | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 58 deletions(-) create mode 100644 docs/API.md diff --git a/README.md b/README.md index b640b66..d0bb0e9 100644 --- a/README.md +++ b/README.md @@ -115,68 +115,13 @@ npm run cli -- internal-doc.jpg \ --- -### 🐳 Docker API Examples - The Docker API runs on port 3000 by default. It uses standard detection settings (Emails, IPs, Credit Cards, Keys, PII) by default, but is **fully configurable** via the `settings` parameter. -#### 1. Quick Test via Curl -```bash -curl -X POST http://localhost:3000/redact \ - -F "image=@/path/to/doc.jpg" \ - -o redacted.png -``` -*Check the `X-Redacted-Stats` header in the response for detection counts.* - -#### 2. Python Integration -Process images programmatically in your Python apps: - -```python -import requests - -url = 'http://localhost:3000/redact' -files = {'image': open('contract.jpg', 'rb')} - -response = requests.post(url, files=files) - -if response.status_code == 200: - with open('redacted_contract.png', 'wb') as f: - f.write(response.content) - print("Stats:", response.headers.get('X-Redacted-Stats')) -else: - print("Error:", response.text) -``` - -#### 3. Node.js Integration (Native Fetch) -Requires Node.js 18+. No extra libraries needed!
- -```javascript -import fs from 'fs'; - -const fileBuffer = fs.readFileSync('id_card.jpg'); -const blob = new Blob([fileBuffer], { type: 'image/jpeg' }); - -const formData = new FormData(); -formData.append('image', blob, 'id_card.jpg'); - -const response = await fetch('http://localhost:3000/redact', { - method: 'POST', - body: formData -}); - -if (response.ok) { - const buffer = Buffer.from(await response.arrayBuffer()); - fs.writeFileSync('redacted_id.png', buffer); - console.log('Stats:', response.headers.get('x-redacted-stats')); -} -``` - -#### 4. Custom Configuration (JSON) -You can fine-tune the API detection by passing a `settings` JSON field: +👉 **[View Full API Documentation](docs/API.md)** for detailed usage, schema, and Python/Node.js examples. +#### Quick Test (Curl) ```bash -# Disable Email redaction and add custom block words curl -X POST http://localhost:3000/redact \ - -F "image=@doc.jpg" \ - -F 'settings={"email":false, "blockWords":["CONFIDENTIAL"]}' \ + -F "image=@/path/to/doc.jpg" \ -o redacted.png ``` diff --git a/docs/API.md b/docs/API.md new file mode 100644 index 0000000..140e21a --- /dev/null +++ b/docs/API.md @@ -0,0 +1,133 @@ +# AutoRedact API Documentation + +The AutoRedact API provides a simple, high-performance interface for redacting sensitive information from images using the same engine as the CLI and Web UI. + +## Base URL + +By default, the Docker container exposes the API on port `3000`. + +\`\`\` +http://localhost:3000 +\`\`\` + +--- + +## Endpoints + +### 1. Redact Image + +**POST** \`/redact\` + +Uploads an image for processing and returns the redacted image. + +#### Request Headers +* **Content-Type**: \`multipart/form-data\` + +#### Request Body (Multipart Fields) + +| Field | Type | Required | Description | +| :--- | :--- | :--- | :--- | +| \`image\` | File | **Yes** | The image file to process. Supports \`.jpg\` and \`.png\`. | +| \`settings\` | JSON String | No | Configuration object to control detection rules. 
| + +#### Settings Schema + +The \`settings\` field accepts a JSON string matching the \`DetectionSettings\` interface: + +\`\`\`json +{ + "email": boolean, // Default: true + "ip": boolean, // Default: true + "creditCard": boolean, // Default: true + "secret": boolean, // Default: true + "pii": boolean, // Default: true + "allowlist": string[], // e.g. ["127.0.0.1", "MyCorp"] + "blockWords": string[], // e.g. ["CONFIDENTIAL"] + "customRegex": [ + { + "pattern": "regex_pattern", + "flags": "i" // Optional + } + ] +} +\`\`\` + +#### Response + +* **Status**: \`200 OK\` +* **Content-Type**: \`image/png\` (The redacted image) +* **Header** \`X-Redacted-Stats\`: JSON string containing detection counts. + +--- + +## Examples + +### 1. Basic Usage (Curl) +Redact an image using default settings: + +\`\`\`bash +curl -X POST http://localhost:3000/redact \\ + -F "image=@/path/to/document.jpg" \\ + -o redacted.png +\`\`\` + +### 2. Advanced Configuration (Curl) +Disable email redaction and add custom block words: + +\`\`\`bash +curl -X POST http://localhost:3000/redact \\ + -F "image=@invoice.jpg" \\ + -F 'settings={"email":false, "blockWords":["CONFIDENTIAL", "SSN"]}' \\ + -o redacted_invoice.png +\`\`\` + +### 3. Node.js (Native Fetch) +Requires Node 18+: + +\`\`\`javascript +import fs from 'fs'; + +const fileBuffer = fs.readFileSync('doc.jpg'); +const blob = new Blob([fileBuffer], { type: 'image/jpeg' }); + +const formData = new FormData(); +formData.append('image', blob, 'doc.jpg'); +formData.append('settings', JSON.stringify({ + allowlist: ['CorpName'], + ip: false // Disable IP scanner +})); + +const response = await fetch('http://localhost:3000/redact', { + method: 'POST', + body: formData +}); + +if (response.ok) { + const buffer = Buffer.from(await response.arrayBuffer()); + fs.writeFileSync('redacted.png', buffer); + console.log('Stats:', response.headers.get('x-redacted-stats')); +} +\`\`\` + +### 4. 
Python (Requests) + +\`\`\`python +import requests +import json + +url = 'http://localhost:3000/redact' +files = {'image': open('doc.jpg', 'rb')} +settings = { + "email": False, + "blockWords": ["DO NOT DISTRIBUTE"] +} + +data = {'settings': json.dumps(settings)} + +response = requests.post(url, files=files, data=data) + +if response.status_code == 200: + with open('redacted.png', 'wb') as f: + f.write(response.content) + print("Stats:", response.headers.get('X-Redacted-Stats')) +\`\`\` From 3bc2860d0eb0e4d2cac23218c0e6005717e54614 Mon Sep 17 00:00:00 2001 From: Karan Thakkar Date: Sun, 14 Dec 2025 15:14:37 +0000 Subject: [PATCH 6/6] docs: fix escaped code fences in API documentation --- docs/API.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/API.md b/docs/API.md index 140e21a..ad45a8d 100644 --- a/docs/API.md +++ b/docs/API.md @@ -6,9 +6,9 @@ The AutoRedact API provides a simple, high-performance interface for redacting s By default, the Docker container exposes the API on port `3000`. -\`\`\` +``` http://localhost:3000 -\`\`\` +``` --- @@ -34,7 +34,7 @@ Uploads an image for processing and returns the redacted image. The \`settings\` field accepts a JSON string matching the \`DetectionSettings\` interface: -\`\`\`json +```json { "email": boolean, // Default: true "ip": boolean, // Default: true @@ -50,7 +50,7 @@ The \`settings\` field accepts a JSON string matching the \`DetectionSettings\` } ] } -\`\`\` +``` #### Response @@ -65,26 +65,26 @@ The \`settings\` field accepts a JSON string matching the \`DetectionSettings\` ### 1. Basic Usage (Curl) Redact an image using default settings: -\`\`\`bash +```bash curl -X POST http://localhost:3000/redact \\ -F "image=@/path/to/document.jpg" \\ -o redacted.png -\`\`\` +``` ### 2.
Advanced Configuration (Curl) Disable email redaction and add custom block words: -\`\`\`bash +```bash curl -X POST http://localhost:3000/redact \\ -F "image=@invoice.jpg" \\ -F 'settings={"email":false, "blockWords":["CONFIDENTIAL", "SSN"]}' \\ -o redacted_invoice.png -\`\`\` +``` ### 3. Node.js (Native Fetch) Requires Node 18+: -\`\`\`javascript +```javascript import fs from 'fs'; const fileBuffer = fs.readFileSync('doc.jpg'); @@ -107,11 +107,11 @@ if (response.ok) { fs.writeFileSync('redacted.png', buffer); console.log('Stats:', response.headers.get('x-redacted-stats')); } -\`\`\` +``` ### 4. Python (Requests) -\`\`\`python +```python import requests import json @@ -130,4 +130,4 @@ if response.status_code == 200: with open('redacted.png', 'wb') as f: f.write(response.content) print("Stats:", response.headers.get('X-Redacted-Stats')) -\`\`\` +```