Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --skip and --preserve-null support. With chalk for logging. #40

Merged
merged 1 commit into from
Mar 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
/tmp
node_modules
output.sql
.idea
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,22 @@ module.exports = {
};
```

### Skip tables

Use `--skip` (or `-s`) to exclude entire tables from anonymization. Separate multiple tables with commas.

```bash
npx pg-anonymizer postgres://localhost/mydb --skip public.posts
```

### Ignore `NULL` values

Use `--preserve-null` (or `-n`) to leave `NULL` values untouched instead of anonymizing them.

```bash
npx pg-anonymizer postgres://localhost/mydb --preserve-null
```

### Locale (i18n)

Use `-f` to change the locale used by faker (default: `en`)
Expand Down
5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,17 @@
"@oclif/config": "^1.18.2",
"@oclif/plugin-help": "^5.1.10",
"@types/faker": "^5.5.9",
"chalk": "^4",
"faker": "^5.2.0",
"pluralize": "^8.0.0",
"tslib": "^2.3.1"
},
"devDependencies": {
"@oclif/dev-cli": "^1.26.10",
"@semantic-release/changelog": "^6.0.1",
"@semantic-release/git": "^10.0.1",
"@types/node": "^17.0.6",
"@types/node": "^18.15.3",
"@types/pluralize": "^0.0.29",
"eslint": "^8.6.0",
"eslint-config-oclif": "^4.0.0",
"eslint-config-oclif-typescript": "^1.0.2",
Expand Down
197 changes: 123 additions & 74 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import { Command, flags } from "@oclif/command";
import { spawn } from "child_process";
const faker = require("faker");
const fs = require("fs");
const path = require("path");
const readline = require("readline");
import fs from 'fs';
import path from 'path';
import readline from 'readline';
import { Input } from "@oclif/parser";
import faker from 'faker';
import chalk from 'chalk';
import pluralize from 'pluralize';

function dieAndLog(message: string, error: any) {
console.error(message);
Expand Down Expand Up @@ -36,6 +38,11 @@ function sanitizePgDumpArgs(argv: string[]) {
}
}

/**
 * Diagnostic loggers for the CLI.
 *
 * Both loggers write to stderr, never stdout: when the dump is sent to
 * stdout (`--output -`), anything printed with `console.log` would be
 * interleaved with the SQL and corrupt it.
 */
const log = {
  /** Secondary detail; the arguments are rendered in grey via chalk. */
  info: (...args: unknown[]): void => console.error(chalk.grey(...args)),
  /** Primary progress message, printed unstyled. Prints a blank line when called with no arguments. */
  status: (...args: unknown[]): void => console.error(...args),
};

class PgAnonymizer extends Command {
static description = "dump anonymized database";

Expand All @@ -58,7 +65,12 @@ class PgAnonymizer extends Command {
help: flags.help({ char: "h" }),
list: flags.string({
char: "l",
description: "[default: email,name,description,address,city,country,phone,comment,birthdate] list of columns to anonymize",
description: "list of columns to anonymize",
default: "email,name,description,address,city,country,phone,comment,birthdate"
}),
skip: flags.string({
char: "s",
description: "specify tables to skip",
}),
configFile: flags.string({
char: "c",
Expand All @@ -77,6 +89,11 @@ class PgAnonymizer extends Command {
char: "f",
description: "faker locale (e.g: en, fr, de)",
}),
'preserve-null': flags.boolean({
char: "n",
description: "preserve NULL values",
default: false
}),
pgDumpOutputMemory: flags.string({
char: "m",
description:
Expand All @@ -95,31 +112,18 @@ class PgAnonymizer extends Command {
? require(path.join(process.cwd(), flags.extension))
: null;

sanitizePgDumpArgs(argv);
console.error("Launching pg_dump");
const pg = spawn("pg_dump", argv);
pg.on("exit", function (code) {
if (code != 0) {
dieAndLog("pg_dump command failed with exit code", code);
}
});
pg.stderr.on("data", function (data) {
dieAndLog("pg_dump command error:", data.toString());
});
pg.stdout.setEncoding("utf8");

if (!(flags.list || flags.configFile)) {
flags.list = "email,name,description,address,city,country,phone,comment,birthdate";
}

let list: { col: string; replacement: string | null; }[];
let list: { col: string; replacement: string | null; }[] = [];
if (flags.configFile) {
list = fs.readFileSync(flags.configFile, "utf8")
.split(/\r?\n/)
.map((l: string) => l.trim())
.map((l: string) => {
if (l === "") return null;
if (l.startsWith("#")) return null;
if (l === "") return null as never; // casting to never, as they're filtered out below
if (l.startsWith("#")) return null as never; // casting to never, as they're filtered out below
return {
col: l.replace(/:(?:.*)$/, "").toLowerCase(),
replacement: l.includes(":") ? l.replace(/^(?:.*):/, "") : null
Expand All @@ -135,31 +139,62 @@ class PgAnonymizer extends Command {
});
}

log.info("List:", list.map(c => c.col).join(", "))

const skip = [];

if (flags.skip) {
const tables = flags.skip.split(",").map((t: string) => t.toLowerCase().trim())
skip.push(...tables)

log.info("Skipping:", tables.join(", "))
}

let table: string | null = null;
let indices: Number[] = [];
let cols: string[] = [];

console.error("Command pg_dump started, running anonymization.");

let out: any;

if (flags.output === "-") {
out = process.stdout;
out._handle.setBlocking(true);
console.error("Output to stdout");
log.info("Output to stdout");
} else {
out = fs.createWriteStream(flags.output);
console.error("Output file: " + flags.output);
log.info("Output file: " + flags.output);
}


sanitizePgDumpArgs(argv);

log.status()
log.status("Launching pg_dump...");

const pg = spawn("pg_dump", argv);

pg.on("exit", function (code) {
if (code != 0) {
dieAndLog("pg_dump command failed with exit code", code);
}
});
pg.stderr.on("data", function (data) {
dieAndLog("pg_dump command error:", data.toString());
});
pg.stdout.setEncoding("utf8");

log.status("Command pg_dump started, running anonymization.");

const inputLineResults = readline.createInterface({
input: pg.stdout,
crlfDelay: Infinity,
}) as any as Iterable<String>;

for await (let line of inputLineResults) {
if (line.match(/^COPY .* FROM stdin;$/)) {
log.info()

table = line.replace(/^COPY (.*?) .*$/, "$1");
console.error("Anonymizing table " + table);

cols = line
.replace(/^COPY (?:.*?) \((.*)\).*$/, "$1")
Expand All @@ -175,59 +210,73 @@ class PgAnonymizer extends Command {
return acc;
}, []);

if (indices.length)
console.error(
"Columns to anonymize: " +
cols.filter((v, k) => indices.includes(k)).join(", ")
);
else console.error("No columns to anonymize");
log.status(chalk`{blueBright ${table}}:`, cols.map((c, i) => indices.includes(i) ? chalk.yellow(`[${c}]`) : c).join(", "))

if (skip.includes(table.toLowerCase())) {
log.status(`Skipping... excluded by user`);
} else if (!indices.length) {
log.status(`Skipping... no matching columns`)
} else {
log.status(`Anonymizing ${chalk.yellow(indices.length)} ${pluralize('column', indices.length)}...`);
}
} else if (table && line.trim() && (line !== "\\.")) {
line = line
.split("\t")
.map((v, k) => {
if (indices.includes(k)) {
let replacement = list.find(
(l: any) => l.col === cols[k]
)?.replacement;
if (!replacement) {
replacement = list.find(
(l: any) => l.col === table + "." + cols[k]
// Skip if specified, or there's no columns to anonymize
if (!skip.includes(table.toLowerCase()) && indices.length) {
line = line
.split("\t")
.map((v, k) => {
if (indices.includes(k)) {
if (flags['preserve-null'] && v === "\\N") {
return v
}

let replacement = list.find(
(l: any) => l.col === cols[k]
)?.replacement;
}
if (replacement) {
if (replacement.startsWith("faker.")) {
const [_one, two, three] = replacement.split(".");
if (!(two && three)) return replacement;
if (two === "date")
return postgreSQLDate(faker.date[three]());
return faker[two][three]();
if (!replacement) {
replacement = list.find(
(l: any) => l.col === table + "." + cols[k]
)?.replacement;
}
if (replacement.startsWith("extension.")) {
const functionPath = replacement.split(".");
return functionPath.reduce((acc: any, key: any) => {
if (acc[key]) {
return acc[key];
}
return acc;
}, extension)(v, table);
if (replacement) {
if (replacement.startsWith("faker.")) {
const [, two, three] = replacement.split(".") as ["faker", keyof Faker.FakerStatic, string];
if (!(two && three)) return replacement;

const fn = (faker[two] as any)[three];

if (two === "date") return postgreSQLDate(fn())

return fn();
}
if (replacement.startsWith("extension.")) {
const functionPath = replacement.split(".");
return functionPath.reduce((acc: any, key: any) => {
if (acc[key]) {
return acc[key];
}
return acc;
}, extension)(v, table);
}
return replacement;
}
return replacement;
if (cols[k] === "email") return faker.internet.email();
if (cols[k] === "name") return faker.name.findName();
if (cols[k] === "description") return faker.random.words(3);
if (cols[k] === "address") return faker.address.streetAddress();
if (cols[k] === "city") return faker.address.city();
if (cols[k] === "country") return faker.address.country();
if (cols[k] === "phone") return faker.phone.phoneNumber();
if (cols[k] === "comment") return faker.random.words(3);
if (cols[k] === "birthdate")
return postgreSQLDate(faker.date.past());
return faker.random.word();
}
if (cols[k] === "email") return faker.internet.email();
if (cols[k] === "name") return faker.name.findName();
if (cols[k] === "description") return faker.random.words(3);
if (cols[k] === "address") return faker.address.streetAddress();
if (cols[k] === "city") return faker.address.city();
if (cols[k] === "country") return faker.address.country();
if (cols[k] === "phone") return faker.phone.phoneNumber();
if (cols[k] === "comment") return faker.random.words(3);
if (cols[k] === "birthdate")
return postgreSQLDate(faker.date.past());
return faker.random.word();
}
return v;
})
.join("\t");

return v;
})
.join("\t");
}
} else {
table = null;
indices = [];
Expand Down
1 change: 1 addition & 0 deletions tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"compilerOptions": {
"declaration": true,
"importHelpers": true,
"esModuleInterop": true,
"module": "commonjs",
"outDir": "lib",
"rootDir": "src",
Expand Down
17 changes: 14 additions & 3 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -750,10 +750,15 @@
version "1.2.2"
resolved "https://registry.npmjs.org/@types/minimist/-/minimist-1.2.2.tgz"

"@types/node@*", "@types/node@^17.0.6":
"@types/node@*":
version "17.0.6"
resolved "https://registry.npmjs.org/@types/node/-/node-17.0.6.tgz"

"@types/node@^18.15.3":
version "18.15.3"
resolved "https://registry.yarnpkg.com/@types/node/-/node-18.15.3.tgz#f0b991c32cfc6a4e7f3399d6cb4b8cf9a0315014"
integrity sha512-p6ua9zBxz5otCmbpb5D3U4B5Nanw6Pk3PPyX05xnxbB/fRv71N7CPmORg7uAD5P70T0xmx1pzAx/FUfa5X+3cw==

"@types/normalize-package-data@^2.4.0":
version "2.4.1"
resolved "https://registry.npmjs.org/@types/normalize-package-data/-/normalize-package-data-2.4.1.tgz"
Expand All @@ -762,6 +767,11 @@
version "4.0.0"
resolved "https://registry.npmjs.org/@types/parse-json/-/parse-json-4.0.0.tgz"

"@types/pluralize@^0.0.29":
version "0.0.29"
resolved "https://registry.yarnpkg.com/@types/pluralize/-/pluralize-0.0.29.tgz#6ffa33ed1fc8813c469b859681d09707eb40d03c"
integrity sha512-BYOID+l2Aco2nBik+iYS4SZX0Lf20KPILP5RGmM1IgzdwNdTs0eebiFriOPcej1sX9mLnSoiNte5zcFxssgpGA==

"@types/retry@^0.12.0":
version "0.12.1"
resolved "https://registry.npmjs.org/@types/retry/-/retry-0.12.1.tgz"
Expand Down Expand Up @@ -1118,7 +1128,7 @@ chalk@^2.0.0, chalk@^2.3.2, chalk@^2.4.1:
escape-string-regexp "^1.0.5"
supports-color "^5.3.0"

chalk@^4.0.0, chalk@^4.1.0, chalk@^4.1.2:
chalk@^4, chalk@^4.0.0, chalk@^4.1.0, chalk@^4.1.2:
version "4.1.2"
resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.2.tgz#aac4e2b7734a740867aeb16bf02aad556a1e7a01"
dependencies:
Expand Down Expand Up @@ -3393,7 +3403,8 @@ pkg-dir@^4.2.0:

pluralize@^8.0.0:
version "8.0.0"
resolved "https://registry.npmjs.org/pluralize/-/pluralize-8.0.0.tgz"
resolved "https://registry.yarnpkg.com/pluralize/-/pluralize-8.0.0.tgz#1a6fa16a38d12a1901e0320fa017051c539ce3b1"
integrity sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA==

prelude-ls@^1.2.1:
version "1.2.1"
Expand Down