-
-
Notifications
You must be signed in to change notification settings - Fork 7
/
parser.ts
134 lines (114 loc) · 4.24 KB
/
parser.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import { BankFile, ParsedBankFile, Parser, Transaction } from "../types";
import parseCsv from "csv-parse/lib/sync";
import { Options as parseOptions } from "csv-parse";
import { DateTime } from "luxon";
import fs from "fs";
import chalk from "chalk";
import { messages } from "../constants";
/**
 * Reads a bank export from disk and converts its rows into transactions.
 *
 * The parser whose `name` equals `source.matchedParser` supplies the column
 * layout, the delimiter and the number of header/footer rows to discard.
 *
 * @param source - Bank file to parse; `source.path` is read synchronously.
 * @param parsers - Parser configurations to search for `source.matchedParser`.
 * @returns The parsed transactions together with the originating `source`.
 * @throws Error when no parser named `source.matchedParser` is configured.
 */
export function parseBankFile(source: BankFile, parsers: Parser[]) {
  const csv = fs.readFileSync(source.path);
  console.log(`\n${messages.parsing}`, source.path);

  // Fail with an actionable message instead of a TypeError on `parser.columns`
  // when the matched parser is not part of the supplied list.
  const parser = parsers.find((p) => p.name === source.matchedParser);
  if (!parser) {
    throw new Error(
      `No parser named "${source.matchedParser}" found for ${source.path}`
    );
  }

  // Configure the CSV parser to detect the right columns and delimiter
  const csvOptions = { ...baseParseOptions };
  csvOptions.columns = parser.columns.map(unifyColumns);
  csvOptions.delimiter = parser.delimiter;
  const allRecords: any[] = parseCsv(csv, csvOptions);

  // Drop header and footer rows
  const startRow = parser.header_rows;
  const endRow = allRecords.length - parser.footer_rows;
  const records = allRecords.slice(startRow, endRow);

  const transactions = records.map((tx) => buildTransaction(tx, parser));
  logResult(transactions.length, source.path);

  return {
    transactions,
    source,
  } as ParsedBankFile;
}
/**
 * Converts one parsed CSV record into a transaction.
 *
 * @param record - Row object keyed by the unified column names.
 * @param parser - Parser configuration; provides the date format and the
 *   settings used when parsing the amount.
 * @returns A transaction with amount, date and memo. `payee_name` is present
 *   only when the record has a non-empty payee after trimming.
 */
export function buildTransaction(record: any, parser: Parser): Transaction {
  const amount = parseAmount(record, parser);
  const date = parseDate(record, parser.date_format);
  const memo = mergeMemoFields(record);
  const payee = record.payee?.trim();

  const transaction: Transaction = { amount, date, memo };
  // Leave the key out entirely rather than storing an empty/undefined payee.
  if (payee) transaction.payee_name = payee;
  return transaction;
}
/**
 * Merges the fields named memo, memo1, memo2, etc. into a single memo string.
 *
 * Fields are joined with a single space in numeric order of their suffix
 * (`memo` first, then `memo1`, `memo2`, ... `memo10`), so `memo10` is placed
 * after `memo2` rather than before it as a plain string sort would do.
 *
 * @param record - Row object keyed by the unified column names.
 * @returns The joined memo text, or an empty string when no memo field exists.
 */
function mergeMemoFields(record: any) {
  const memoPattern = /^memo[0-9]*$/;

  // The bare "memo" key has no suffix and sorts before every numbered one.
  const memoOrder = (key: string): number => {
    const suffix = key.slice("memo".length);
    return suffix === "" ? -1 : Number(suffix);
  };

  const memoKeys = Object.keys(record)
    .filter((key) => memoPattern.test(key))
    .sort((a, b) => {
      const byNumber = memoOrder(a) - memoOrder(b);
      if (byNumber) return byNumber;
      // Equal numbers (e.g. memo1 / memo01): fall back to string order.
      return a < b ? -1 : a > b ? 1 : 0;
    });

  return memoKeys.map((key) => record[key]).join(" ");
}
/**
 * Parses the record's `date` field with the given format, in the UTC zone.
 *
 * @param record - Row object; its `date` property is read.
 * @param dateFormat - Format string passed to `DateTime.fromFormat`.
 * @returns The parsed date as a JavaScript `Date`.
 * @throws The string "PARSING ERROR" (after logging the offending value and
 *   format) when the date is missing, not a string, or does not match
 *   `dateFormat`.
 */
function parseDate(record: any, dateFormat: string) {
  const { date } = record;

  // A row can lack the date column altogether; treat that as a parse error
  // instead of crashing on `date.trim()`.
  if (typeof date === "string") {
    const dateTime = DateTime.fromFormat(date.trim(), dateFormat, {
      zone: "UTC",
    });
    if (dateTime.isValid) return dateTime.toJSDate();
  }

  const error = messages.parseDateError.join("\n");
  console.error(chalk.redBright(error), date, dateFormat);
  // Kept as a plain string so callers that match on this value keep working.
  throw "PARSING ERROR";
}
/**
 * Extracts the signed transaction amount from a record.
 *
 * The first non-empty column of `inflow`, `outflow`, `amount` (in that order)
 * supplies the value. String values are normalised using the parser's
 * thousand and decimal separators and stripped of any other characters
 * before being converted to a number.
 *
 * The result is negated when the value was taken from the `outflow` column,
 * or when `in_out_flag` starts with the parser's `outflow_indicator`.
 *
 * @param record - Row object keyed by the unified column names.
 * @param parser - Supplies `thousand_separator`, `decimal_separator` and
 *   `outflow_indicator`.
 * @returns The amount; negative for outflows.
 */
function parseAmount(record: any, parser: Parser): number {
  const { thousand_separator, decimal_separator, outflow_indicator } = parser;
  const { inflow, outflow, amount, in_out_flag } = record;

  let value = inflow || outflow || amount;
  // True only when the value above really came from the outflow column. An
  // empty outflow cell next to a filled inflow cell must not flip the sign.
  const isFromOutflow = !inflow && Boolean(outflow);

  if (typeof value === "string") {
    if (thousand_separator) {
      // Remove every separator, not just the first one:
      // 1.234.567,89 -> 1234567,89
      value = value.split(thousand_separator).join("");
    }
    if (decimal_separator) {
      value = value.replace(decimal_separator, "."); // 69420,00 -> 69420.00
    }
    if (!decimal_separator && !thousand_separator) {
      // Backwards compatibility: if value has a ',' convert it to a '.'
      value = value.replace(",", ".");
    }

    // Remove non digit, non decimal separator, non minus characters
    value = value.replace(/[^0-9-.]/g, ""); // $420.69 -> 420.69
    value = parseFloat(value); // "420.69" ==> 420.69
  }

  // Invert the amount if it was read from the outflow column, OR
  // if the in_out_flag column exists AND starts with the outflow indicator
  if (isFromOutflow || in_out_flag?.startsWith(outflow_indicator)) {
    value = -value; // 420.69 ==> -420.69
  }

  return value;
}
/**
 * Prints the "parsing done" message in bright green, followed by the number
 * of transactions that were parsed.
 *
 * @param txCount - Number of parsed transactions.
 * @param sourcePath - Path of the parsed file; accepted but not used here.
 */
function logResult(txCount: number, sourcePath: string) {
  console.log(chalk.greenBright(messages.parsingDone), txCount);
}
/**
 * Normalises one configured column name for use as a CSV record key.
 *
 * Recognised names (date, inflow, outflow, amount, memo / memo<digits>,
 * in_out_flag, payee; matched case-insensitively) are returned in lower case.
 * Any other name is replaced by `__<index>`, where index is the column's
 * position, so the column is still parsed under a unique key that the rest
 * of the parser never reads.
 *
 * Example input: ['skip', 'memo', 'skip', 'Date', 'Inflow', 'Foobar', 'memo2'] ==>
 * output: ['__0', 'memo', '__2', 'date', 'inflow', '__5', 'memo2']
 *
 * @param columnName - Column name as written in the parser configuration.
 * @param index - Position of the column in the column list.
 * @returns The lower-cased name, or the `__<index>` placeholder.
 */
function unifyColumns(columnName: string, index: number) {
  const normalized = columnName.toLowerCase();

  const fixedNames = new Set([
    "date",
    "inflow",
    "outflow",
    "amount",
    "in_out_flag",
    "payee",
  ]);
  const isRecognized =
    fixedNames.has(normalized) || /^memo[0-9]*$/.test(normalized);

  return isRecognized ? normalized : `__${index}`;
}
/**
 * Default csv-parse options shared by every bank file.
 * parseBankFile copies this object and then sets `columns` and `delimiter`
 * from the matched parser, so this constant itself is never mutated.
 */
const baseParseOptions: parseOptions = {
// NOTE(review): presumably makes csv-parse drop blank lines — confirm against the csv-parse options docs.
skipEmptyLines: true,
// NOTE(review): presumably tolerates rows whose column count differs from `columns` — confirm against the csv-parse options docs.
relaxColumnCount: true,
};