-
Notifications
You must be signed in to change notification settings - Fork 36
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #94 from a0js/pdfreader-and-bamboohr-importer
pdfreader and bamboohr paycheck importer
- Loading branch information
Showing
12 changed files
with
397 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,6 +50,7 @@ coverage.xml | |
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
.debug-* | ||
|
||
# Translations | ||
*.mo | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
"""BambooHR paycheck importer""" | ||
|
||
import re | ||
|
||
from dateparser.search import search_dates | ||
|
||
from beancount_reds_importers.libreader import pdfreader | ||
from beancount_reds_importers.libtransactionbuilder import paycheck | ||
|
||
# BambooHR exports paycheck stubs to pdf, with multiple tables across multiple pages. | ||
# Call this importer with a config that looks like: | ||
# | ||
# bamboohr.Importer({"desc":"Paycheck (My Company)", | ||
# "main_account":"Income:Employment", | ||
# "paycheck_template": {}, # See beancount_reds_importers/libtransactionbuilder/paycheck.py for sample template | ||
# "currency": "PENNIES", | ||
# }), | ||
# | ||
|
||
|
||
class Importer(paycheck.Importer, pdfreader.Importer):
    """Paycheck importer for pay stubs that BambooHR exports as PDF files.

    The PDF reader base class supplies the extracted tables (``self.alltables``)
    and text (``self.meta_text``); this class normalizes the column names of
    those tables and supplies the paycheck date.
    """

    IMPORTER_NAME = "BambooHR Paycheck"

    def custom_init(self):
        """Configure file matching, PDF table extraction and column naming."""
        self.max_rounding_error = 0.04
        self.filename_pattern_def = r"PayStub.*\.pdf"
        self.pdf_table_extraction_settings = {"join_tolerance": 4, "snap_tolerance": 4}
        self.pdf_table_extraction_crop = (0, 40, 0, 0)
        self.debug = False

        # Column titles that get an explicit name instead of the generic
        # lower-case/underscore normalization done in prepare_tables().
        self.header_map = {
            "Deduction Type": "description",
            "Pay Type": "description",
            "Paycheck Total": "amount",
            "Tax Type": "description",
        }

        self.currency_fields = ["ytd_total", "amount"]

    def paycheck_date(self, input_file):
        """Return the paycheck date as a ``datetime.date``.

        The file is read first if that has not happened yet. The date used is
        the third one dateparser finds in ``self.meta_text``.
        NOTE(review): presumably the third date on the stub is the pay date;
        confirm against a real BambooHR export.

        Raises:
            IndexError: if fewer than three dates are found in the text.
        """
        if not self.file_read_done:
            self.read_file(input_file)
        # search_dates() returns None (not an empty list) when nothing matches.
        found = search_dates(self.meta_text) or []
        dates = [date for _, date in found]
        if len(dates) < 3:
            raise IndexError(
                f"expected at least 3 dates in the pay stub text, found {len(dates)}"
            )
        return dates[2].date()

    def prepare_tables(self):
        """Rename the columns of every extracted table and convert their values."""

        def valid_header(label):
            """Return the normalized column name for the column title *label*."""
            if label in self.header_map:
                return self.header_map[label]

            label = label.lower().replace(" ", "_")
            # A four-digit year in a title becomes "ytd",
            # e.g. "2023 Total" -> "ytd_total".
            return re.sub(r"20\d{2}", "ytd", label)

        for section, table in self.alltables.items():
            # rename columns
            for header in table.header():
                table = table.rename(header, valid_header(header))
            # convert columns
            table = self.convert_columns(table)

            self.alltables[section] = table

    def build_metadata(self, file, metatype=None, data=None):
        """Return transaction metadata holding the configured main account.

        ``file``, ``metatype`` and ``data`` are accepted for interface
        compatibility and are not used here.
        """
        return {"filing_account": self.config["main_account"]}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
"""Generic pdf paycheck importer""" | ||
|
||
import datetime | ||
|
||
from beancount_reds_importers.libreader import pdfreader | ||
from beancount_reds_importers.libtransactionbuilder import paycheck | ||
|
||
# Generic pdf paystub importer. Use this to build your own pdf paystub importer. | ||
# Call this importer with a config that looks like: | ||
# | ||
# genericpdf.Importer({"desc":"Paycheck (My Company)", | ||
# "main_account":"Income:Employment", | ||
# "paycheck_template": {}, # See beancount_reds_importers/libtransactionbuilder/paycheck.py for sample template | ||
# "currency": "PENNIES", | ||
# }), | ||
# | ||
|
||
|
||
class Importer(paycheck.Importer, pdfreader.Importer):
    """Generic PDF pay stub importer, meant as a starting point for custom ones.

    The PDF reader base class supplies the extracted tables
    (``self.alltables``); this class normalizes their column names and reads
    the paycheck date from the header of ``table_1``.
    """

    IMPORTER_NAME = "Generic PDF Paycheck"

    def custom_init(self):
        """Configure file matching, PDF table extraction and column naming."""
        self.max_rounding_error = 0.04
        self.filename_pattern_def = r"paystub.*\.pdf"
        self.pdf_table_extraction_settings = {"join_tolerance": 4, "snap_tolerance": 4}
        self.pdf_table_extraction_crop = (0, 0, 0, 0)
        self.pdf_table_title_height = 0
        # Set this true as you play with the extraction settings and crop to view images of what the pdf parser detects
        self.debug = True

        # Column titles that get an explicit name instead of the generic
        # lower-case/underscore normalization done in prepare_tables().
        self.header_map = {
            "CURRENT": "amount",
            "CURRENT PAY": "amount",
            "PAY DESCRIPTION": "description",
            "DEDUCTIONS": "description",
            "TAX TYPE": "description",
            "TOTAL NET PAY": "description",
            "YTD": "ytd",
            "YTD PAY": "ytd",
        }

        self.currency_fields = ["ytd", "amount"]
        self.date_format = "%m/%d/%Y"

    def paycheck_date(self, input_file):
        """Return the paycheck date as a ``datetime.date``.

        The file is read first if that has not happened yet. The date is the
        last header cell of ``table_1``, parsed with ``self.date_format``; the
        parsed ``datetime`` is also stored in ``self.date``.
        """
        if not self.file_read_done:
            self.read_file(input_file)
        *_, d = self.alltables["table_1"].header()
        self.date = datetime.datetime.strptime(d, self.date_format)
        return self.date.date()

    def prepare_tables(self):
        """Rename the columns of every extracted table and convert their values."""

        def valid_header(label):
            """Return the normalized column name for the column title *label*."""
            if label in self.header_map:
                return self.header_map[label]

            return label.lower().replace(" ", "_")

        for section, table in self.alltables.items():
            # rename columns
            for header in table.header():
                if section == "table_6" and header == "":
                    # The untitled column of table_6 is treated as the amount.
                    table = table.rename(header, "amount")
                else:
                    table = table.rename(header, valid_header(header))
            # convert columns
            table = self.convert_columns(table)

            self.alltables[section] = table

    def build_metadata(self, file, metatype=None, data=None):
        """Return transaction metadata holding the configured main account.

        ``file``, ``metatype`` and ``data`` are accepted for interface
        compatibility and are not used here.
        """
        return {"filing_account": self.config["main_account"]}
33 changes: 33 additions & 0 deletions
33
beancount_reds_importers/importers/genericpdf/tests/genericpdf_test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
from os import path | ||
|
||
from beancount.ingest import regression_pytest as regtest | ||
|
||
from beancount_reds_importers.importers import genericpdf | ||
|
||
|
||
# Importer configuration used by the regression test below: each
# "paycheck_template" section maps row labels of one extracted table to accounts.
_GENERICPDF_TEST_CONFIG = {
    "desc": "Paycheck",
    "main_account": "Income:Salary:FakeCompany",
    "paycheck_template": {
        "table_4": {
            "Bonus": "Income:Bonus:FakeCompany",
            "Overtime": "Income:Overtime:FakeCompany",
            "Regular": "Income:Salary:FakeCompany",
        },
        "table_5": {
            "Federal MED/EE": "Expenses:Taxes:Medicare",
            "Federal OASDI/EE": "Expenses:Taxes:SocialSecurity",
            "Federal Withholding": "Expenses:Taxes:FederalIncome",
            "State Withholding": "Expenses:Taxes:StateIncome",
        },
        "table_6": {"CURRENT": "Assets:Checking:ABCBank"},
    },
    "currency": "USD",
}


@regtest.with_importer(genericpdf.Importer(_GENERICPDF_TEST_CONFIG))
@regtest.with_testdir(path.dirname(__file__))
class TestGenericPDF(regtest.ImporterTestBase):
    """Regression test for the generic PDF importer, run against this directory."""
Binary file not shown.
11 changes: 11 additions & 0 deletions
11
beancount_reds_importers/importers/genericpdf/tests/paystub.sample.pdf.extract
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
|
||
2023-12-03 * "Paycheck" | ||
filing_account: "Income:Salary:FakeCompany" | ||
Assets:Checking:ABCBank 4228.00 USD | ||
Expenses:Taxes:FederalIncome 416.00 USD | ||
Expenses:Taxes:Medicare 128.00 USD | ||
Expenses:Taxes:SocialSecurity 96.00 USD | ||
Expenses:Taxes:StateIncome 32.00 USD | ||
Income:Bonus:FakeCompany -3000.00 USD | ||
Income:Overtime:FakeCompany -300.00 USD | ||
Income:Salary:FakeCompany -1600.00 USD |
1 change: 1 addition & 0 deletions
1
beancount_reds_importers/importers/genericpdf/tests/paystub.sample.pdf.file_account
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Income:Salary:FakeCompany |
1 change: 1 addition & 0 deletions
1
beancount_reds_importers/importers/genericpdf/tests/paystub.sample.pdf.file_date
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
2023-12-03 |
1 change: 1 addition & 0 deletions
1
beancount_reds_importers/importers/genericpdf/tests/paystub.sample.pdf.file_name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
paystub.sample.pdf |
Oops, something went wrong.