Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge branch 'master' into next

  • Loading branch information...
commit 4df39f91687c44db7fabe0216e1be2ee7100622e 2 parents da26072 + 342f16f
@normanr authored
Showing with 125 additions and 11 deletions.
  1. +9 −0 README.md
  2. +25 −10 aib2ofx
  3. +1 −1  aib2ofx_lib/ofx.py
  4. +90 −0 aib2ofx_lib/pdfparse.py
View
9 README.md
@@ -1,5 +1,6 @@
# aib2ofx
...or how to suck data out of AIB's online interface, and format it into an `OFX` file.
+Also supports conversion of AIB e-statements.
## Installation
@@ -71,6 +72,14 @@ The script should connect to AIB, log in using provided credentials,
iterate through all accounts, and save each of those to a separate
file located in `/output/directory`.
+To convert AIB PDF e-statements, download the statements from online banking and
+put them in a folder of your choice.
+Then run:
+
+ aib2ofx -d /output/directory -p /pdf/statement/directory
+
+Only checking account e-statements are supported currently.
+
## Guarantee
There is none.
View
35 aib2ofx
@@ -1,9 +1,9 @@
#!/usr/bin/env python
# coding: utf-8
-import optparse, re, sys
+import optparse, re, sys, codecs
-from aib2ofx_lib import aib, cfg, ofx
+from aib2ofx_lib import aib, cfg, ofx, pdfparse
def getOptions():
@@ -21,6 +21,10 @@ def getOptions():
'-q', '--quiet', action='store_true',
dest='quiet_mode', help='display no output at all [False]'
),
+ optparse.make_option(
+ '-p', '--pdfstatement-dir',
+ dest='pdfstatement_dir', help='directory location of pdf statements'
+ ),
]
parser.add_options(option_list)
parser.set_defaults(output_dir='/tmp', debug_mode=False, quiet_mode=False)
@@ -53,14 +57,17 @@ def getData(user, config, output_dir, formatter, chatter):
name = re.sub(cleanup_re,
'_',
account['accountId']).lower()
- f = open(
- '%s/%s_%s.ofx' % (output_dir, user, name),
+ writeFile(account, output_dir, user, name, formatter)
+
def writeFile(account_data, output_dir, user, accountId, formatter):
    """Format account_data with formatter and save it as
    <output_dir>/<user>_<accountId>.ofx, encoded as UTF-8.
    """
    # codecs.open(filename, mode, encoding) -- the original passed
    # 'w' 'utf-8' (implicit string concatenation), which produced the
    # invalid mode string 'wutf-8'; mode and encoding must be two
    # separate arguments.
    f = codecs.open(
        '%s/%s_%s.ofx' % (output_dir, user, accountId),
        'w',
        'utf-8',
    )
    try:
        # The codecs stream writer encodes to UTF-8 itself; passing
        # pre-encoded bytes would double-encode, so write the text
        # unchanged.
        f.write(formatter.prettyprint(account_data))
    finally:
        # The original said `f.close` (no parentheses), which never
        # actually closed the file.
        f.close()
-
def main():
# Parse command line options.
(options, args) = getOptions()
@@ -73,10 +80,18 @@ def main():
config = cfg.config()
formatter = ofx.ofx()
- # Iterate through accounts, scrape, format and save data.
- for user in config.users():
- getData(user, config, options.output_dir, formatter,
- chatter)
+ if options.pdfstatement_dir:
+ pdfparser = pdfparse.PdfParse(options.pdfstatement_dir)
+ user = config.users()[0]
+ data = pdfparser.getData()
+ for d in data:
+ accountId = d['accountId']+"_"+d['reportDate'].strftime('%Y%m%d')
+ writeFile(d, options.output_dir, user, accountId, formatter)
+ else:
+ # Iterate through accounts, scrape, format and save data.
+ for user in config.users():
+ getData(user, config, options.output_dir, formatter,
+ chatter)
if __name__ == '__main__':
View
2  aib2ofx_lib/ofx.py
@@ -99,7 +99,7 @@ def prettyprint(self, input):
t['type'] = 'DEBIT'
t['amount'] = '-%s' % t['debit']
t['timestamp'] = _toDate(t['timestamp'])
- t['tid'] = sha256(t['timestamp'] + t['amount'] + t['description']).hexdigest()
+ t['tid'] = sha256(t['timestamp'].encode("utf-8") + t['amount'].encode("utf-8") + t['description'].encode("utf-8")).hexdigest()
transactions.append(self.single_transaction % t)
data['transactions'] = '\n'.join(transactions)
View
90 aib2ofx_lib/pdfparse.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from BeautifulSoup import BeautifulStoneSoup
+import re, os, subprocess, fnmatch, codecs
+from datetime import datetime
+
class PdfParse:
    """Parse AIB PDF e-statements (checking accounts) into account data.

    Each PDF is converted to XML with the external ``pdftohtml`` tool;
    text fragments are then classified by their pixel positions on the
    page, which is how AIB lays out the date / description / debit /
    credit / balance columns.
    """

    def __init__(self, directory):
        # Right edge (left + width) of the numeric columns, and the
        # left edge of the description column, in pdftohtml's XML
        # output.  These constants match AIB's statement layout.
        self.debit_rpos = 307
        self.credit_rpos = 363
        self.balance_rpos = 430
        self.desc_lpos = 79

        # Transaction date, e.g. "3 Jan 2013".
        self.dateRegEx = r'\d+\s\w+\s\d{4,4}'
        # Account number, e.g. "12345-678".
        self.accountNoRegEx = r'\d{5,5}-\d{3,3}'
        self.directory = os.path.abspath(directory)

    def getData(self):
        """Convert and parse every ``*.pdf`` in the directory.

        Returns a list of account data dicts, one per statement file.
        """
        data = []
        for name in os.listdir(self.directory):
            if not fnmatch.fnmatch(name, '*.pdf'):
                continue
            fullname = self.directory + "/" + name
            # pdftohtml writes <basename>.xml next to the input file.
            subprocess.call(['pdftohtml', '-xml', fullname])
            # Bug fix: the original used rstrip('.pdf'), which strips
            # any trailing characters from the set {'.', 'p', 'd', 'f'}
            # rather than the literal suffix (e.g. "feed.pdf" -> "fee"),
            # so the computed XML name could be wrong.  Cut off exactly
            # the 4-character extension instead.
            xmlname = fullname[:-len('.pdf')] + ".xml"
            data.append(self._get_data_for_file(xmlname))
        return data

    def _get_data_for_file(self, file_name):
        """Build the account-data dict for one converted statement."""
        data = {'type': 'checking',
                'available': '',
                'balance': '',
                'bankId': 'AIB',
                'currency': 'EUR',
                'operations': []}

        statement = self._parse_xml(file_name)
        operations = statement['operations']

        data['accountId'] = statement['accountId']
        data['operations'] = operations
        # Transactions are listed chronologically, so the final one
        # carries the closing balance and the statement report date.
        data['balance'] = operations[-1]['balance']
        data['available'] = data['balance']
        data['reportDate'] = operations[-1]['timestamp']
        return data

    def _parse_xml(self, file_name):
        """Extract accountId and operations from a pdftohtml XML file."""
        # Bug fix: the file handle was previously never closed; close
        # it as soon as the content has been read.
        fh = codecs.open(file_name, "r")
        try:
            xml = fh.read()
        finally:
            fh.close()
        soup = BeautifulStoneSoup(xml)

        operations = []
        current_ts = ''
        accountId = ''
        # Template for an operation.
        operation_tmpl = dict(debit='', credit='', balance='', description='')
        operation = operation_tmpl.copy()

        for elm in soup.findAll('text'):
            left_pos = int(elm['left'])
            right_pos = left_pos + int(elm['width'])

            accountIdMatch = re.search(self.accountNoRegEx, elm.text)
            if accountIdMatch:
                accountId = accountIdMatch.group(0)

            # Date and inline description live in the leftmost column.
            if left_pos <= 70:
                dateMatch = re.search(self.dateRegEx, elm.text)
                if dateMatch:
                    date = dateMatch.group(0)
                    operation['description'] = elm.text.replace(date, '').lstrip()
                    current_ts = datetime.strptime(date, '%d %b %Y')

            # A debit or credit amount completes the current operation.
            if right_pos == self.debit_rpos:
                operation['debit'] = elm.text
                operations.append(operation)
                operation = operation_tmpl.copy()
            if right_pos == self.credit_rpos:
                operation['credit'] = elm.text
                operations.append(operation)
                operation = operation_tmpl.copy()
            if left_pos == self.desc_lpos:
                operation['description'] = elm.text

            # A balance figure belongs to the most recently completed
            # operation, if any.
            if right_pos == self.balance_rpos and operations:
                operations[-1]['balance'] = elm.text

            # Stamp the in-progress operation with the last date seen;
            # operations appended above already carry the timestamp set
            # on them during earlier iterations.
            operation['timestamp'] = current_ts

        return {'accountId': accountId, 'operations': operations}
Please sign in to comment.
Something went wrong with that request. Please try again.