In [13]:
import os

# Define constants
# Relative paths are resolved against the directory the notebook is run from.
INPUT_DIR = "data/input"
OUTPUT_DIR = "data/output"
INSTITUTION = "HDFC"
# The password can be supplied through the STATEMENT_PASSWORD environment
# variable so the real value never has to be saved inside the notebook.
# "123" is only a placeholder; the guard below refuses to run with it.
STATEMENT_PASSWORD = os.environ.get("STATEMENT_PASSWORD", "123")  # Customer ID

if STATEMENT_PASSWORD == "123":
    raise Exception("Please change password to actual one")

# Ensure output dir exists.
# exist_ok=True avoids the check-then-create race of a separate
# os.path.exists() test and is a no-op when the directory is already there.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [24]:
# load common functions
import importlib.util
import sys
from importlib.machinery import SourceFileLoader

# Load the shared helper module directly from its file path.
# Loader.load_module() is deprecated, so the module is built from a spec and
# executed with exec_module() instead.
_common_lib_loader = SourceFileLoader("common_lib", "../common_lib/common_functions.py")
_common_lib_spec = importlib.util.spec_from_loader("common_lib", _common_lib_loader)
common_lib = importlib.util.module_from_spec(_common_lib_spec)
# Register the module before executing it so that the star-import below
# resolves "common_lib" from sys.modules rather than searching sys.path.
sys.modules["common_lib"] = common_lib
_common_lib_loader.exec_module(common_lib)

from common_lib import *

In [25]:
from datetime import datetime

def get_filedate(filename):
    """Return the date encoded in a statement file's name.

    The base name is split on underscores and its third field is parsed as
    ``DDMMYYYY``, e.g. ``<a>_<b>_25122023_<c>.pdf`` or ``<a>_<b>_25122023.pdf``.

    Args:
        filename: Path (or bare name) of the statement file.

    Returns:
        datetime.date: The date parsed from the file name.

    Raises:
        IndexError: If the base name has fewer than three underscore-separated
            fields.
        ValueError: If the third field is not a valid ``DDMMYYYY`` date.
    """
    # os.path.basename honours the platform's path separators instead of
    # assuming "/".
    date_field = os.path.basename(filename).split("_")[2]
    # When the date is the last field it still carries the file extension
    # ("25122023.pdf"). A valid DDMMYYYY field never contains a dot, so cutting
    # at the first dot leaves already-parsable names unchanged.
    date_field = date_field.split(".", 1)[0]
    return datetime.strptime(date_field, "%d%m%Y").date()

In [26]:
def extract_balance(line):
    """Return the numeric balance found in a statement summary line.

    The line is split on single spaces and the fourth field (index 3) is
    converted to a float.

    Args:
        line: A summary line such as ``"INR SAVINGS ACCOUNTS 1,23,456.78"``.

    Returns:
        float: The balance value.

    Raises:
        IndexError: If the line has fewer than four space-separated fields.
        ValueError: If the fourth field is not numeric.
    """
    amount_text = line.split(" ")[3]
    # float() rejects digit-grouping commas ("1,23,456.78"), so drop them
    # first. Amounts without commas are unaffected.
    return float(amount_text.replace(",", ""))

def get_balance(line, date):
    """Return a summary line's balance in both currencies.

    The currency is taken from the line's prefix (``INR`` or ``USD``) and the
    other currency is derived with the rate returned by
    ``ExchangeRate.get_inr_rate(date)``, which is used here as INR per one USD.

    Args:
        line: A summary line beginning with ``INR`` or ``USD``.
        date: Date passed to ``ExchangeRate.get_inr_rate``.

    Returns:
        tuple: ``(inr_balance, usd_balance)``.

    Raises:
        ValueError: If the line starts with neither ``INR`` nor ``USD``.
            Previously this case fell through and returned ``None``, which
            callers that unpack the result hit as an obscure ``TypeError``.
    """
    # Validate up front so an unsupported line fails with a clear message
    # before any rate lookup is attempted.
    if not line.startswith(("INR", "USD")):
        raise ValueError(f"Unsupported currency in balance line: {line!r}")

    balance = extract_balance(line)
    usd2inr_rate = ExchangeRate.get_inr_rate(date)
    if line.startswith("INR"):
        return (balance, balance / usd2inr_rate)
    return (balance * usd2inr_rate, balance)

In [27]:
def extract_balances(page_text, file_date, output):
    """Append one CSV row to ``output`` for every recognised balance line.

    Each line of ``page_text`` is matched against the known account prefixes.
    A matching line is converted with ``get_balance`` and appended as
    ``date,institution,account type,INR balance,USD balance,`` (the trailing
    comma leaves the last column empty). Lines that match no prefix are
    ignored.

    Args:
        page_text: Text of a statement page.
        file_date: Statement date; used for the row's date column and passed
            on to ``get_balance``.
        output: List that receives the generated rows (mutated in place).
    """
    # Line prefix -> label written to the account-type column.
    account_labels = (
        ("INR SAVINGS ACCOUNTS ", "Savings"),
        ("INR TERM DEPOSITS ", "Deposit"),
        ("USD TERM DEPOSITS ", "FCNR Deposit"),
    )
    iso_date = file_date.isoformat()

    for row in page_text.splitlines():
        account_type = next(
            (label for prefix, label in account_labels if row.startswith(prefix)),
            None,
        )
        if account_type is None:
            continue

        inr_amount, usd_amount = get_balance(row, file_date)
        output.append(
            f"{iso_date},{INSTITUTION},{account_type},{inr_amount:.2f},{usd_amount:.2f},"
        )

In [28]:
def generate_monthly_balances():
    """Process every statement PDF and return the collected balance rows.

    For each path returned by ``get_all_pdf_paths(INPUT_DIR)`` this opens the
    PDF with ``STATEMENT_PASSWORD``, derives the statement date from the file
    name, extracts balance rows from the first page only, and passes the
    reader to ``write_pdf`` with a ``<INSTITUTION>-Statement_<YYYY>-<MM>.pdf``
    path under ``OUTPUT_DIR``.

    Returns:
        list: CSV row strings produced by ``extract_balances``, in the order
        the PDFs were processed.
    """
    rows = []

    for pdf_path in get_all_pdf_paths(INPUT_DIR):
        pdf_reader = get_pdf_reader(pdf_path, STATEMENT_PASSWORD)
        statement_date = get_filedate(pdf_path)

        # Only the first page is read for balances.
        first_page_text = pdf_reader.pages[0].extract_text()
        extract_balances(first_page_text, statement_date, rows)

        output_path = f"{OUTPUT_DIR}/{INSTITUTION}-Statement_{statement_date.year}-{statement_date.month:02}.pdf"
        write_pdf(pdf_reader, output_path)

    return rows

In [29]:
# Rows are sorted as plain strings in descending order; each row begins with
# an ISO date, so the most recent statements come first.
output = sorted(generate_monthly_balances(), reverse=True)

csv_path = f"{OUTPUT_DIR}/{INSTITUTION}-Monthly-Balances.csv"
csv_header = "Date,Institution,Account Type,Balance INR,Balance USD,Comments\n"

with open(csv_path, "w") as f:
    f.write(csv_header)
    f.writelines(f"{line}\n" for line in output)