In [1]:
import re
from pathlib import Path
from datetime import datetime
from config import (
    load_env_if_present,
    load_config,
    get_invoice_folder,
    get_soa_folder,
    get_client_directory,
    get_file_regex,
)
from db.db_path import get_db_path
from db.db import (
    init_db,
    add_or_update_client,
    add_or_update_soa,
    record_invoice,
    get_unsent_invoices,
    mark_invoice_sent,
    get_client_email,
)
from utility import extract_pdf_text
from utility.read_xlsx import iter_xlsx_rows_as_dicts


In [2]:
# import io
# import fitz  # PyMuPDF
# 
# try:
#     import pytesseract
#     from PIL import Image, ImageDraw
#     OCR_LIB_AVAILABLE = True
# except Exception:
#     OCR_LIB_AVAILABLE = False

In [3]:
# Load runtime configuration and resolve every path / filename pattern the
# later cells rely on.
load_env_if_present()   # dev only, safe in prod
cfg = load_config()
# Root folders that are scanned recursively for invoice and SOA PDFs.
invoice_folder = get_invoice_folder(cfg)
soa_folder = get_soa_folder(cfg)
# Spreadsheet that is read row by row to populate the client table.
client_directory = get_client_directory(cfg)
# Filename patterns; later cells call .match(file.name) on them and read
# capture groups 1-3 (invoices) and 1-2 (SOAs).
inv_file_regex = get_file_regex(cfg,'inv')
soa_file_regex = get_file_regex(cfg,'soa')


In [4]:
client_directory

PosixPath('/home/rajinder-mavi/code/safe_marine_invoice_mailer/raw_data/client_directory.xlsx')

In [5]:
soa_folder

PosixPath('/home/rajinder-mavi/code/safe_marine_invoice_mailer/raw_data/SOA')

In [6]:
# Resolved database location. NOTE(review): not referenced by any cell visible
# here — confirm it is still needed, or display it for inspection.
DB_PATH: Path = get_db_path()

In [7]:
# Prepare the database before any add/record call below.
# NOTE(review): presumably creates the schema if missing — confirm in db.db.
init_db()

In [8]:
# Sync the client directory spreadsheet into the database, one row per client.
for row in iter_xlsx_rows_as_dicts(client_directory):
    # Gather the filled-in values of columns emailforinvoice1..emailforinvoice5,
    # preserving column order and dropping empty cells.
    candidate_emails = (row.get(f'emailforinvoice{idx}') for idx in range(1, 6))
    emails = list(filter(None, candidate_emails))

    customer_number = row.get('Customer Number')
    head_office = row.get('Head Office', '')

    add_or_update_client(head_office, customer_number, emails)

In [9]:
#inv_file_pattern = r'^([^\s]+)\s+invoice\s+([^\s]+)\s+(.+)\.pdf$'
#inv_file_regex = re.compile(inv_file_pattern, re.IGNORECASE)
#soa_file_pattern = r"^Statement of Account for- ([A-Za-z0-9]+)\s+(.+?)\s*\.PDF$"
#soa_file_regex = re.compile(soa_file_pattern, re.IGNORECASE)


# Recursively scan the invoice folder and register every PDF whose filename
# matches the configured invoice pattern.
# sorted() makes the processing order reproducible between runs; rglob itself
# yields paths in arbitrary order.
for file in sorted(invoice_folder.rglob('*invoice*.pdf',case_sensitive=False)):

    m = inv_file_regex.match(file.name)
    if not m:
        # Report the skip (as the SOA scan does) so misnamed files are noticed.
        print(f'Skipping {file.name}: filename does not match the invoice pattern')
        continue

    # Capture groups, in order: customer number, tax invoice number, ship name.
    customer_number, tax_invoice_no, ship_name = m.group(1, 2, 3)
    inv_file_path = file.as_posix()

    invoice_date = extract_pdf_text.extract_pdf_date(inv_file_path,field = 'inv_date')
    if not invoice_date:
        # Guard in case the extractor returns nothing (None or ''): without it,
        # .rsplit would raise and abort the whole scan, or an invoice with an
        # empty date/period would be recorded.
        print(f'Skipping {file.name}: no invoice date could be extracted')
        continue

    # Period = date with its last '-'-separated component removed.
    # NOTE(review): assumes the date ends in '-<day>' (e.g. YYYY-MM-DD) — confirm.
    inv_period_month = invoice_date.rsplit('-',1)[0]

    record_invoice(
        tax_invoice_no,
        customer_number,
        ship_name,
        inv_file_path,
        invoice_date,
        inv_period_month
    )

    


In [10]:
# Recursively scan the SOA folder and register every statement PDF whose
# filename matches the configured SOA pattern.
# sorted() makes the processing order reproducible between runs; rglob itself
# yields paths in arbitrary order, which matters for an add-or-update call.
for file in sorted(soa_folder.rglob('Statement*.pdf',case_sensitive=False)):

    m = soa_file_regex.match(file.name)
    if not m:
        # Name the offending file so the skip is actionable.
        print(f'Skipping {file.name}: filename does not match the SOA pattern')
        continue

    # Capture groups, in order: head office code, head office name.
    head_office, head_office_name = m.group(1, 2)
    soa_file_path = file.as_posix()

    soa_date = extract_pdf_text.extract_pdf_date(soa_file_path,'soa_date')
    if not soa_date:
        # Guard in case the extractor returns nothing (None or ''): without it,
        # .rsplit would raise and abort the whole scan, or an SOA with an
        # empty date/period would be stored.
        print(f'Skipping {file.name}: no statement date could be extracted')
        continue

    # Period = date with its last '-'-separated component removed.
    # NOTE(review): assumes the date ends in '-<day>' (e.g. YYYY-MM-DD) — confirm.
    soa_period_month = soa_date.rsplit('-',1)[0]

    add_or_update_soa(
        head_office,
        head_office_name,
        soa_file_path,
        soa_date,
        soa_period_month
    )

    