In [51]:
from datetime import datetime
from docx import Document
from pathlib import Path
import pandas as pd
import re

In [52]:
# Words for the integers 0-19, keyed by the integer itself.
# Zero maps to the empty string so that it contributes no word.
num_under_20 = dict(enumerate([
    '',
    'One', 'Two', 'Three', 'Four', 'Five',
    'Six', 'Seven', 'Eight', 'Nine', 'Ten',
    'Eleven', 'Twelve', 'Thirteen', 'Fourteen', 'Fifteen',
    'Sixteen', 'Seventeen', 'Eighteen', 'Nineteen',
]))

In [53]:
# Words for the multiples of ten from 20 to 90, keyed by the tens digit (2-9).
num_under_100 = dict(enumerate(
    ['Twenty', 'Thirty', 'Forty', 'Fifty', 'Sixty', 'Seventy', 'Eighty', 'Ninety'],
    start=2,
))

In [54]:
# Scale words keyed by the power of 1000 they stand for
# (1 -> 1000**1, 2 -> 1000**2, ...).
num_above_1000 = {
    1: 'Thousand',
    2: 'Million',
    3: 'Billion',
    4: 'Trillion'
}

In [55]:
def num_to_word(num):
    """Spell out a non-negative amount in English words.

    The whole part is written first. When the amount has a fractional part,
    it is rounded to the nearest cent and appended as ``and Cents <words>``.
    Words are separated by single spaces and a zero part contributes no
    words, so ``num_to_word(0)`` is ``''``.

    Args:
        num: Non-negative int or float amount.

    Returns:
        str: The amount in words, e.g.
        ``'Nine Thousand Nine Hundred Seventy Nine'``.

    Raises:
        ValueError: If ``num`` is negative, or its whole part needs a scale
            word beyond the largest one defined in ``num_above_1000``.
    """
    if num < 0:
        raise ValueError(f'Cannot spell out a negative amount: {num!r}')
    # Split into integer parts up front: spelling a float directly miscounts
    # digits (str(100000.0) has a trailing '.0') and recursed without end.
    whole = int(num)
    # Round *after* scaling to cents; int(0.29 * 100) would truncate to 28.
    cents = int(round((num - whole) * 100))
    if cents == 100:
        # The fraction rounded up to a full unit (e.g. 1.999): carry it over.
        whole, cents = whole + 1, 0
    words = _int_to_word_list(whole)
    if cents:
        words += ['and Cents'] + _int_to_word_list(cents)
    return ' '.join(words)


def _int_to_word_list(num):
    """Return the words for a non-negative integer as a list (empty for 0)."""
    if num < 20:
        word = num_under_20[num]
        return [word] if word else []
    if num < 100:
        return [num_under_100[num // 10]] + _int_to_word_list(num % 10)
    if num < 1000:
        return (_int_to_word_list(num // 100) + ['Hundred']
                + _int_to_word_list(num % 100))
    # Number of complete groups of three digits below the leading group.
    power = (len(str(num)) - 1) // 3
    if power not in num_above_1000:
        raise ValueError(f'Amount too large to spell out: {num!r}')
    unit = 1000 ** power
    return (_int_to_word_list(num // unit) + [num_above_1000[power]]
            + _int_to_word_list(num % unit))

In [56]:
# Folder holding the source workbook(s), resolved against the current working directory.
input_path = Path.cwd().joinpath('input')

In [57]:
# Pick the workbook to process: the first *.xlsx in the input folder by name.
# glob() yields entries in no guaranteed order, so sort to make the choice
# reproducible across runs and machines.
file_list = sorted(input_path.glob('*.xlsx'))
if not file_list:
    raise FileNotFoundError(f'No .xlsx workbook found in {input_path}')
file_path = file_list[0]

In [58]:
# Load the selected workbook into a DataFrame using pandas' default read_excel options.
df = pd.read_excel(io=file_path)

In [59]:
# Build the printable columns used to fill the voucher template.
# NOTE(review): 'Total' mirrors 'Amount1' only; the Amount2..Amount8 columns
# that appear in the record are not added in - confirm this is intended.
df = df.assign(Total=df['Amount1'])
df = df.assign(
    # Amount in words, with empty tokens dropped and 'Only' appended.
    Ringgit=df['Total'].apply(num_to_word).apply(
        lambda words: ' '.join([word for word in words.split(' ') if word] + ['Only'])
    ),
    Date=df['Date'].dt.strftime('%d/%m/%Y'),
    Account1=df['Account1'].astype(str),
    # Thousands separator and two decimals, e.g. 9,979.00.
    Amount1=df['Amount1'].apply('{:,.2f}'.format),
    Total=df['Total'].apply('{:,.2f}'.format),
).fillna('')

In [60]:
# Keep only the first row as a {column: value} mapping; it supplies the template values.
records = df.to_dict(orient='records')
data = records[0]

In [61]:
# Display the first record to check the values before they are written into the template.
data

{'Date': '15/01/2024',
 'Voucher': '24/01',
 'Payee': 'Kumpulan Wang Simpanan Pekerja',
 'Method': 'DDR',
 'Reference': '',
 'Account1': '50302',
 'Particular1': 'EPF Contribution',
 'Amount1': '9,979.00',
 'Account2': '',
 'Particular2': '',
 'Amount2': '',
 'Account3': '',
 'Particular3': '',
 'Amount3': '',
 'Account4': '',
 'Particular4': '',
 'Amount4': '',
 'Account5': '',
 'Particular5': '',
 'Amount5': '',
 'Account6': '',
 'Particular6': '',
 'Amount6': '',
 'Account7': '',
 'Particular7': '',
 'Amount7': '',
 'Account8': '',
 'Particular8': '',
 'Amount8': '',
 'Total': '9,979.00',
 'Ringgit': 'Nine Thousand Nine Hundred Seventy Nine Only'}

In [62]:
# Locate the Word template to fill in.
# glob() yields entries in no guaranteed order, so sort by name to make the
# positional pick below reproducible.
# NOTE(review): index 1 selects the second *.docx (by name) in the temp
# folder - confirm that this is the intended template.
template_dir = Path.cwd() / 'temp'
template_files = sorted(template_dir.glob('*.docx'))
if len(template_files) < 2:
    raise FileNotFoundError(
        f'Expected at least two .docx templates in {template_dir}, '
        f'found {len(template_files)}'
    )
temp_path = template_files[1]

In [63]:
# Matches ${Key} placeholders; the captured group is the lookup key into `data`.
PLACEHOLDER = re.compile(r'\$\{(.*?)\}')


def _fill_placeholders(paragraph, values):
    """Substitute every ${Key} placeholder in one paragraph.

    The paragraph text is taken from its runs. If it contains a placeholder,
    each ${Key} is replaced with ``str(values[Key])``, the result is written
    into the first run and the remaining runs are blanked, so the filled text
    takes on the first run's formatting. Paragraphs without a placeholder
    are left untouched.

    Raises:
        KeyError: If a placeholder names a key that is missing from `values`.
    """
    runs = list(paragraph.runs)
    if not runs:
        return
    text = ''.join(run.text for run in runs)
    if not PLACEHOLDER.search(text):
        return
    # str() so that non-string values (numbers, dates) can be written too.
    runs[0].text = PLACEHOLDER.sub(lambda match: str(values[match.group(1)]), text)
    for run in runs[1:]:
        run.text = ''


# Fill the first table of the template from `data` and save the result.
doc = Document(temp_path)
table = doc.tables[0]
rows = list(table.rows)
for row in rows:
    for cell in row.cells:
        # Visit every paragraph: a placeholder is filled where it actually
        # sits rather than always in the cell's first paragraph.
        for paragraph in cell.paragraphs:
            _fill_placeholders(paragraph, data)
doc.save('output.docx')