# The Audit Planning Agent - Draft Notebook

## Stage 1 - Ingestion
Ingest the trial balance and general ledger files before mapping them into a full-fledged financial report.

In [2]:
from pathlib import Path
from typing import Dict

import pandas as pd
from IPython.display import Markdown, display

# Locate the directory holding this notebook/script. ``__file__`` is not
# defined inside a Jupyter kernel, so fall back to the working directory.
try:
    NOTEBOOK_DIR = Path(__file__).resolve().parent
except NameError:  # pragma: no cover - jupyter magic
    NOTEBOOK_DIR = Path.cwd()

# The repository root is one level above the notebook directory; the Stage 1
# Excel inputs are read from its ``data`` sub-directory.
REPO_ROOT = NOTEBOOK_DIR.parent
DATA_DIR = REPO_ROOT.joinpath("data")

# Human-readable dataset label -> Excel file name inside DATA_DIR.
DATASETS: Dict[str, str] = dict(
    [
        ("Previous year trial balance", "TB_2024.xlsx"),
        ("Current year trial balance", "TB_2025.xlsx"),
        ("Previous year general ledger", "GL_30Sept2024.xlsx"),
        ("Current year general ledger", "GL_30Sept2025.xlsx"),
    ]
)


def load_dataframes(dataset_map: Dict[str, str]) -> Dict[str, pd.DataFrame]:
    """Load every Stage 1 Excel workbook into a DataFrame keyed by its label.

    Args:
        dataset_map: Mapping of dataset label to a file name, resolved
            relative to ``DATA_DIR``.

    Returns:
        A dict with the same labels (in the same order) mapped to the
        DataFrame returned by ``pd.read_excel`` for that file.

    Raises:
        FileNotFoundError: As soon as a listed file is found to be missing.
            Files listed before it have already been read at that point.
    """
    loaded: Dict[str, pd.DataFrame] = {}
    for label, filename in dataset_map.items():
        source = DATA_DIR / filename
        if source.exists():
            loaded[label] = pd.read_excel(source)
            continue
        raise FileNotFoundError(f"{source} does not exist. Check Stage 1 dataset files.")
    return loaded


def preview_dataframe(name: str, df: pd.DataFrame, sample_rows: int = 5) -> None:
    """Render highlights for one ingested DataFrame.

    Displays, in order: a heading with ``name``; the shape, column count and
    deep memory usage; a per-column overview (dtype, non-null count, and
    unique count with NaN counted as a value); the first ``sample_rows``
    rows; and a transposed ``describe`` summary of the numeric columns, or a
    short notice when the frame has no numeric columns.

    Args:
        name: Heading shown above the preview.
        df: The DataFrame to summarize.
        sample_rows: Number of leading rows shown in the sample section.

    Returns:
        None. Everything is emitted through ``display``.
    """
    display(Markdown(f"### {name}"))
    display(
        Markdown(
            f"- Shape: {df.shape}\n"
            f"- Columns: {len(df.columns)}\n"
            f"- Memory (bytes): {df.memory_usage(deep=True).sum():,}"
        )
    )
    display(Markdown("**Column overview**"))
    overview = (
        pd.DataFrame(
            {
                "dtype": df.dtypes.astype(str),
                "non-null": df.notna().sum(),
                "unique": df.nunique(dropna=False),
            }
        )
        .reset_index()
        .rename(columns={"index": "column"})
    )
    display(overview)
    display(Markdown("**Sample rows**"))
    display(df.head(sample_rows))
    display(Markdown("**Numeric summary**"))
    # Select the numeric columns *before* describing them:
    # ``df.describe(include="number")`` raises ValueError when the selection
    # is empty, which made the fallback message below unreachable.
    numeric_columns = df.select_dtypes(include="number")
    if numeric_columns.shape[1] > 0:
        display(numeric_columns.describe(include="number").T)
    else:
        display(Markdown("_No purely numeric columns to summarize._"))


# Load every workbook listed in DATASETS; the result maps each dataset label
# to its DataFrame and is kept at module level.
ingested_frames = load_dataframes(DATASETS)

# Render a preview (overview, sample rows, numeric summary) for each dataset,
# in the order the datasets were declared.
for dataset_name, dataframe in ingested_frames.items():
    preview_dataframe(dataset_name, dataframe)

### Previous year trial balance

- Shape: (137, 13)
- Columns: 13
- Memory (bytes): 35,898

**Column overview**

Unnamed: 0,column,dtype,non-null,unique
0,Account No,float64,136,137
1,Description,object,137,127
2,Category,object,136,3
3,Opening Dr,float64,33,20
4,Opening Cr,float64,20,11
5,Dr,float64,120,85
6,Cr,float64,82,61
7,Closing Dr,float64,31,21
8,Closing Cr,float64,23,13
9,Account to FS,float64,136,3


**Sample rows**

Unnamed: 0,Account No,Description,Category,Opening Dr,Opening Cr,Dr,Cr,Closing Dr,Closing Cr,Account to FS,SubAccount,Class,Số dư c.nợ
0,112.0,Tiền gửi ngân hàng,BS,6605748000.0,,11127310000.0,11462310000.0,6270754000.0,,1.0,0.0,1.0,0.0
1,1121.0,Tiền VNĐ gửi ngân hàng,BS,897868600.0,,7698427000.0,6862600000.0,1733696000.0,,0.0,0.0,2.0,0.0
2,11211.0,Tiền VNĐ gửi ngân hàng_Mizuho_H15-795-022091,BS,897868600.0,,7698427000.0,6862600000.0,1733696000.0,,0.0,1.0,3.0,0.0
3,1122.0,Tiền ngoại tệ gửi ngân hàng,BS,5707880000.0,,3428885000.0,4599707000.0,4537058000.0,,0.0,0.0,2.0,0.0
4,11222.0,Tiền USD tại Mizuho_Saving Account_F15-795-022083,BS,5707880000.0,,3428885000.0,4599707000.0,4537058000.0,,0.0,1.0,3.0,0.0


**Numeric summary**

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Account No,136.0,80442.65,188924.6,112.0,1326.0,6337.0,33384.5,642709.0
Opening Dr,33.0,1550979000.0,3072601000.0,91360.0,101595100.0,148099200.0,897868600.0,14412370000.0
Opening Cr,20.0,2158972000.0,5091790000.0,18622374.0,24422230.0,30373709.0,158914500.0,14412370000.0
Dr,120.0,1853732000.0,7062774000.0,-653330658.0,9008053.0,175738834.5,1025226000.0,74214660000.0
Cr,82.0,2710060000.0,8437397000.0,-26002382.0,10032600.0,733929142.0,2374799000.0,74214660000.0
Closing Dr,31.0,1687735000.0,3148784000.0,9206375.0,39320000.0,335576938.0,1260622000.0,14705620000.0
Closing Cr,23.0,1926042000.0,4812192000.0,3353372.0,28452370.0,46657293.0,359937900.0,14705620000.0
Account to FS,136.0,0.2205882,0.4161762,0.0,0.0,0.0,0.0,1.0
SubAccount,136.0,0.6176471,0.4877586,0.0,0.0,1.0,1.0,1.0
Class,136.0,2.191176,0.812082,1.0,2.0,2.0,3.0,4.0


### Current year trial balance

- Shape: (134, 13)
- Columns: 13
- Memory (bytes): 35,280

**Column overview**

Unnamed: 0,column,dtype,non-null,unique
0,Account No,int64,134,134
1,Description,object,134,124
2,Category,object,134,2
3,Opening Dr,float64,31,19
4,Opening Cr,float64,24,13
5,Dr,float64,118,76
6,Cr,float64,79,57
7,Closing Dr,float64,32,19
8,Closing Cr,float64,25,13
9,Account to FS,int64,134,2


**Sample rows**

Unnamed: 0,Account No,Description,Category,Opening Dr,Opening Cr,Dr,Cr,Closing Dr,Closing Cr,Account to FS,SubAccount,Class,Số dư c.nợ
0,112,Tiền gửi ngân hàng,BS,6270754000.0,,20726260000.0,20911350000.0,6085664000.0,,1,0,1,0
1,1121,Tiền VNĐ gửi ngân hàng,BS,1733696000.0,,12324430000.0,12098300000.0,1959821000.0,,0,0,2,0
2,11211,Tiền VNĐ gửi ngân hàng_Mizuho_H15-795-022091,BS,1733696000.0,,12324430000.0,12098300000.0,1959821000.0,,0,1,3,0
3,1122,Tiền ngoại tệ gửi ngân hàng,BS,4537058000.0,,8401836000.0,8813051000.0,4125843000.0,,0,0,2,0
4,11222,Tiền USD tại Mizuho_Saving Account_F15-795-022083,BS,4537058000.0,,8401836000.0,8813051000.0,4125843000.0,,0,1,3,0


**Numeric summary**

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Account No,134.0,82089.31,189963.9,112.0,1345.25,6337.0,33411.75,642709.0
Opening Dr,31.0,1220728000.0,2026580000.0,9383477.0,46085280.0,149686600.0,787548200.0,6290591000.0
Opening Cr,24.0,1242572000.0,3853259000.0,3353372.0,28452370.0,47401190.0,166479000.0,13746000000.0
Dr,118.0,1942289000.0,3840442000.0,-725841475.0,12742040.0,148258600.0,1322061000.0,20726260000.0
Cr,79.0,2909610000.0,4461241000.0,-234662244.0,35703800.0,1085109000.0,3391073000.0,20911350000.0
Closing Dr,32.0,1159204000.0,2022243000.0,17417802.0,107766700.0,188107200.0,645135900.0,7097761000.0
Closing Cr,25.0,1227605000.0,3769366000.0,28530574.0,38282360.0,120009600.0,278005300.0,13746000000.0
Account to FS,134.0,0.2164179,0.4133476,0.0,0.0,0.0,0.0,1.0
SubAccount,134.0,0.619403,0.4873555,0.0,0.0,1.0,1.0,1.0
Class,134.0,2.201493,0.8113838,1.0,2.0,2.0,3.0,4.0


### Previous year general ledger

- Shape: (6487, 18)
- Columns: 18
- Memory (bytes): 5,331,084

**Column overview**

Unnamed: 0,column,dtype,non-null,unique
0,No. (Index),float64,1721,1722
1,Voucher Date,datetime64[ns],6486,263
2,Voucher No,object,6487,1702
3,Customer Code,object,5102,120
4,Customer Name,object,5102,120
5,Description,object,6430,1469
6,Debit Account,float64,6486,96
7,TK 3 so,float64,6486,30
8,Credit Account,float64,6486,96
9,Doi ung 3 so,float64,6486,30


**Sample rows**

Unnamed: 0,No. (Index),Voucher Date,Voucher No,Customer Code,Customer Name,Description,Debit Account,TK 3 so,Credit Account,Doi ung 3 so,Debit,Credit,Currency Code,FX Rate,Credit Account Description (Vietnamese),Debit Account Description (Vietnamese),Debit Amount (Foreign Currency),Credit Amount (Foreign Currency)
0,1.0,2023-10-01,PKT2310-1,GRAB,CÔNG TY TNHH GRAB,Inv. 3537068_Phí grab taxi tháng 9.2023/ Grab ...,642706.0,642.0,33111.0,331.0,32307,0,,,Phải trả cho người bán NH: HDKD,Chi phí dịch vụ mua ngoài - Phí taxi,0.0,0.0
1,,2023-10-01,PKT2310-1,GRAB,CÔNG TY TNHH GRAB,Inv. 3537068_Phí grab taxi tháng 9.2023/ Grab ...,33111.0,331.0,642706.0,642.0,0,32307,,,Chi phí dịch vụ mua ngoài - Phí taxi,Phải trả cho người bán NH: HDKD,0.0,0.0
2,,2023-10-01,PKT2310-1,GRAB,CÔNG TY TNHH GRAB,Inv. 3537068_Phí grab taxi tháng 9.2023/ Grab ...,13311.0,133.0,33111.0,331.0,2693,0,,,Phải trả cho người bán NH: HDKD,"Thuế GTGT được khấu trừ của hàng hóa, dịch vụ ...",0.0,0.0
3,,2023-10-01,PKT2310-1,GRAB,CÔNG TY TNHH GRAB,Inv. 3537068_Phí grab taxi tháng 9.2023/ Grab ...,33111.0,331.0,13311.0,133.0,0,2693,,,"Thuế GTGT được khấu trừ của hàng hóa, dịch vụ ...",Phải trả cho người bán NH: HDKD,0.0,0.0
4,2.0,2023-10-01,PKT2310-2,GRAB,CÔNG TY TNHH GRAB,Inv. 3537069_Phí grab taxi tháng 9.2023/ Grab ...,642706.0,642.0,33111.0,331.0,35085,0,,,Phải trả cho người bán NH: HDKD,Chi phí dịch vụ mua ngoài - Phí taxi,0.0,0.0


**Numeric summary**

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
No. (Index),1721.0,861.0,496.9542,1.0,431.0,861.0,1291.0,1721.0
Debit Account,6486.0,1715735.0,9754878.0,151.0,6323.0,24228.0,138818.0,62770900.0
TK 3 so,6486.0,386.9015,227.7078,112.0,156.0,334.0,627.0,911.0
Credit Account,6486.0,1715735.0,9754878.0,151.0,6323.0,24228.0,138818.0,62770900.0
Doi ung 3 so,6486.0,386.9015,227.7078,112.0,156.0,334.0,627.0,911.0
Debit,6487.0,22881040.0,923079800.0,-312498590.0,0.0,0.0,1619858.5,74214660000.0
Credit,6487.0,22881040.0,923079800.0,-312498590.0,0.0,0.0,1619858.5,74214660000.0
FX Rate,990.0,11817.43,12356.53,0.0,0.0,175.31,24650.0,25473.0
Debit Amount (Foreign Currency),6487.0,453.0394,18681.18,-48070.0,0.0,0.0,0.0,1469433.0
Credit Amount (Foreign Currency),6487.0,453.0394,18681.18,-48070.0,0.0,0.0,0.0,1469433.0


### Current year general ledger

- Shape: (7877, 19)
- Columns: 19
- Memory (bytes): 7,451,942

**Column overview**

Unnamed: 0,column,dtype,non-null,unique
0,No. (Index),float64,1790,1791
1,Date,datetime64[ns],7876,251
2,Fiscal Year,object,7877,2
3,Voucher No,object,7793,1764
4,Customer Code,object,6054,110
5,Customer Name,object,6052,109
6,Description,object,7876,1669
7,Debit Account,float64,7876,98
8,TK 3 so,object,7877,30
9,Credit Account,float64,7876,98


**Sample rows**

Unnamed: 0,No. (Index),Date,Fiscal Year,Voucher No,Customer Code,Customer Name,Description,Debit Account,TK 3 so,Credit Account,Doi ung 3 so,Debit,Credit,Account Description,Credit Account Description (Vietnamese),FX Rate,Currency Code,Credit Amount (Foreign Currency),Debit Amount (Foreign Currency)
0,1.0,2024-10-01,2025,BC2410-2,MIZUHO,"Ngân hàng MIZUHO BANK, LTD.- Chi nhánh Thành p...",Lãi tiền gửi / Credit interest,11211.0,112,5151.0,515,128631,0,Tiền VNĐ gửi ngân hàng_Mizuho_H15-795-022091,"Lãi tiền gửi, tiền cho vay",,,0.0,0.0
1,,2024-10-01,2025,BC2410-2,MIZUHO,"Ngân hàng MIZUHO BANK, LTD.- Chi nhánh Thành p...",Lãi tiền gửi / Credit interest,5151.0,515,11211.0,112,0,128631,"Lãi tiền gửi, tiền cho vay",Tiền VNĐ gửi ngân hàng_Mizuho_H15-795-022091,,,0.0,0.0
2,2.0,2024-10-01,2025,PN236,QINGHAI,QINGHAI LANTIAN ENVIRONMENTAL PROTECTION TECHN...,Inv. PO2024080003 (CD 106610495610)_Nhập mua h...,1561.0,156,151.0,151,133218000,0,Giá mua hàng hóa,Hàng mua đang đi đường,24670.0,USD,0.0,5400.0
3,,2024-10-01,2025,PN236,QINGHAI,QINGHAI LANTIAN ENVIRONMENTAL PROTECTION TECHN...,Inv. PO2024080003 (CD 106610495610)_Nhập mua h...,151.0,151,1561.0,156,0,133218000,Hàng mua đang đi đường,Giá mua hàng hóa,24670.0,USD,5400.0,0.0
4,3.0,2024-10-01,2025,PN243,RISINGSKY,"RISINGSKY INTERNATIONAL TRADE (SHANGHAI) CO.,LTD.",Inv. E24AC08083UC (CD 106612260750)_Nhập mua h...,1561.0,156,151.0,151,188149500,0,Giá mua hàng hóa,Hàng mua đang đi đường,24750.0,USD,0.0,7602.0


**Numeric summary**

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
No. (Index),1790.0,895.5,516.8728,1.0,448.25,895.5,1342.75,1790.0
Debit Account,7876.0,2869963.0,12717100.0,151.0,11211.0,33111.0,333111.0,62770900.0
Credit Account,7876.0,2869963.0,12717100.0,151.0,11211.0,33111.0,333111.0,62770900.0
Debit,7877.0,28835080.0,1287964000.0,-306160956.0,0.0,0.0,1000000.0,113567000000.0
Credit,7877.0,28835080.0,1287964000.0,-306160956.0,0.0,0.0,1000000.0,113567000000.0
FX Rate,604.0,17213.16,11949.21,0.0,0.0,25050.0,25790.0,26468.0
Credit Amount (Foreign Currency),7877.0,289.2585,13216.76,-31088.0,0.0,0.0,0.0,1139245.0
Debit Amount (Foreign Currency),7877.0,289.2585,13216.76,-31088.0,0.0,0.0,0.0,1139245.0
