In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import os
import pandas as pd

folder = "/content/drive/Shareddrives/KCW-Data/kcw_analytics/01_raw"

data = {}

for file in os.listdir(folder):
    if file.endswith(".csv"):
        path = os.path.join(folder, file)
        data[file] = pd.read_csv(
            path,
            dtype={
              "BCODE": "string",
              "ITEMNO": "string",
              "BILLNO": "string",
            },
            encoding="utf-8-sig",
            low_memory=False   # stops chunk guessing
        )
        print(f"Loaded: {file} -> {data[file].shape}")

Loaded: raw_inventory_hq_2024.csv -> (4983, 8)
Loaded: raw_syp_pimas_purchase_bills.csv -> (2900, 49)
Loaded: raw_syp_pidet_purchase_lines.csv -> (27099, 41)
Loaded: raw_syp_sidet_sales_lines.csv -> (35910, 38)
Loaded: raw_syp_simas_sales_bills.csv -> (12155, 49)
Loaded: raw_hq_icmas_products.csv -> (114887, 94)
Loaded: raw_hq_pimas_purchase_bills.csv -> (50092, 49)
Loaded: raw_hq_pidet_purchase_lines.csv -> (153305, 41)
Loaded: raw_hq_sidet_sales_lines.csv -> (732407, 38)
Loaded: raw_hq_simas_sales_bills.csv -> (275873, 49)


In [10]:
df_simas = data["raw_hq_simas_sales_bills.csv"].copy()
df_sidet = data["raw_hq_sidet_sales_lines.csv"].copy()

df_sidet["AMOUNT"] = pd.to_numeric(df_sidet["AMOUNT"], errors="coerce")
df_simas["AFTERTAX"] = pd.to_numeric(df_simas["AFTERTAX"], errors="coerce")

In [36]:
# ensure numeric first (legacy POS safe)
cols = ["AMOUNT", "VAT", "DED"]
df_sidet[cols] = df_sidet[cols].apply(pd.to_numeric, errors="coerce")

sidet_sum = (
    df_sidet
    .groupby("BILLNO", as_index=False)
    .agg(
        SIDET_AMOUNT_SUM=("AMOUNT", "sum"),
        SIDET_VAT_SUM=("VAT", "sum"),
        SIDET_DED_SUM=("DED", "sum")
    )
)

# create NETTOTAL
sidet_sum["SIDET_NETTOTAL"] = (
    sidet_sum["SIDET_AMOUNT_SUM"]
    + sidet_sum["SIDET_VAT_SUM"]
    + sidet_sum["SIDET_DED_SUM"]
)

In [37]:
df_check = df_simas.merge(
    sidet_sum,
    on="BILLNO",
    how="left"
)

In [38]:
df_check["DELTA"] = (
    pd.to_numeric(df_check["AFTERTAX"], errors="coerce")
    - pd.to_numeric(df_check["SIDET_AMOUNT_SUM"], errors="coerce")
)

In [39]:
df_problem = df_check[df_check["DELTA"].abs() > 0.01]

In [40]:
df_problem = df_problem.copy()

df_problem["BILLDATE"] = pd.to_datetime(
    df_problem["BILLDATE"],
    errors="coerce"
)

df_problem_22feb = df_problem[
    (df_problem["BILLDATE"] >= "2026-02-22") &
    (df_problem["BILLDATE"] < "2026-02-23")
]

In [44]:
df_sidet.columns

Index(['ID', 'JOURMODE', 'JOURTYPE', 'JOURDATE', 'BILLTYPE', 'BILLDATE',
       'BILLNO', 'LINE', 'ITEMNO', 'BCODE', 'PCODE', 'MCODE', 'DETAIL',
       'WHNUMBER', 'LOCATION1', 'STATUS', 'SERIAL', 'TAXIC', 'EXMPT', 'ISVAT',
       'QTY', 'UI', 'MTP', 'PRICE', 'XPRICE', 'DISCNT1', 'DISCNT2', 'DISCNT3',
       'DISCNT4', 'DED', 'VAT', 'AMOUNT', 'CHGAMT', 'ACCTNO', 'PAID',
       'ACCT_NO', 'DONE', 'CANCELED'],
      dtype='object')

In [45]:
df_problem_22feb[['BILLNO','DISCOUNT', 'TAX', 'VAT', 'DEDUCT','BEFORETAX', 'AFTERTAX', 'SIDET_AMOUNT_SUM', 'DELTA', 'SIDET_DED_SUM', 'SIDET_VAT_SUM','SIDET_NETTOTAL']]

Unnamed: 0,BILLNO,DISCOUNT,TAX,VAT,DEDUCT,BEFORETAX,AFTERTAX,SIDET_AMOUNT_SUM,DELTA,SIDET_DED_SUM,SIDET_VAT_SUM,SIDET_NETTOTAL
275761,8K69-0003199,,0.0,0.0,3.5,290.0,290.0,293.5,-3.5,0.0,0.0,293.5
275769,8K69-0003205,,0.0,0.0,4.0,1550.0,1550.0,1554.0,-4.0,0.0,0.0,1554.0
275777,6K69-0002055,,0.0,0.0,1.0,2070.0,2070.0,2071.0,-1.0,0.0,0.0,2071.0
275780,8K69-0003209,,0.0,0.0,1.0,50.0,50.0,51.0,-1.0,0.0,0.0,51.0
275786,6K69-0002059,,0.0,0.0,5.0,3460.0,3460.0,3465.0,-5.0,0.0,0.0,3465.0
275808,TR6902-024,,130.83,7.0,,1869.07,1999.9,1869.07,130.83,90.93,21.0,1981.0
275830,TR6902-025,,48.78,7.0,,696.82,745.6,696.82,48.78,43.18,21.0,761.0
275849,DN6902-008,,150.5,7.0,,2150.0,2300.5,2150.0,150.5,0.0,7.0,2157.0
275869,8K69-0003262,,0.0,0.0,2.0,10.0,10.0,12.0,-2.0,0.0,0.0,12.0


In [59]:
df_problem_22feb[['DELTA','AFTERTAX','SIDET_AMOUNT_SUM', 'DEDUCT', 'TAX']].sum()

Unnamed: 0,0
DELTA,313.61
AFTERTAX,12476.0
SIDET_AMOUNT_SUM,12162.39
DEDUCT,16.5
TAX,330.11


In [60]:
total = (
    df_simas.loc[
        (df_simas["BILLDATE"] >= "2026-02-22") &
        (df_simas["BILLDATE"] <  "2026-02-23"),
        ["AFTERTAX"]
    ]
    .sum(axis=1)
    .sum()
)

print(total)

94651.0


In [62]:
total = (
    df_sidet.loc[
        (df_sidet["BILLDATE"] >= "2026-02-22") &
        (df_sidet["BILLDATE"] <  "2026-02-23"),
        ["AMOUNT"]
    ]
    .sum(axis=1)
    .sum()
)

print(total + 330.11 - 16.50)

94651.0


In [54]:
cols = ["AFTERTAX", "DEDUCT", "TAX"]
df_simas[cols] = df_simas[cols].apply(pd.to_numeric, errors="coerce")

df_simas["BILLDATE"] = pd.to_datetime(df_simas["BILLDATE"], errors="coerce")

df_simas_22feb = df_simas[
    (df_simas["BILLDATE"] >= "2026-02-22") &
    (df_simas["BILLDATE"] <  "2026-02-23")
]

total = (
    df_simas_22feb["AFTERTAX"]
    - df_simas_22feb["DEDUCT"]
).sum()

print(total)

21863.5
