In [1]:
# !git clone https://github.com/pthengtr/kcw-analytics.git

In [2]:
# !cd /content/kcw-analytics && git pull origin main

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

# import os
# import pandas as pd

# folder = "/content/drive/Shareddrives/KCW-Data/kcw_analytics/01_raw"

In [4]:
import os
import sys

# Choose the data root and the git checkout root for the current runtime.
# Colab is recognised by its package already being present in sys.modules.
if "google.colab" not in sys.modules:
    # Local Jupyter: Windows drive-letter paths.
    BASE_FOLDER, BASE_FOLDER_GIT = r"G:\Shared drives", r"C:\Users\Windows 11\Notebook"
else:
    # Colab: mount Google Drive, then point at the shared-drive root.
    from google.colab import drive

    drive.mount("/content/drive")
    BASE_FOLDER, BASE_FOLDER_GIT = "/content/drive/Shareddrives", "/content"

print("Using folder:", BASE_FOLDER)

Using folder: G:\Shared drives


In [5]:
# Raw CSV exports live under <BASE_FOLDER>/KCW-Data/kcw_analytics/01_raw.
folder = "/".join((BASE_FOLDER, "KCW-Data", "kcw_analytics", "01_raw"))

In [6]:
import os
import pandas as pd

# Load every CSV found in the raw folder into a dict keyed by file name.
data = {}

for file in os.listdir(folder):
    if not file.endswith(".csv"):
        continue  # anything that is not a CSV export is ignored

    path = os.path.join(folder, file)
    data[file] = pd.read_csv(
        path,
        # Identifier columns are read as strings so codes keep their exact text
        # (e.g. a leading zero) instead of being parsed as numbers.
        dtype={"BCODE": "string", "ITEMNO": "string", "BILLNO": "string"},
        encoding="utf-8-sig",
        # Parse the file in one pass rather than in chunks with per-chunk dtype guesses.
        low_memory=False,
    )
    print(f"Loaded: {file} -> {data[file].shape}")

Loaded: raw_inventory_hq_2024.csv -> (4983, 8)
Loaded: raw_syp_simas_sales_bills.csv -> (12366, 49)
Loaded: raw_hq_pidet_purchase_lines.csv -> (153764, 41)
Loaded: raw_syp_sidet_sales_lines.csv -> (36551, 38)
Loaded: raw_syp_pimas_purchase_bills.csv -> (2919, 49)
Loaded: raw_hq_simas_sales_bills.csv -> (275969, 49)
Loaded: raw_hq_pimas_purchase_bills.csv -> (50222, 49)
Loaded: raw_hq_sidet_sales_lines.csv -> (733097, 38)
Loaded: raw_syp_pidet_purchase_lines.csv -> (27266, 41)
Loaded: raw_hq_icmas_products.csv -> (114922, 94)
Loaded: raw_hq_pvmas_notes_vouchers.csv -> (13730, 32)


In [7]:
import sys
import importlib

# Make the kcw-analytics checkout importable: its folder is appended to
# sys.path once (the membership test keeps re-runs from adding duplicates).
repo_path = f"{BASE_FOLDER_GIT}/kcw-analytics"
if repo_path not in sys.path:
    sys.path.append(repo_path)

# Import the module object (NOT individual functions) so that it can be
# reloaded below; this import must come after the sys.path change above.
import src.kcw.utils as utils

# Reload to pick up edits made to the .py file since the kernel first imported it.
importlib.reload(utils)

# Bind the helpers to notebook-level names AFTER the reload, so the names
# refer to the freshly loaded function objects.
get_vat_sales_lines_last_purchase_nonvat = utils.get_vat_sales_lines_last_purchase_nonvat
audit_bcode_vat_sales_last_purchase = utils.audit_bcode_vat_sales_last_purchase

In [8]:
def filter_year_month(df, year, month, date_col="BILLDATE"):
    """Return the rows of ``df`` whose ``date_col`` falls in one calendar month.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``date_col``.
    year, month : int-like
        Target year and month (1-12). Integer-valued floats and numpy integers
        are accepted, because ``Series.dt.year.max()`` returns a float when the
        column contains missing dates.
    date_col : str
        Name of the date column; values are parsed with ``pd.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        The matching rows, original index preserved. Rows whose date is
        missing never match.

    Raises
    ------
    ValueError
        If ``year``/``month`` cannot be converted to integers or do not form a
        valid month.
    """
    # Compare against a real Period instead of a formatted string: formatting
    # with ":02d" fails for float inputs such as 2.0.
    target = pd.Period(year=int(year), month=int(month), freq="M")
    periods = pd.to_datetime(df[date_col]).dt.to_period("M")
    return df[periods == target]

In [9]:
# Derive the reporting period (latest year/month present in the HQ sales lines).
df_sidet = data['raw_hq_sidet_sales_lines.csv'].copy()

dt = pd.to_datetime(df_sidet["BILLDATE"], errors="coerce")

# The latest timestamp carries both the max year and the max month within that
# year. Reading them from one Timestamp (instead of .dt.year.max()) keeps them
# integers even when unparseable dates were coerced to NaT, which would
# otherwise turn the year/month columns into floats (e.g. 2026.0).
latest_billdate = dt.max()
if pd.isna(latest_billdate):
    raise ValueError("No parseable BILLDATE found in raw_hq_sidet_sales_lines.csv")

YEAR = int(latest_billdate.year)
MONTH = int(latest_billdate.month)

print(YEAR, MONTH)

2026 2


In [10]:
# Run the repo helper once per branch ("hq" and "syp") on the loaded CSVs.
# Only the year is passed here; the month restriction is applied afterwards
# with filter_year_month.
# NOTE(review): the helper is defined in src.kcw.utils and is not visible in
# this notebook — confirm its exact selection rules there.
vat_sales_last_purchase_nonvat_hq = get_vat_sales_lines_last_purchase_nonvat(
    data, year=YEAR, source="hq"
)

vat_sales_last_purchase_nonvat_syp = get_vat_sales_lines_last_purchase_nonvat(
    data, year=YEAR, source="syp"
)


In [11]:
# Exclude bills whose number starts with TAR / CN / DN, then keep only the
# rows dated in the reporting month (YEAR, MONTH).
vat_sales_last_purchase_nonvat_hq_cleaned = filter_year_month(
    vat_sales_last_purchase_nonvat_hq.loc[
        lambda frame: ~frame["BILLNO"].astype(str).str.startswith(("TAR", "CN", "DN"), na=False)
    ],
    YEAR,
    MONTH,
)

vat_sales_last_purchase_nonvat_hq_cleaned

Unnamed: 0,ID,JOURMODE,JOURTYPE,JOURDATE,BILLTYPE,BILLDATE,BILLNO,LINE,ITEMNO,BCODE,...,DED,VAT,AMOUNT,CHGAMT,ACCTNO,PAID,ACCT_NO,DONE,CANCELED,LAST_PURCHASE_ISVAT
6320,1244127,1,SJ,2026-02-01,1.0,2026-02-01,TD6902-013,20,2.0,02050661,...,,7.0,430.0,,7ทอทอดต,N,SRI,N,N,N
6337,1244085,1,SJ,2026-02-01,1.0,2026-02-01,TD6902-004,40,4.0,07051774,...,,7.0,420.0,,7PCH,N,SKM,N,N,N
6390,1244136,1,SJ,2026-02-01,1.0,2026-02-01,TD6902-015,30,3.0,13051071,...,,7.0,360.0,,7ทอทอดต,N,SKM,N,N,N
6401,1244093,1,SJ,2026-02-01,1.0,2026-02-01,TD6902-007,10,1.0,14050231,...,,7.0,550.0,,7PCH,N,CRRK,N,N,N
6404,1244094,1,SJ,2026-02-01,1.0,2026-02-01,TD6902-008,10,1.0,15011623,...,,7.0,1650.0,,7PCH,N,7VP,N,N,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11250,1256943,1,SJ,2026-02-25,1.0,2026-02-25,TD6902-144,20,2.0,15033057,...,,7.0,660.0,,7สหกต,N,TLT,N,N,N
11251,1256942,1,SJ,2026-02-25,1.0,2026-02-25,TD6902-144,10,1.0,15033058,...,,7.0,600.0,,7สหกต,N,TLT,N,N,N
11319,1256817,1,SJ,2026-02-25,1.0,2026-02-25,TAD6902-543,10,1.0,30050564,...,,7.0,3760.0,,7SICE,N,PTK,N,N,N
11320,1257066,1,SJ,2026-02-25,1.0,2026-02-25,TR6902-030,30,3.0,30051089,...,,7.0,160.0,,7มทม,N,BK,N,N,N


In [12]:
# Exclude bills whose number starts with 3TAR / 3CN / 3DN, then keep only the
# rows dated in the reporting month (YEAR, MONTH).
vat_sales_last_purchase_nonvat_syp_cleaned = filter_year_month(
    vat_sales_last_purchase_nonvat_syp.loc[
        lambda frame: ~frame["BILLNO"].astype(str).str.startswith(("3TAR", "3CN", "3DN"), na=False)
    ],
    YEAR,
    MONTH,
)

vat_sales_last_purchase_nonvat_syp_cleaned

Unnamed: 0,ID,JOURMODE,JOURTYPE,JOURDATE,BILLTYPE,BILLDATE,BILLNO,LINE,ITEMNO,BCODE,...,DED,VAT,AMOUNT,CHGAMT,ACCTNO,PAID,ACCT_NO,DONE,CANCELED,LAST_PURCHASE_ISVAT
2390,1926443,1,SJ,2026-02-05,1,2026-02-05,3TR6902-001,30,3.0,13052478,...,,7.0,120.0,,7ชจร,N,SKT,N,N,N
2391,1926447,1,SJ,2026-02-05,1,2026-02-05,3TR6902-001,70,7.0,13052497,...,,7.0,280.0,,7ชจร,N,KV,N,N,N
2410,1926441,1,SJ,2026-02-05,1,2026-02-05,3TR6902-001,10,1.0,21050290,...,,7.0,95.0,,7ชจร,N,JPL,N,N,N
3646,2028122,1,SJ,2026-02-21,1,2026-02-21,3TR6902-006,40,4.0,13021120,...,,7.0,20.0,,7ชจร,N,SHSW,N,N,N
3647,2028121,1,SJ,2026-02-21,1,2026-02-21,3TR6902-006,30,3.0,13021130,...,,7.0,20.0,,7ชจร,N,SHSW,N,N,N
3656,2028123,1,SJ,2026-02-21,1,2026-02-21,3TR6902-006,50,5.0,13050418,...,,7.0,40.0,,7ชจร,N,QC,N,N,N
3677,2028128,1,SJ,2026-02-21,1,2026-02-21,3TR6902-006,100,10.0,19060007,...,,7.0,320.0,,7ชจร,N,ANP,N,N,N


In [13]:
import pandas as pd
import numpy as np

def enrich_sales_with_lastcost_and_newbillno(
    data: dict,
    sales_lines: pd.DataFrame,
    *,
    source: str,  # "hq" or "syp"
    purchase_key: str = "raw_hq_pidet_purchase_lines.csv",
    # common columns
    bcode_col: str = "BCODE",
    date_col: str = "BILLDATE",
    # sales qty columns (kept for signature compatibility)
    sales_qty_col: str = "QTY",
    sales_mtp_col: str = "MTP",
    # purchase columns for cost calc
    purchase_amount_col: str = "AMOUNT",
    purchase_qty_col: str = "QTY",
    purchase_mtp_col: str = "MTP",
    # bill cols (kept for signature compatibility)
    billno_col: str = "BILLNO",
    # output
    output_cols: tuple = (
        "BCODE", "BILLDATE", "BILLNO", "DETAIL", "QTY", "MTP", "UI", "AMOUNT", "ACCT_NO",
        "CANCELED", "ISVAT", "LAST_PURCHASE_ISVAT", "LAST_COST"
    ),
    # numbering
    lines_per_bill: int = 20,
) -> pd.DataFrame:
    """
    Attach a last-purchase unit cost and a generated bill number to sales lines.

    Steps:
      1) LAST_COST: for every sales row, the unit cost
         (AMOUNT / (QTY * MTP)) of the most recent purchase of the same BCODE
         dated on or before the sale date, taken from ``data[purchase_key]``.
         Rows without such a purchase get NaN.
      2) NEW_BILLNO: ``<prefix><yy><mm>-<seq>`` where prefix is "RV" for HQ and
         "3RV" for SYP, yy is the two-digit Buddhist-era year (year + 543),
         mm the month and seq a 3-digit counter. Rows are ordered by day and
         BCODE; a new number starts on every new day and after every
         ``lines_per_bill`` rows within a day; the counter restarts at 001 each
         calendar month.
      3) Returns ``output_cols`` (missing ones are added as NA) plus NEW_BILLNO.

    Parameters
    ----------
    data : dict
        Mapping of file name -> DataFrame; only ``data[purchase_key]`` is read.
    sales_lines : pandas.DataFrame
        Already-filtered sales lines; must contain ``bcode_col`` and ``date_col``.
    source : {"hq", "syp"}
        Case-insensitive; selects the bill-number prefix only.
    purchase_key : str
        Key of the purchase-lines frame in ``data``.
        NOTE(review): the default points at the HQ purchase file regardless of
        ``source`` — pass the SYP key explicitly if SYP costs should come from
        SYP purchases.
    bcode_col, date_col : str
        Join key and date column, present in both frames.
    purchase_amount_col, purchase_qty_col, purchase_mtp_col : str
        Purchase columns used for the unit-cost calculation.
    sales_qty_col, sales_mtp_col, billno_col : str
        Unused; kept so existing callers' keyword arguments still work.
    output_cols : tuple of str
        Columns to return, in order (NEW_BILLNO is appended).
    lines_per_bill : int
        Maximum number of rows sharing one NEW_BILLNO within a day (default 20).

    Returns
    -------
    pandas.DataFrame
        Sorted by day then BCODE, with a fresh RangeIndex. Sales rows with a
        missing/blank BCODE or an unparseable date are dropped.

    Raises
    ------
    ValueError
        If ``source`` is not "hq"/"syp" or ``lines_per_bill`` is < 1.
    KeyError
        If ``bcode_col``/``date_col`` is missing from either frame.
    """

    # --- validate arguments ---
    source = source.lower().strip()
    if source not in ("hq", "syp"):
        raise ValueError("source must be 'hq' or 'syp'")

    lines_per_bill = int(lines_per_bill)
    if lines_per_bill < 1:
        raise ValueError("lines_per_bill must be >= 1")

    bill_prefix = "RV" if source == "hq" else "3RV"

    # --- helpers ---
    def _clean_cols(df: pd.DataFrame) -> pd.DataFrame:
        # Copy with column names stripped of a BOM character and outer whitespace.
        out = df.copy()
        out.columns = (
            out.columns.astype(str)
            .str.replace("\ufeff", "", regex=False)
            .str.strip()
        )
        return out

    def _clean_bcode(s: pd.Series) -> pd.Series:
        return s.astype("string").str.strip()

    # --- copy & clean (inputs are never mutated) ---
    sales = _clean_cols(sales_lines)
    purch = _clean_cols(data[purchase_key])

    # Ensure key columns exist
    for col in [bcode_col, date_col]:
        if col not in sales.columns:
            raise KeyError(f"sales_lines missing required column: {col}")
        if col not in purch.columns:
            raise KeyError(f"purchase data missing required column: {col}")

    # Clean BCODE + dates
    sales[bcode_col] = _clean_bcode(sales[bcode_col])
    purch[bcode_col] = _clean_bcode(purch[bcode_col])

    sales[date_col] = pd.to_datetime(sales[date_col], errors="coerce")
    purch[date_col] = pd.to_datetime(purch[date_col], errors="coerce")

    sales = sales.dropna(subset=[bcode_col, date_col])
    purch = purch.dropna(subset=[bcode_col, date_col])

    sales = sales[sales[bcode_col] != ""].copy()
    purch = purch[purch[bcode_col] != ""]

    # Reduce purchases to only needed BCODEs (perf)
    purch = purch[purch[bcode_col].isin(sales[bcode_col].unique())].copy()

    # --- 1) LAST_COST as-of sale date (no rolling average cost) ---
    for c in [purchase_amount_col, purchase_qty_col, purchase_mtp_col]:
        purch[c] = pd.to_numeric(purch[c], errors="coerce")

    purch = purch.dropna(subset=[purchase_amount_col, purchase_qty_col, purchase_mtp_col])
    # Zero quantities and non-positive multipliers cannot yield a unit cost.
    purch = purch[(purch[purchase_qty_col] != 0) & (purch[purchase_mtp_col] > 0)].copy()

    # The filter above guarantees a non-zero divisor.
    purch["__LASTCOST__"] = purch[purchase_amount_col] / (
        purch[purchase_qty_col] * purch[purchase_mtp_col]
    )
    purch = purch.dropna(subset=["__LASTCOST__"])

    purch_key = purch[[bcode_col, date_col, "__LASTCOST__"]]

    # merge_asof requires both sides sorted by the ON key (date); BCODE is a
    # secondary key so ties are ordered deterministically (stable sort).
    sales_sorted = sales.sort_values([date_col, bcode_col], kind="mergesort").reset_index(drop=True)
    purch_key = purch_key.sort_values([date_col, bcode_col], kind="mergesort").reset_index(drop=True)

    merged = pd.merge_asof(
        sales_sorted,
        purch_key,
        left_on=date_col,
        right_on=date_col,
        by=bcode_col,
        direction="backward",
        allow_exact_matches=True,
    )

    merged["LAST_COST"] = merged["__LASTCOST__"]
    merged = merged.drop(columns=["__LASTCOST__"])

    # --- 2) NEW_BILLNO (monthly reset, new number per day and per chunk) ---
    merged["__DATEKEY__"] = merged[date_col].dt.normalize()
    merged = merged.sort_values(["__DATEKEY__", bcode_col], kind="mergesort").reset_index(drop=True)

    date_key = merged["__DATEKEY__"]
    day_chunk = merged.groupby("__DATEKEY__", sort=False).cumcount() // lines_per_bill

    # Rows are in day order and chunks are contiguous within a day, so a bill
    # starts wherever the day or the chunk differs from the previous row. The
    # running count of such starts inside each month is the bill sequence.
    starts_new_bill = date_key.ne(date_key.shift()) | day_chunk.ne(day_chunk.shift())
    month_id = date_key.dt.year * 100 + date_key.dt.month
    seq = starts_new_bill.astype(int).groupby(month_id).cumsum()

    bd_yy = (date_key.dt.year + 543) % 100  # two-digit Buddhist-era year
    merged["NEW_BILLNO"] = (
        bill_prefix
        + bd_yy.astype(int).astype(str).str.zfill(2)
        + date_key.dt.month.astype(int).astype(str).str.zfill(2)
        + "-"
        + seq.astype(int).astype(str).str.zfill(3)
    )

    merged = merged.drop(columns=["__DATEKEY__"])

    # --- 3) Output ---
    for col in output_cols:
        if col not in merged.columns:
            merged[col] = pd.NA

    return merged[list(output_cols) + ["NEW_BILLNO"]].copy()


In [14]:
# Stage the SYP lines: adds LAST_COST and NEW_BILLNO (3RV... numbering).
# NOTE(review): purchase_key is left at its default, so SYP costs are looked up
# in the HQ purchase lines ("raw_hq_pidet_purchase_lines.csv") even though
# "raw_syp_pidet_purchase_lines.csv" is also loaded — confirm this is intended.
out_syp_staged = enrich_sales_with_lastcost_and_newbillno(
    data=data,
    sales_lines=vat_sales_last_purchase_nonvat_syp_cleaned,
    source = "syp"
)

# Stage the HQ lines the same way (RV... numbering).
out_hq_staged = enrich_sales_with_lastcost_and_newbillno(
    data=data,
    sales_lines=vat_sales_last_purchase_nonvat_hq_cleaned,
    source = "hq"
)


In [15]:
import pandas as pd
import numpy as np

def refill_last_cost_from_icmas(
    data: dict,
    df: pd.DataFrame,
    *,
    icmas_key: str = "raw_hq_icmas_products.csv",
    bcode_col: str = "BCODE",
    last_cost_col: str = "LAST_COST",
    icmas_cost_col: str = "COSTNET",
) -> pd.DataFrame:
    """
    Refill LAST_COST when it is 0 or NaN using the cost column from ICMAS.

    BCODE is normalised (string, stripped, upper-cased) on both sides before
    the lookup so formatting differences do not cause mismatches.

    The lookup is one-to-one: when ICMAS holds several rows for the same
    normalised BCODE, the first row with a usable (non-null, non-zero) cost is
    used, falling back to the first row otherwise. The output therefore always
    has exactly one row per input row, and a column in ``df`` that happens to
    share the name ``icmas_cost_col`` is left untouched.

    Parameters
    ----------
    data : dict
        Mapping of file name -> DataFrame; only ``data[icmas_key]`` is read.
    df : pandas.DataFrame
        Frame with ``bcode_col`` and ``last_cost_col``. Not modified.
    icmas_key, bcode_col, last_cost_col, icmas_cost_col : str
        Names of the ICMAS entry in ``data`` and of the columns involved.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with a fresh RangeIndex, normalised ``bcode_col``,
        numeric ``last_cost_col`` (non-numeric values become NaN) and invalid
        costs replaced by the ICMAS cost where one exists.
    """

    def _normalise(codes: pd.Series) -> pd.Series:
        # BCODE cleanup: same text form on both sides of the lookup.
        return codes.astype(str).str.strip().str.upper()

    # reset_index mirrors the RangeIndex a merge-based implementation returns.
    result = df.reset_index(drop=True).copy()
    result[bcode_col] = _normalise(result[bcode_col])
    result[last_cost_col] = pd.to_numeric(result[last_cost_col], errors="coerce")

    icmas = data[icmas_key][[bcode_col, icmas_cost_col]].copy()
    icmas[bcode_col] = _normalise(icmas[bcode_col])
    icmas[icmas_cost_col] = pd.to_numeric(icmas[icmas_cost_col], errors="coerce")

    # One cost per BCODE: the stable sort puts rows with a usable cost first
    # while keeping the original order among equals.
    usable = icmas[icmas_cost_col].notna() & icmas[icmas_cost_col].ne(0)
    icmas = (
        icmas.assign(__USABLE__=usable)
        .sort_values("__USABLE__", ascending=False, kind="mergesort")
        .drop_duplicates(subset=bcode_col, keep="first")
    )
    cost_by_bcode = icmas.set_index(bcode_col)[icmas_cost_col]

    # A map (not a merge) cannot duplicate rows or collide with df's columns.
    fallback = result[bcode_col].map(cost_by_bcode)

    # Refill only the rows whose LAST_COST is missing or zero.
    mask_invalid = result[last_cost_col].isna() | result[last_cost_col].eq(0)
    result[last_cost_col] = result[last_cost_col].where(~mask_invalid, fallback)

    return result


In [16]:
# Fill LAST_COST values that are 0 or NaN from the ICMAS product table,
# producing the working frames used by the cells below.
out_hq = refill_last_cost_from_icmas(data, out_hq_staged)
out_syp = refill_last_cost_from_icmas(data, out_syp_staged)

In [17]:
# --- ensure numeric (handles "UNKNOWN" too) ---
# Coerce LAST_COST to numbers; non-numeric text such as "UNKNOWN" becomes NaN.
out_hq["LAST_COST"] = pd.to_numeric(out_hq["LAST_COST"], errors="coerce")

# Keep only rows with a usable (non-missing, non-zero) cost and report how
# many rows that removed.
before_count = len(out_hq)
out_hq = out_hq.loc[out_hq["LAST_COST"].fillna(0).ne(0)]
after_count = len(out_hq)
removed_count = before_count - after_count

print(f"HQ: Removed {removed_count} rows where LAST_COST was 0 or NaN")


HQ: Removed 1 rows where LAST_COST was 0 or NaN


In [18]:
# --- ensure numeric (handles "UNKNOWN" too) ---
# Coerce LAST_COST to numbers; non-numeric text such as "UNKNOWN" becomes NaN.
out_syp["LAST_COST"] = pd.to_numeric(out_syp["LAST_COST"], errors="coerce")

# Keep only rows with a usable (non-missing, non-zero) cost and report how
# many rows that removed.
before_count = len(out_syp)
out_syp = out_syp.loc[out_syp["LAST_COST"].fillna(0).ne(0)]
after_count = len(out_syp)
removed_count = before_count - after_count

print(f"SYP: Removed {removed_count} rows where LAST_COST was 0 or NaN")

SYP: Removed 0 rows where LAST_COST was 0 or NaN


In [19]:
# !apt-get -y install libpango-1.0-0 libpangoft2-1.0-0 libcairo2 libgdk-pixbuf2.0-0 libffi-dev shared-mime-info
!pip install -U weasyprint



In [20]:
import os
import pandas as pd
import numpy as np
from weasyprint import HTML, CSS
from pathlib import Path

# -----------------------
# Thai helpers (reuse yours)
# -----------------------
# Abbreviated Thai month names, January first.
TH_MONTHS_ABBR = [
    "ม.ค.",
    "ก.พ.",
    "มี.ค.",
    "เม.ย.",
    "พ.ค.",
    "มิ.ย.",
    "ก.ค.",
    "ส.ค.",
    "ก.ย.",
    "ต.ค.",
    "พ.ย.",
    "ธ.ค.",
]


def thai_date(d) -> str:
    """Format a date as "<day> <Thai month abbreviation> <Buddhist-era year>".

    ``d`` is anything ``pd.to_datetime`` can parse into a single timestamp;
    the Buddhist-era year is the Gregorian year plus 543.
    """
    moment = pd.to_datetime(d).to_pydatetime()
    month_abbr = TH_MONTHS_ABBR[moment.month - 1]
    buddhist_year = moment.year + 543
    return " ".join([str(moment.day), month_abbr, str(buddhist_year)])

def _thai_read_integer(num: int) -> str:
    if num == 0:
        return "ศูนย์"

    units = ["", "สิบ", "ร้อย", "พัน", "หมื่น", "แสน"]
    digits = ["ศูนย์", "หนึ่ง", "สอง", "สาม", "สี่", "ห้า", "หก", "เจ็ด", "แปด", "เก้า"]

    def read_under_million(n: int) -> str:
        s = ""
        d = list(map(int, str(n)))
        L = len(d)
        for i, x in enumerate(d):
            pos = L - i - 1
            if x == 0:
                continue
            if pos == 0:
                s += "เอ็ด" if (x == 1 and L > 1) else digits[x]
            elif pos == 1:
                if x == 1: s += "สิบ"
                elif x == 2: s += "ยี่สิบ"
                else: s += digits[x] + "สิบ"
            else:
                s += digits[x] + units[pos]
        return s

    out = ""
    parts = []
    while num > 0:
        parts.append(num % 1_000_000)
        num //= 1_000_000

    for i in range(len(parts)-1, -1, -1):
        n = parts[i]
        if n == 0:
            continue
        out += read_under_million(n)
        if i != 0:
            out += "ล้าน"
    return out

def thai_baht_text(amount) -> str:
    """Spell a money amount in Thai words (baht and satang).

    ``None`` is treated as 0. Negative amounts are prefixed with "ลบ". The
    fractional part is rounded to whole satang; a satang value that rounds up
    to 100 is carried into the baht part. Whole amounts end with "ถ้วน".
    """
    value = 0.0 if amount is None else float(amount)
    if value < 0:
        return "ลบ" + thai_baht_text(-value)

    whole = int(value)
    cents = int(round((value - whole) * 100))
    if cents == 100:
        # e.g. x.999 rounds to a full baht
        whole, cents = whole + 1, 0

    text = _thai_read_integer(whole) + "บาท"
    if cents == 0:
        return text + "ถ้วน"
    return text + _thai_read_integer(cents) + "สตางค์"

def money(x):
    """Format ``x`` with thousands separators and two decimals.

    Returns an empty string when ``x`` cannot be converted to a float.
    """
    try:
        number = float(x)
    except Exception:
        return ""
    return format(number, ",.2f")

def esc(s):
    """Minimal HTML escape for text placed in element content or attributes.

    ``&``, ``<``, ``>`` and ``"`` are replaced by their entities. Missing
    values — ``None`` as well as scalar NaN / ``pd.NA`` / ``NaT`` — become an
    empty string, so a blank cell is not rendered as "nan" or "<NA>".
    Any other value is converted with ``str`` first.
    """
    if s is None:
        return ""
    # pd.isna returns an array for list-likes, so only test scalars.
    if pd.api.types.is_scalar(s) and pd.isna(s):
        return ""
    return (str(s)
            .replace("&", "&amp;")  # must be first so entities are not double-escaped
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace('"', "&quot;"))

# -----------------------
# Build ONE receipt HTML -> PDF
# -----------------------
def build_one_receipt_pdf_weasy(group_df: pd.DataFrame, pdf_path: str, *,
                               font_dir: str,
                               signature_path: str) -> None:
    """Render the lines of one receipt to a single A4 PDF with WeasyPrint.

    Parameters
    ----------
    group_df : pandas.DataFrame
        All lines of one receipt. NEW_BILLNO and BILLDATE are read from the
        first row; BCODE, DETAIL, QTY, MTP, UI and LAST_COST are read per row.
    pdf_path : str
        Destination file passed to ``write_pdf``.
    font_dir : str
        Folder containing ``THSarabunNew.ttf`` and ``THSarabunNew-Bold.ttf``.
    signature_path : str
        Image file embedded in the signature area.

    Notes
    -----
    The row total is LAST_COST * QTY * MTP (a missing LAST_COST counts as 0 in
    the total and is printed as "UNKNOWN" in the price column); the grand total
    is the sum of the row totals and is also written out in Thai words.
    ``group_df`` itself is not modified.
    """
    # paths for @font-face (local files), converted to file:// URLs
    sarabun = Path(font_dir, "THSarabunNew.ttf").resolve().as_uri()
    sarabun_bold = Path(font_dir, "THSarabunNew-Bold.ttf").resolve().as_uri()

    # header fields (taken from the first row of the group)
    new_billno = str(group_df["NEW_BILLNO"].iloc[0])
    billdate = thai_date(group_df["BILLDATE"].iloc[0])

    # Branch label printed after the buyer's company name: bill numbers that
    # start with "3" get the second label, everything else the first
    # ("สำนักงานใหญ่" = head office).
    branch_text = "สำนักงานใหญ่"
    if new_billno.startswith("3"):
        branch_text = "สี่แยกพัฒนา"

    # compute receipt numbers on a copy so the caller's frame is untouched
    # NOTE(review): df.get(col, default) returns the scalar default when the
    # column is absent, and the chained .fillna presumably fails on a scalar —
    # this assumes QTY, MTP and LAST_COST are always present; confirm.
    df = group_df.copy()
    df["QTY"] = pd.to_numeric(df.get("QTY", 0), errors="coerce").fillna(0)
    df["MTP"] = pd.to_numeric(df.get("MTP", 0), errors="coerce").fillna(0)
    last_cost = pd.to_numeric(df.get("LAST_COST", np.nan), errors="coerce")

    # NOTE(review): the printed unit price is LAST_COST and the printed
    # quantity is QTY, but the row total also multiplies by MTP — when
    # MTP != 1 price x quantity will not equal the printed total; confirm
    # this presentation is intended.
    df["UNIT_PRICE"] = last_cost
    df["AMOUNT_CALC"] = last_cost.fillna(0) * df["QTY"] * df["MTP"]

    grand_total = float(df["AMOUNT_CALC"].sum())
    thai_words = thai_baht_text(grand_total)

    # build table rows (one <tr> per receipt line, text fields HTML-escaped)
    rows_html = []
    for _, r in df.iterrows():
        bcode = esc(r.get("BCODE", ""))
        detail = esc(r.get("DETAIL", "")).replace("\n", " ")
        unit_price = money(r["UNIT_PRICE"]) if pd.notna(r["UNIT_PRICE"]) else "UNKNOWN"
        qty = r.get("QTY", 0)
        # whole quantities print without decimals, fractional ones with two
        qty_str = money(qty) if (float(qty) % 1 != 0) else str(int(qty))
        unit = esc(r.get("UI", ""))
        total = money(r.get("AMOUNT_CALC", 0))

        rows_html.append(f"""
          <tr>
            <td class="c-bcode">{bcode}</td>
            <td class="c-detail">{detail}</td>
            <td class="c-num">{unit_price}</td>
            <td class="c-num">{qty_str}</td>
            <td class="c-unit">{unit}</td>
            <td class="c-num">{total}</td>
          </tr>
        """)

    rows_html = "\n".join(rows_html)

    # signature path: WeasyPrint likes file:// URLs
    # NOTE(review): the same image is placed in both signature rows of the
    # template below (recipient and payer) — confirm that is intended.
    sig_url = Path(signature_path).resolve().as_uri()

    # Full HTML document; doubled braces are literal CSS braces in the f-string.
    html = f"""
<!doctype html>
<html>
<head>
  <meta charset="utf-8">
  <style>
    @font-face {{
      font-family: "Sarabun";
      src: url("{sarabun}");
    }}
    @font-face {{
      font-family: "Sarabun";
      src: url("{sarabun_bold}");
      font-weight: bold;
    }}

    @page {{
      size: A4;
      margin: 18px 24px;
    }}

    body {{
      font-family: "Sarabun";
      font-size: 14px;
      line-height: 1.35;
      color: #000;
    }}

    .title {{
      font-size: 22px;
      font-weight: bold;
      margin-bottom: 8px;
    }}

    .hdr {{
      display: flex;
      justify-content: flex-end;
      gap: 24px;
      margin-bottom: 10px;
    }}
    .hdr-col {{
      text-align: right;
    }}
    .lbl {{
      font-weight: bold;
    }}

    .block {{
      margin: 6px 0;
    }}

    table {{
      width: 100%;
      border-collapse: collapse;
      margin-top: 8px;
    }}
    th, td {{
      border: 1px solid #000;
      padding: 4px 6px;
      vertical-align: top;
    }}
    th {{
      background: #f2f2f2;
      text-align: center;
      font-weight: bold;
    }}

    .c-bcode {{ width: 14%; }}
    .c-detail {{ width: 46%; }}
    .c-unit  {{ width: 8%; text-align: center; }}
    .c-num   {{ width: 10%; text-align: right; white-space: nowrap; }}

    .totals {{
      margin-top: 10px;
      text-align: right;
      font-weight: bold;
      font-size: 16px;
    }}
    .words {{
      margin-top: 2px;
      text-align: right;
      font-size: 13px;
      font-weight: normal;
    }}

    .sign-wrap {{
      margin-top: 18px;
      display: flex;
      justify-content: flex-end;
    }}
    .sign-table {{
      border-collapse: collapse;
      width: 320px;
    }}
    .sign-table td {{
      border: none;
      padding: 2px 6px;
    }}
    .sign-img {{
      width: 188px;
      height: 48px;
      object-fit: contain;
      display: block;
    }}
    .note {{
      margin-top: 10px;
      text-align: right;
      font-size: 12px;
    }}
  </style>
</head>

<body>


  <div class="hdr">
    <div class="hdr-col">
      <div class="title">ใบสำคัญรับเงิน</div>
      <div><span class="lbl">เลขที่:</span> {esc(new_billno)}</div>
      <div><span class="lbl">วันที่:</span> {esc(billdate)}</div>
    </div>
  </div>

  <div class="block"><span class="lbl">ข้าพเจ้า:</span> นางสาวนฤมล วิทยผโลทัย (ผู้ขายสินค้า)</div>
  <div class="block"><span class="lbl">ที่อยู่:</span> 305 หมู่ 1 ตำบล ชุมแสง อำเภอ วังจันทร์ จังหวัด ระยอง</div>
  <div class="block"><span class="lbl">เลขประจำตัวผู้เสียภาษี:</span> 1-2001-99001-42-8</div>

  <div class="block" style="margin-top:10px;">
    ได้รับเงินจาก บริษัทเกียรติชัยอะไหล่ยนต์ 2007 จำกัด ({esc(branch_text)}) (ผู้ซื้อ) ดังรายการต่อไปนี้
  </div>

  <table>
    <thead>
      <tr>
        <th>รหัสสินค้า</th>
        <th>รายการ</th>
        <th>ราคา/หน่วย</th>
        <th>จำนวน</th>
        <th>หน่วย</th>
        <th>รวมยอดเงิน</th>
      </tr>
    </thead>
    <tbody>
      {rows_html}
    </tbody>
  </table>

  <div class="totals">รวมทั้งสิ้น: {money(grand_total)}</div>
  <div class="words">จำนวนเงิน (ตัวอักษร): {esc(thai_words)}</div>

  <div class="sign-wrap">
    <table class="sign-table">
      <tr>
        <td style="text-align:right; width:110px;">ผู้รับเงิน</td>
        <td style="text-align:right;"><img class="sign-img" src="{sig_url}"></td>
      </tr>
      <tr>
        <td style="text-align:right;">ผู้จ่ายเงิน</td>
        <td style="text-align:right;"><img class="sign-img" src="{sig_url}"></td>
      </tr>
    </table>
  </div>

  <div class="note">หมายเหตุ: แนบสำเนาบัตรประชาชนผู้รับเงิน</div>
</body>
</html>
"""

    # Render the HTML string and write the PDF to pdf_path.
    HTML(string=html).write_pdf(pdf_path)


def build_receipts_by_new_billno_weasy(
    df: pd.DataFrame,
    out_dir: str,
    *,
    font_dir: str,
    signature_path: str,
):
    """Write one receipt PDF per distinct NEW_BILLNO into ``out_dir``.

    The folder is created if needed. Each file is named ``<NEW_BILLNO>.pdf``
    and rendered by ``build_one_receipt_pdf_weasy`` from the rows sharing that
    bill number. Returns ``out_dir``.
    """
    os.makedirs(out_dir, exist_ok=True)

    receipts = df.groupby("NEW_BILLNO", sort=True)
    for bill_no, bill_lines in receipts:
        target = os.path.join(out_dir, f"{bill_no}.pdf")
        build_one_receipt_pdf_weasy(
            bill_lines,
            target,
            font_dir=font_dir,
            signature_path=signature_path,
        )

    return out_dir


In [21]:
import logging

# Raise the level of the chatty WeasyPrint and fontTools loggers to ERROR so
# PDF generation does not flood the cell output with warnings.
for _noisy_logger in (
    # WeasyPrint
    "weasyprint",
    "weasyprint.progress",
    "weasyprint.CSS",
    "weasyprint.HTML",
    # fontTools (the source of most of the log spam)
    "fontTools",
    "fontTools.subset",
    "fontTools.ttLib",
    # Optional: fontTools table warnings as well
    "fontTools.ttLib.tables",
):
    logging.getLogger(_noisy_logger).setLevel(logging.ERROR)

In [22]:
import os

# Root of the KCW-Data shared drive; the font, signature and output paths
# below are all built from it.
kcwdir = os.path.join(BASE_FOLDER, "KCW-Data")
print(kcwdir)

G:\Shared drives\KCW-Data


In [23]:
# Receipt assets: both the Thai font folder and the signature image live under
# kcw_analytics/00_fonts.
font_dir = os.path.join(kcwdir, "kcw_analytics", "00_fonts", "THSarabunNew")
signature_path = os.path.join(kcwdir, "kcw_analytics", "00_fonts", "Signature.jpg")

print(font_dir, signature_path, sep="\n")

# Sanity check before rendering: both assets must be reachable.
print("font_dir exists:", os.path.isdir(font_dir))
print("signature exists:", os.path.isfile(signature_path))

G:\Shared drives\KCW-Data\kcw_analytics\00_fonts\THSarabunNew
G:\Shared drives\KCW-Data\kcw_analytics\00_fonts\Signature.jpg
font_dir exists: True
signature exists: True


In [24]:
# Output folder for the SYP (3RV) receipt PDFs of the reporting month.
pdf_dir = os.path.join(kcwdir, "kcw_analytics", "04_outputs", "3RV", f"3RV_{YEAR}_{MONTH}", "PDF")

print(pdf_dir)

G:\Shared drives\KCW-Data\kcw_analytics\04_outputs\3RV\3RV_2026_2\PDF


In [25]:
# Render one PDF per NEW_BILLNO for the SYP lines into pdf_dir.
# NOTE(review): the name pdf_dir is reassigned to the RV folder in a later
# cell, so this cell must run before that one; re-running it afterwards would
# write SYP receipts into the RV folder.
out_dir = build_receipts_by_new_billno_weasy(
    out_syp,
    pdf_dir,
    font_dir=font_dir,
    signature_path=signature_path,
)

print("Saved to:", out_dir)

Saved to: G:\Shared drives\KCW-Data\kcw_analytics\04_outputs\3RV\3RV_2026_2\PDF


In [26]:
# Output folder for the HQ (RV) receipt PDFs of the reporting month.
# NOTE(review): this reuses the name pdf_dir that earlier held the 3RV folder.
pdf_dir = os.path.join(kcwdir, "kcw_analytics", "04_outputs", "RV", f"RV_{YEAR}_{MONTH}", "PDF")

# Render one PDF per NEW_BILLNO for the HQ lines.
out_dir = build_receipts_by_new_billno_weasy(
    out_hq,
    pdf_dir,
    font_dir=font_dir,
    signature_path=signature_path,
)

print("Saved to:", out_dir)


Saved to: G:\Shared drives\KCW-Data\kcw_analytics\04_outputs\RV\RV_2026_2\PDF


In [27]:
# --- HQ: write the receipt lines next to the RV PDFs ---
hq_csv_path = os.path.join(
    kcwdir,
    "kcw_analytics",
    "04_outputs",
    "RV",
    f"RV_{YEAR}_{MONTH}",
    "CSV"
)

os.makedirs(hq_csv_path, exist_ok=True)

# utf-8-sig writes a BOM so spreadsheet tools detect the Thai text as UTF-8.
out_hq.to_csv(os.path.join(hq_csv_path, f"RV_{YEAR}_{MONTH}.csv"), index=False, encoding="utf-8-sig")

# --- SYP: same layout under 3RV ---
# Only syp_csv_path is assigned here; the previous chained assignment also
# overwrote hq_csv_path with the 3RV folder.
syp_csv_path = os.path.join(
    kcwdir,
    "kcw_analytics",
    "04_outputs",
    "3RV",
    f"3RV_{YEAR}_{MONTH}",
    "CSV"
)

os.makedirs(syp_csv_path, exist_ok=True)

out_syp.to_csv(os.path.join(syp_csv_path, f"3RV_{YEAR}_{MONTH}.csv"), index=False, encoding="utf-8-sig")