In [29]:
!git clone https://github.com/pthengtr/kcw-analytics.git

fatal: destination path 'kcw-analytics' already exists and is not an empty directory.


In [30]:
!cd /content/kcw-analytics && git pull origin main

From https://github.com/pthengtr/kcw-analytics
 * branch            main       -> FETCH_HEAD
Already up to date.


In [31]:
from google.colab import drive
# Mount Google Drive at /content/drive; the later cells read raw CSVs and fonts
# from, and write PDFs/CSVs to, paths under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [32]:
import os
import pandas as pd

folder = "/content/drive/MyDrive/kcw_analytics/01_raw"

# Columns forced to the pandas "string" dtype so no numeric type is inferred for them.
key_dtypes = {
    "BCODE": "string",
    "ITEMNO": "string",
    "BILLNO": "string",
}

# Maps each CSV file name found in `folder` to its loaded DataFrame.
data = {}

for file in os.listdir(folder):
    if not file.endswith(".csv"):
        continue
    frame = pd.read_csv(
        os.path.join(folder, file),
        dtype=key_dtypes,
        encoding="utf-8-sig",
        low_memory=False,  # parse each file in one pass instead of guessing dtypes per chunk
    )
    data[file] = frame
    print(f"Loaded: {file} -> {frame.shape}")



Loaded: raw_inventory_hq_2024.csv -> (4983, 8)
Loaded: raw_hq_pimas_purchase_bills.csv -> (83130, 49)
Loaded: raw_hq_sidet_sales_lines.csv -> (1194399, 38)
Loaded: raw_hq_simas_sales_bills.csv -> (484283, 49)
Loaded: raw_hq_pidet_purchase_lines.csv -> (247915, 41)
Loaded: raw_syp_pimas_purchase_bills.csv -> (2829, 49)
Loaded: raw_syp_simas_sales_bills.csv -> (11348, 49)
Loaded: raw_syp_pidet_purchase_lines.csv -> (26431, 41)
Loaded: raw_syp_sidet_sales_lines.csv -> (33314, 38)
Loaded: raw_hq_icmas_products.csv -> (114825, 94)


In [33]:
import sys
import importlib

# Put the cloned repository on sys.path (once) so `src.kcw.utils` is importable.
repo_path = "/content/kcw-analytics"
if repo_path not in sys.path:
    sys.path.append(repo_path)

# Import the module object itself (NOT individual functions) so it can be reloaded.
import src.kcw.utils as utils

# Reload to pick up the latest .py changes without restarting the kernel.
importlib.reload(utils)

# Re-bind the helpers AFTER the reload so these names point at the fresh definitions.
get_nonvat_sales_lines_last_purchase_vat = utils.get_nonvat_sales_lines_last_purchase_vat
audit_bcode_nonvat_sales_last_purchase_vat = utils.audit_bcode_nonvat_sales_last_purchase_vat

In [34]:
def filter_year_month(df, year, month, date_col="BILLDATE"):
    """Return the rows of ``df`` whose ``date_col`` falls in the given month.

    Args:
        df: Frame containing ``date_col``.
        year: Calendar year (Gregorian), e.g. 2026.
        month: Month number 1-12.
        date_col: Name of the date column; parsed with ``pd.to_datetime``.

    Returns:
        The matching rows, with the original index labels preserved.
    """
    target_month = f"{year}-{month:02d}"
    row_months = pd.to_datetime(df[date_col]).dt.to_period("M")
    in_month = row_months == target_month
    return df[in_month]

In [35]:
import pandas as pd
import numpy as np

def enrich_sales_with_newbillno_only(
    sales_lines: pd.DataFrame,
    *,
    source: str,  # "hq" or "syp"
    bcode_col: str = "BCODE",
    date_col: str = "BILLDATE",
    billno_col: str = "BILLNO",
    output_cols: tuple = (
        "BCODE", "BILLDATE", "BILLNO", "DETAIL", "QTY", "MTP", "UI", "PRICE", "AMOUNT",
        "ACCT_NO", "CANCELED", "ISVAT"
    ),
    chunk_size: int = 20,  # max number of lines sharing one NEW_BILLNO per date
) -> pd.DataFrame:
    """
    Add a NEW_BILLNO column to sales lines without calculating any cost.

    NEW_BILLNO format:
      - HQ  -> TAR[yy][mm]-[seq]
      - SYP -> 3TAR[yy][mm]-[seq]
      - yy is the last two digits of (Gregorian year + 543), mm the month
      - seq is zero-padded to 3 digits and restarts at 001 every month
      - within a month, seq advances for every new date and again for every
        additional block of ``chunk_size`` lines on the same date

    Rows whose BCODE or date is missing (or whose BCODE is blank after
    stripping) are dropped. Rows are ordered by date, then BCODE (stable sort)
    before numbering, and the result has a fresh 0..n-1 index.

    Args:
        sales_lines: Input lines; not modified.
        source: "hq" or "syp" (case/whitespace-insensitive); selects the prefix.
        bcode_col: Name of the product-code column.
        date_col: Name of the bill-date column (parsed with errors="coerce").
        billno_col: Accepted for interface compatibility; not used here.
        output_cols: Columns to return (missing ones are created as pd.NA).
        chunk_size: Positive number of lines per generated bill number per date.

    Returns:
        A new DataFrame with ``output_cols`` followed by ``NEW_BILLNO``.

    Raises:
        ValueError: If ``source`` is not "hq"/"syp" or ``chunk_size`` < 1.
        KeyError: If ``bcode_col`` or ``date_col`` is missing.
    """
    source = source.lower().strip()
    if source not in ("hq", "syp"):
        raise ValueError("source must be 'hq' or 'syp'")
    chunk_size = int(chunk_size)
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    bill_prefix = "TAR" if source == "hq" else "3TAR"

    # Work on a copy; strip BOM / whitespace that can leak into CSV headers.
    sales = sales_lines.copy()
    sales.columns = sales.columns.astype(str).str.replace("\ufeff", "", regex=False).str.strip()

    # validate required
    for col in (bcode_col, date_col):
        if col not in sales.columns:
            raise KeyError(f"sales_lines missing required column: {col}")

    # The nullable "string" dtype keeps missing codes as <NA>. A plain
    # astype(str) would turn them into the text "nan"/"None"/"<NA>", and the
    # dropna below would then silently keep those rows.
    sales[bcode_col] = sales[bcode_col].astype("string").str.strip()
    sales[date_col] = pd.to_datetime(sales[date_col], errors="coerce")

    sales = sales.dropna(subset=[bcode_col, date_col])
    sales = sales[sales[bcode_col] != ""].copy()

    # Stable ordering by calendar day, then product code.
    sales["__DATEKEY__"] = sales[date_col].dt.normalize()
    sales = sales.sort_values(["__DATEKEY__", bcode_col], kind="mergesort").reset_index(drop=True)
    date_key = sales.pop("__DATEKEY__")

    if sales.empty:
        sales["NEW_BILLNO"] = pd.Series(dtype="object")
    else:
        # 0-based chunk number of each line within its own date.
        day_chunk = sales.groupby(date_key, sort=False).cumcount() // chunk_size

        # Number of bill numbers each date consumes (index: dates, ascending).
        chunks_per_day = day_chunk.groupby(date_key).max() + 1

        # Sequence offset of each date = bill numbers used by earlier dates of
        # the same month. year*12+month is a plain integer month identifier.
        day_index = chunks_per_day.index
        month_id = (day_index.year * 12 + day_index.month).to_numpy()
        day_start = chunks_per_day.groupby(month_id).cumsum() - chunks_per_day

        seq = (date_key.map(day_start) + day_chunk + 1).astype(int)

        yy = ((date_key.dt.year + 543) % 100).astype(int).astype(str).str.zfill(2)
        mm = date_key.dt.month.astype(int).astype(str).str.zfill(2)

        sales["NEW_BILLNO"] = bill_prefix + yy + mm + "-" + seq.astype(str).str.zfill(3)

    # ensure output cols exist
    for col in output_cols:
        if col not in sales.columns:
            sales[col] = pd.NA

    return sales[list(output_cols) + ["NEW_BILLNO"]].copy()


In [36]:
!apt-get -y install libpango-1.0-0 libpangoft2-1.0-0 libcairo2 libgdk-pixbuf2.0-0 libffi-dev shared-mime-info
!pip -q install weasyprint

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libcairo2 is already the newest version (1.16.0-5ubuntu2).
libffi-dev is already the newest version (3.4.2-4).
shared-mime-info is already the newest version (2.1-2).
libgdk-pixbuf2.0-0 is already the newest version (2.40.2-2build4).
libpango-1.0-0 is already the newest version (1.50.6+ds-2ubuntu1).
libpangoft2-1.0-0 is already the newest version (1.50.6+ds-2ubuntu1).
0 upgraded, 0 newly installed, 0 to remove and 2 not upgraded.


In [37]:
import os
import pandas as pd
from weasyprint import HTML

# Letterhead text printed at the top of each receipt, keyed by branch code.
# get_company_info() picks "syp" when the bill number starts with "3", else "hq".
# Each entry provides the "name", "address", "phone" and "tax" lines used by
# the receipt template.
COMPANY_INFO = {
    "hq": {
        "name": "บริษัท เกียรติชัยอะไหล่ยนต์ 2007 จำกัด (สำนักงานใหญ่)",
        "address": "ที่อยู่ 305 ม.1 ต.ชุมแสง อ.วังจันทร์ จ.ระยอง 21210",
        "phone": "โทร. 038-666-078",
        "tax": "เลขประจำตัวผู้เสียภาษี 0215560000262"
    },
    "syp": {
        "name": "บริษัท เกียรติชัยอะไหล่ยนต์ 2007 จำกัด (สาขาสี่แยกพัฒนา)",
        "address": "ที่อยู่ 16/2 ม.2 ต.ห้วยทับมอญ อ.เขาชะเมา จ.ระยอง 21110",
        "phone": "โทร. 063-2655387, 038-015818",
        "tax": "เลขประจำตัวผู้เสียภาษี 0215560000262 (สาขาที่ 00003)"
    }
}

# Thai month abbreviations in calendar order (January first);
# thai_date() indexes this list with `month - 1`.
TH_MONTHS_ABBR = [
    "ม.ค.", "ก.พ.", "มี.ค.", "เม.ย.", "พ.ค.", "มิ.ย.",
    "ก.ค.", "ส.ค.", "ก.ย.", "ต.ค.", "พ.ย.", "ธ.ค."
]

def get_company_info(new_billno: str):
    """Return the COMPANY_INFO entry for the branch encoded in a bill number.

    Bill numbers whose text starts with "3" belong to the "syp" branch;
    everything else maps to "hq".
    """
    branch = "syp" if str(new_billno).startswith("3") else "hq"
    return COMPANY_INFO[branch]

def thai_date(d) -> str:
    """Format a date as "<day> <Thai month abbreviation> <year + 543>"."""
    stamp = pd.to_datetime(d).to_pydatetime()
    month_label = TH_MONTHS_ABBR[stamp.month - 1]
    buddhist_year = stamp.year + 543
    return f"{stamp.day} {month_label} {buddhist_year}"

def _money(x):
    try:
        return f"{float(x):,.2f}"
    except Exception:
        return ""

def build_one_receipt_weasy_vat(
    group_df: pd.DataFrame,
    pdf_path: str,
    *,
    font_regular_path: str,   # e.g. "/content/drive/MyDrive/kcw_analytics/00_fonts/THSarabunNew.ttf"
    font_bold_path: str,      # e.g. "/content/drive/MyDrive/kcw_analytics/00_fonts/THSarabunNew-Bold.ttf"
    signature_img_path: str | None = None,
):
    """Render the lines of one NEW_BILLNO group to a single receipt PDF.

    The bill number and date are taken from the first row of ``group_df``.
    AMOUNT is treated as VAT-inclusive; the VAT part of each line is
    AMOUNT * 7/107 and the remainder is the pre-VAT base.

    Text cells (BCODE, DETAIL, UI) are HTML-escaped and missing values are
    rendered as empty cells. QTY / PRICE / AMOUNT default to 0 when the column
    is absent or a value is not numeric.

    Args:
        group_df: Lines of one receipt; must contain NEW_BILLNO and BILLDATE.
        pdf_path: Destination file for the PDF.
        font_regular_path: Path of the regular font file, used in @font-face.
        font_bold_path: Path of the bold font file, used in @font-face.
        signature_img_path: Accepted for interface compatibility; the current
            template does not reference it.

    Raises:
        KeyError: If NEW_BILLNO or BILLDATE is missing.
        IndexError: If ``group_df`` has no rows.
    """
    from html import escape  # stdlib; local import because only this renderer needs it

    df = group_df.copy()

    def _text(value) -> str:
        """HTML-safe text for a table cell; missing values become ""."""
        if pd.isna(value):
            return ""
        return escape(str(value))

    def _numeric(name: str, default: float) -> pd.Series:
        """Column ``name`` coerced to numbers, or a constant column if absent."""
        if name in df.columns:
            return pd.to_numeric(df[name], errors="coerce").fillna(default)
        return pd.Series(float(default), index=df.index)

    new_billno = str(df["NEW_BILLNO"].iloc[0])
    billdate = thai_date(df["BILLDATE"].iloc[0])
    info = get_company_info(new_billno)
    billno_html = escape(new_billno)

    # numeric safety
    df["QTY"] = _numeric("QTY", 0)
    df["PRICE"] = _numeric("PRICE", 0)
    df["AMOUNT"] = _numeric("AMOUNT", 0)

    # per-line VAT split (AMOUNT is VAT-inclusive)
    # VAT portion for inclusive amount at 7% = amount * 7/107
    df["VAT_PORTION"] = df["AMOUNT"] * (7.0 / 107.0)
    df["BASE_EXVAT"] = df["AMOUNT"] - df["VAT_PORTION"]

    total_amount = float(df["AMOUNT"].sum())
    total_vat = float(df["VAT_PORTION"].sum())
    total_base = float(df["BASE_EXVAT"].sum())

    # rows HTML
    rows_html = []
    for _, r in df.iterrows():
        bcode = _text(r.get("BCODE", ""))
        detail = _text(r.get("DETAIL", ""))
        unit_price = _money(r["PRICE"])  # PRICE as UNIT_PRICE
        qty_val = r["QTY"]
        # whole quantities print without decimals, fractional ones as 0.00
        qty = _money(qty_val) if (qty_val % 1) else str(int(qty_val))
        unit = _text(r.get("UI", ""))

        amount_incl = _money(r["AMOUNT"])
        vat_part = _money(r["VAT_PORTION"])

        rows_html.append(f"""
          <tr>
            <td class="c">{bcode}</td>
            <td class="l">{detail}</td>
            <td class="r">{unit_price}</td>
            <td class="r">{qty}</td>
            <td class="c">{unit}</td>
            <td class="r">{amount_incl}</td>
            <td class="r">{vat_part}</td>
          </tr>
        """)

    html = f"""
<!doctype html>
<html>
<head>
  <meta charset="utf-8"/>
  <style>
    @page {{
      size: A4;
      margin: 18px 24px;
    }}

    @font-face {{
      font-family: "THSarabunNew";
      src: url("{font_regular_path}");
    }}
    @font-face {{
      font-family: "THSarabunNew";
      src: url("{font_bold_path}");
      font-weight: bold;
    }}

    body {{
      font-family: "THSarabunNew";
      font-size: 12pt;
      line-height: 1.35;
    }}

    .title {{
      margin-bottom: 6px;
      text-align:left;
      font-weight:700;
      font-size:20px;
    }}

    .right {{
      text-align: right;
    }}

    .kv b {{
      font-weight: bold;
    }}

    table {{
      width: 100%;
      border-collapse: collapse;
      margin-top: 8px;
    }}
    th, td {{
      border: 1px solid #000;
      padding: 4px 6px;
      vertical-align: top;
    }}
    th {{
      font-weight: bold;
      background: #f5f5f5;
      text-align: center;
    }}

    .l {{ text-align: left; }}
    .c {{ text-align: center; }}
    .r {{ text-align: right; }}

    .totals {{
      margin-top: 10px;
      width: 100%;
    }}
    .totals .row {{
      display: flex;
      justify-content: flex-end;
      gap: 10px;
    }}
    .totals .label {{
      min-width: 140px;
      text-align: right;
      font-weight: bold;
    }}
    .totals .val {{
      min-width: 120px;
      text-align: right;
    }}

    .sign-block {{
      margin-top: 18px;
      display: flex;
      flex-direction: column;
      align-items: flex-end;
      gap: 12px;
    }}
    .sign-row {{
      display: flex;
      align-items: center;
      gap: 10px;
    }}
    .sign-label {{
      width: 80px;
      text-align: center;
      font-weight: bold;
    }}
    .sign-box {{
      width: 200px;
      height: 60px;
      border: 1px solid #000;
      display: flex;
      align-items: center;
      justify-content: center;
    }}
    .sig-img {{
      max-width: 180px;
      max-height: 50px;
    }}

    .note {{
      margin-top: 10px;
      text-align: right;
    }}

    .header-row{{
        display:flex;
        justify-content:space-between;   /* push apart */
        align-items:flex-start;
        width:100%;
    }}

    .company{{
        text-align:left;
        font-size:14px;
        line-height:1.4;
        grid-column:1;
        grid-row:1; }}
    .company-name{{ font-weight:700; font-size:16px; }}
    .company-line{{ font-size:14px; line-height:1.35; }}
    .company-line.tax{{ margin-top:6px; }}
  </style>
</head>

<body>

  <div class="header-row">

    <div class="company">
      <div class="company-name">{info['name']}</div>
      <div class="company-line">{info['address']}</div>
      <div class="company-line">{info['phone']}</div>
      <div class="company-line tax">{info['tax']}</div>
    </div>

    <div>
      <div class="title">
        ใบเสร็จรับเงิน/ใบกำกับภาษีอย่างย่อ
      </div>
      <div class="right kv">
        <div><b>เลขที่:</b> {billno_html}</div>
        <div><b>วันที่:</b> {billdate}</div>
      </div>
    </div>

  </div>

  <table>
    <thead>
      <tr>
        <th style="width: 12%">รหัสสินค้า</th>
        <th style="width: 36%">รายการ</th>
        <th style="width: 10%">ราคา/หน่วย</th>
        <th style="width: 8%">จำนวน</th>
        <th style="width: 8%">หน่วย</th>
        <th style="width: 13%">รวมยอดเงิน<br/>(รวม VAT)</th>
        <th style="width: 13%">VAT 7%<br/>(ส่วนในยอด)</th>
      </tr>
    </thead>
    <tbody>
      {''.join(rows_html)}
    </tbody>
  </table>

  <div class="totals">
    <div class="row"><div class="label">ยอดก่อน VAT:</div><div class="val">{_money(total_base)}</div></div>
    <div class="row"><div class="label">VAT 7%:</div><div class="val">{_money(total_vat)}</div></div>
    <div class="row"><div class="label">รวมทั้งสิ้น (รวม VAT):</div><div class="val">{_money(total_amount)}</div></div>
  </div>

</body>
</html>
"""

    # base_url="/" lets the absolute font paths in @font-face resolve as local files.
    HTML(string=html, base_url="/").write_pdf(pdf_path)


def build_receipts_by_new_billno_weasy_vat(
    df: pd.DataFrame,
    out_dir: str,
    *,
    font_regular_path: str,
    font_bold_path: str,
    signature_img_path: str | None = None,
):
    """Write one PDF per distinct NEW_BILLNO into ``out_dir``.

    The directory is created if needed, groups are processed in sorted
    NEW_BILLNO order, each file is named ``<NEW_BILLNO>.pdf``, and a
    single-line progress indicator is printed while rendering.

    Returns:
        ``out_dir`` unchanged.

    Raises:
        ValueError: If ``df`` has no NEW_BILLNO column.
    """
    os.makedirs(out_dir, exist_ok=True)

    if "NEW_BILLNO" not in df.columns:
        raise ValueError("df must contain NEW_BILLNO column")

    receipts = [(billno, lines) for billno, lines in df.groupby("NEW_BILLNO", sort=True)]
    n_receipts = len(receipts)

    print(f"Generating {n_receipts} receipts...\n")

    render_options = {
        "font_regular_path": font_regular_path,
        "font_bold_path": font_bold_path,
        "signature_img_path": signature_img_path,
    }

    done = 0
    for billno, lines in receipts:
        done += 1

        # "\r" rewinds to the start of the line so the progress text is overwritten in place
        percent = (done / n_receipts) * 100
        print(f"\rProgress: {done}/{n_receipts}  ({percent:6.2f}%)  -> {billno}", end="")

        target = os.path.join(out_dir, f"{billno}.pdf")
        build_one_receipt_weasy_vat(lines, target, **render_options)

    print("\nDone.")
    return out_dir



In [38]:
# Reporting period. YEAR is passed to the extraction below; MONTH is applied
# afterwards by filter_year_month() and is also used in the output folder names.
YEAR = 2026
MONTH = 1

# One extraction per branch from the raw tables loaded into `data`.
nonvat_sales_lines_last_purchase_vat_hq = get_nonvat_sales_lines_last_purchase_vat(
    data, year=YEAR, source="hq"
)

nonvat_sales_lines_last_purchase_vat_syp = get_nonvat_sales_lines_last_purchase_vat(
    data, year=YEAR, source="syp"
)

In [39]:
# Narrow both branches to the reporting month (BILLDATE within YEAR-MONTH).
# NOTE: the yearly frames are overwritten here; re-run the extraction cell to get them back.
nonvat_sales_lines_last_purchase_vat_hq = filter_year_month(nonvat_sales_lines_last_purchase_vat_hq, YEAR, MONTH)
nonvat_sales_lines_last_purchase_vat_syp = filter_year_month(nonvat_sales_lines_last_purchase_vat_syp, YEAR, MONTH)

In [40]:
# Flag HQ lines whose BILLNO starts with "TF" or "TFV" (missing BILLNO -> not flagged).
# Any value starting with "TFV" also starts with "TF", so the second prefix is redundant.
mask = nonvat_sales_lines_last_purchase_vat_hq["BILLNO"].astype("string").str.startswith(("TF", "TFV"), na=False)

# Keep the flagged rows aside for inspection and drop them from the working frame.
removed_tf = nonvat_sales_lines_last_purchase_vat_hq.loc[mask].copy()
nonvat_sales_lines_last_purchase_vat_hq = nonvat_sales_lines_last_purchase_vat_hq.loc[~mask].copy()

print(f"Removed TF/TFV lines: {len(removed_tf)}")

Removed TF/TFV lines: 1102


In [41]:
# Same TF/TFV exclusion for the SYP branch.
# NOTE: `mask` and `removed_tf` are reused, so the HQ values from the previous cell are overwritten.
mask = nonvat_sales_lines_last_purchase_vat_syp["BILLNO"].astype("string").str.startswith(("TF", "TFV"), na=False)

removed_tf = nonvat_sales_lines_last_purchase_vat_syp.loc[mask].copy()
nonvat_sales_lines_last_purchase_vat_syp = nonvat_sales_lines_last_purchase_vat_syp.loc[~mask].copy()

print(f"Removed TF/TFV lines: {len(removed_tf)}")

Removed TF/TFV lines: 0


In [42]:
import pandas as pd
import numpy as np

def remove_negative_and_monthend_offset(
    sales: pd.DataFrame,
    *,
    date_col: str = "BILLDATE",
    amount_col: str = "AMOUNT_NUM",   # use AMOUNT_NUM if you have it, else "AMOUNT"
    cutoff_day: int = 15,
    group_cols: tuple[str, ...] = (), # <-- month only by default
    tie_break_cols: tuple[str, ...] = ("BILLNO", "BCODE"),
):
    """
    Split sales lines into kept / negative / offset-removed parts.

    1) neg_df: all rows whose amount is < 0.
    2) removed_final_df: for each (group, month) that has negatives, rows with
       day-of-month > ``cutoff_day`` are taken from month-end backwards until
       their cumulative amount reaches the absolute sum of that month's
       negatives (the row that crosses the threshold is included).
    3) kept_df: the remaining non-negative rows, in their original order.

    Amounts are coerced with ``pd.to_numeric(errors="coerce")`` for every
    calculation; rows with an unparseable amount are never removed.

    Rows are matched back to the input by position, so the result is correct
    for any input index (non-contiguous or duplicated labels). kept_df and
    removed_final_df carry the original index labels.

    Args:
        sales: Input lines; not modified.
        date_col: Date column (parsed with errors="coerce").
        amount_col: Amount column.
        cutoff_day: Only rows dated after this day of month may be removed.
        group_cols: Extra grouping columns in addition to the month.
        tie_break_cols: Columns (if present) used, descending, to order rows
            that share the same date.

    Returns:
        (kept_df, neg_df, removed_final_df), each with the input's columns.
    """
    helper_cols = ["_MONTH", "_AMT", "_POS", "NEED_POS", "_CUM_POS"]

    df = sales.copy()

    # Ensure datetime and numeric views used by every step below.
    df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
    df["_AMT"] = pd.to_numeric(df[amount_col], errors="coerce")
    df["_MONTH"] = df[date_col].dt.to_period("M").dt.to_timestamp()

    # (1) negatives
    neg_mask = df["_AMT"] < 0
    neg_df = df.loc[neg_mask].copy()
    base_df = df.loc[~neg_mask].copy()

    # Positional row id: merge() below returns a fresh 0..n-1 index, so index
    # labels cannot be used to map candidates back to base_df.
    base_df["_POS"] = np.arange(len(base_df))

    keys = list(group_cols) + ["_MONTH"]
    removed_pos = np.array([], dtype=int)
    removed_final_df = base_df.iloc[0:0].copy()

    if not neg_df.empty:
        # Positive amount that has to be removed per (group, month).
        neg_total = neg_df.groupby(keys, dropna=False)["_AMT"].sum()
        need = (-neg_total).clip(lower=0).rename("NEED_POS").reset_index()

        # (2) candidates: only day > cutoff_day, only months that have negatives
        late_rows = base_df.loc[base_df[date_col].dt.day > cutoff_day]
        cand = late_rows.merge(need, on=keys, how="inner")

        if not cand.empty:
            # month-end backwards; ties broken by the tie-break columns, descending
            tie_cols = [c for c in tie_break_cols if c in cand.columns]
            sort_cols = keys + [date_col] + tie_cols
            ascending = [True] * len(keys) + [False] * (1 + len(tie_cols))
            cand = cand.sort_values(sort_cols, ascending=ascending, kind="mergesort")

            cand["_CUM_POS"] = cand.groupby(keys, dropna=False)["_AMT"].cumsum()

            # Remove every row up to and including the first one that reaches NEED_POS.
            remove = cand["_CUM_POS"] <= cand["NEED_POS"]
            crossed = cand["_CUM_POS"] >= cand["NEED_POS"]
            first_cross_idx = (
                cand[crossed]
                .groupby(keys, dropna=False, sort=False)
                .head(1)
                .index
            )
            remove.loc[first_cross_idx] = True

            removed_final_df = cand.loc[remove].copy()
            removed_pos = removed_final_df["_POS"].to_numpy()
            # restore the caller's index labels on the removed rows
            removed_final_df.index = base_df.index[removed_pos]

    # kept = base minus removed_final (matched by position)
    kept_df = base_df.loc[~base_df["_POS"].isin(removed_pos)].copy()

    # cleanup helper cols
    kept_df = kept_df.drop(columns=helper_cols, errors="ignore")
    neg_df = neg_df.drop(columns=helper_cols, errors="ignore")
    removed_final_df = removed_final_df.drop(columns=helper_cols, errors="ignore")

    return kept_df, neg_df, removed_final_df


In [43]:
# HQ: split the month's lines into kept / negative / month-end rows removed to offset the negatives.
out_hq_kept, out_hq_neg, out_hq_removed = remove_negative_and_monthend_offset(
    nonvat_sales_lines_last_purchase_vat_hq,
    date_col="BILLDATE",
    amount_col="AMOUNT",   # overrides the function default "AMOUNT_NUM"
    cutoff_day=15,         # only lines dated after the 15th may be removed
    group_cols=(),             # month only
)

# Row counts of the three parts, as a quick sanity check.
print("HQ neg_df:", len(out_hq_neg))
print("HQ removed_final_df:", len(out_hq_removed))
print("HQ kept_df:", len(out_hq_kept))


HQ neg_df: 89
HQ removed_final_df: 160
HQ kept_df: 5355


In [44]:
# SYP: same split as for HQ, with identical parameters.
out_syp_kept, out_syp_neg, out_syp_removed = remove_negative_and_monthend_offset(
    nonvat_sales_lines_last_purchase_vat_syp,
    date_col="BILLDATE",
    amount_col="AMOUNT",   # overrides the function default "AMOUNT_NUM"
    cutoff_day=15,         # only lines dated after the 15th may be removed
    group_cols=(),             # month only
)

# Row counts of the three parts, as a quick sanity check.
print("SYP neg_df:", len(out_syp_neg))
print("SYP removed_final_df:", len(out_syp_removed))
print("SYP kept_df:", len(out_syp_kept))

SYP neg_df: 43
SYP removed_final_df: 39
SYP kept_df: 2196


In [45]:
# 1) enrich bill numbers only
out_hq_kept = enrich_sales_with_newbillno_only(
    out_hq_kept,
    source="hq"
)

out_syp_kept = enrich_sales_with_newbillno_only(
    out_syp_kept,
    source="syp"
)

In [46]:
import logging

# Raise the level of the WeasyPrint and fontTools loggers to ERROR so their
# lower-severity messages do not flood the cell output during PDF generation.
for _logger_name in (
    # WeasyPrint
    "weasyprint",
    "weasyprint.progress",
    "weasyprint.CSS",
    "weasyprint.HTML",
    # fontTools, including its table-level loggers
    "fontTools",
    "fontTools.subset",
    "fontTools.ttLib",
    "fontTools.ttLib.tables",
):
    logging.getLogger(_logger_name).setLevel(logging.ERROR)

In [47]:
# Render one PDF per HQ NEW_BILLNO into the month's output folder on Drive.
out = build_receipts_by_new_billno_weasy_vat(
    out_hq_kept,
    f"/content/drive/MyDrive/kcw_analytics/04_outputs/TAR_{YEAR}_{MONTH}/PDF",
    font_regular_path="/content/drive/MyDrive/kcw_analytics/00_fonts/THSarabunNew/THSarabunNew.ttf",
    font_bold_path="/content/drive/MyDrive/kcw_analytics/00_fonts/THSarabunNew/THSarabunNew-Bold.ttf",
    # NOTE(review): passed through, but the receipt template in this notebook does not use it.
    signature_img_path="/content/drive/MyDrive/kcw_analytics/00_fonts/Signature.jpg",
)
print("Saved to:", out)

Generating 280 receipts...

Progress: 280/280  (100.00%)  -> TAR6901-280
Done.
Saved to: /content/drive/MyDrive/kcw_analytics/04_outputs/TAR_2026_1/PDF


In [48]:
# Render one PDF per SYP NEW_BILLNO into the month's output folder on Drive.
out = build_receipts_by_new_billno_weasy_vat(
    out_syp_kept,
    f"/content/drive/MyDrive/kcw_analytics/04_outputs/3TAR_{YEAR}_{MONTH}/PDF",
    font_regular_path="/content/drive/MyDrive/kcw_analytics/00_fonts/THSarabunNew/THSarabunNew.ttf",
    font_bold_path="/content/drive/MyDrive/kcw_analytics/00_fonts/THSarabunNew/THSarabunNew-Bold.ttf",
    # NOTE(review): passed through, but the receipt template in this notebook does not use it.
    signature_img_path="/content/drive/MyDrive/kcw_analytics/00_fonts/Signature.jpg",
)
print("Saved to:", out)

Generating 125 receipts...

Progress: 125/125  (100.00%)  -> 3TAR6901-125
Done.
Saved to: /content/drive/MyDrive/kcw_analytics/04_outputs/3TAR_2026_1/PDF


In [49]:
import os

def _export_csvs(out_dir, tag, kept, neg, removed):
    """Create ``out_dir`` and write the kept / neg / removed frames as CSV files.

    Files are named ``<tag>_<YEAR>_<MONTH>_<part>.csv`` and written without
    the index, encoded as UTF-8 with BOM.
    """
    os.makedirs(out_dir, exist_ok=True)
    for part, frame in (("kept", kept), ("neg", neg), ("removed", removed)):
        frame.to_csv(f"{out_dir}/{tag}_{YEAR}_{MONTH}_{part}.csv", index=False, encoding="utf-8-sig")


# HQ exports
output_dir_hq = f"/content/drive/MyDrive/kcw_analytics/04_outputs/TAR_{YEAR}_{MONTH}/CSV"
_export_csvs(output_dir_hq, "TAR", out_hq_kept, out_hq_neg, out_hq_removed)

# SYP exports
output_dir_syp = f"/content/drive/MyDrive/kcw_analytics/04_outputs/3TAR_{YEAR}_{MONTH}/CSV"
_export_csvs(output_dir_syp, "3TAR", out_syp_kept, out_syp_neg, out_syp_removed)
