In [1]:
# !git clone https://github.com/pthengtr/kcw-analytics.git

In [2]:
# !cd /content/kcw-analytics && git pull origin main

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

In [4]:
import os
import sys

if "google.colab" in sys.modules:
    # Running in Colab
    from google.colab import drive
    drive.mount("/content/drive")
    
    BASE_FOLDER = "/content/drive/Shareddrives"
    BASE_FOLDER_GIT = "/content"
else:
    # Running in local Jupyter
    BASE_FOLDER = r"G:\Shared drives"
    BASE_FOLDER_GIT = r"C:\Users\Windows 11\Notebook"

print("Using folder:", BASE_FOLDER)

Using folder: G:\Shared drives


In [5]:
folder = f"{BASE_FOLDER}/KCW-Data/kcw_analytics/01_raw"

In [6]:
import os
import pandas as pd

data = {}

for file in os.listdir(folder):
    if file.endswith(".csv"):
        path = os.path.join(folder, file)
        data[file] = pd.read_csv(
            path,
            dtype={
              "BCODE": "string",
              "ITEMNO": "string",
              "BILLNO": "string",
            },
            encoding="utf-8-sig",
            low_memory=False   # stops chunk guessing
        )
        print(f"Loaded: {file} -> {data[file].shape}")



Loaded: raw_inventory_hq_2024.csv -> (4983, 8)
Loaded: raw_syp_simas_sales_bills.csv -> (12366, 49)
Loaded: raw_hq_pidet_purchase_lines.csv -> (153764, 41)
Loaded: raw_syp_sidet_sales_lines.csv -> (36551, 38)
Loaded: raw_syp_pimas_purchase_bills.csv -> (2919, 49)
Loaded: raw_hq_simas_sales_bills.csv -> (275969, 49)
Loaded: raw_hq_pimas_purchase_bills.csv -> (50222, 49)
Loaded: raw_hq_sidet_sales_lines.csv -> (733097, 38)
Loaded: raw_syp_pidet_purchase_lines.csv -> (27266, 41)
Loaded: raw_hq_icmas_products.csv -> (114922, 94)
Loaded: raw_hq_pvmas_notes_vouchers.csv -> (13730, 32)


In [7]:

import sys
import importlib

# ensure repo is on path
repo_path = f"{BASE_FOLDER_GIT}/kcw-analytics"
if repo_path not in sys.path:
    sys.path.append(repo_path)

# import the module (NOT individual functions)
import src.kcw.utils as utils

# reload to pick up latest .py changes
importlib.reload(utils)

get_nonvat_sales_lines_last_purchase_vat = utils.get_nonvat_sales_lines_last_purchase_vat
audit_bcode_nonvat_sales_last_purchase_vat = utils.audit_bcode_nonvat_sales_last_purchase_vat

In [8]:
def filter_year_month(df, year, month, date_col="BILLDATE"):
    return df[pd.to_datetime(df[date_col]).dt.to_period("M") == f"{year}-{month:02d}"]

In [9]:
import pandas as pd

def filter_last_year(
    df: pd.DataFrame,
    date_col: str = "BILLDATE",
    *,
    years: int = 1,
    copy: bool = True,
    verbose: bool = True,
) -> pd.DataFrame:
    """
    Filter dataframe to keep rows within N years back from latest date.

    Parameters
    ----------
    df : pd.DataFrame
    date_col : str
        Column name containing date (default BILLDATE)
    years : int
        How many years back from latest date
    copy : bool
        Return a copy (safe for pipelines)
    verbose : bool
        Print diagnostics

    Returns
    -------
    pd.DataFrame
    """

    if date_col not in df.columns:
        raise ValueError(f"{date_col} not found in dataframe")

    # ensure datetime (legacy POS safe)
    dates = pd.to_datetime(df[date_col], errors="coerce")

    latest_date = dates.max()
    if pd.isna(latest_date):
        raise ValueError("No valid dates found")

    cutoff_date = latest_date - pd.DateOffset(years=years)

    mask = dates >= cutoff_date
    result = df.loc[mask]

    if copy:
        result = result.copy()

    if verbose:
        print(
            f"[filter_last_year] latest={latest_date.date()} | "
            f"cutoff={cutoff_date.date()} | rows={len(result):,}/{len(df):,}"
        )

    return result

In [10]:
import pandas as pd
import numpy as np

def enrich_sales_with_newbillno_only(
    sales_lines: pd.DataFrame,
    *,
    source: str,  # "hq" or "syp"
    bcode_col: str = "BCODE",
    date_col: str = "BILLDATE",
    billno_col: str = "BILLNO",
    output_cols: tuple = (
        "BCODE", "BILLDATE", "BILLNO", "DETAIL", "QTY", "MTP", "UI", "PRICE", "AMOUNT",
        "ACCT_NO", "CANCELED", "ISVAT"
    ),
    chunk_size: int = 20,  # keep same behavior as your current code
) -> pd.DataFrame:
    """
    Adds NEW_BILLNO without calculating any cost.

    NEW_BILLNO:
      - HQ  -> RV[BDyy][mm]-[seq]
      - SYP -> 3RV[BDyy][mm]-[seq]
      - seq resets per month
      - within month: increments per DATE and per each additional chunk_size lines per DATE
    """
    source = source.lower().strip()
    if source not in ("hq", "syp"):
        raise ValueError("source must be 'hq' or 'syp'")
    bill_prefix = "TAR" if source == "hq" else "3TAR"

    def _clean_cols(df: pd.DataFrame) -> pd.DataFrame:
        out = df.copy()
        out.columns = out.columns.astype(str).str.replace("\ufeff", "", regex=False).str.strip()
        return out

    def _clean_bcode(s: pd.Series) -> pd.Series:
        return s.astype(str).str.strip()

    sales = _clean_cols(sales_lines).copy()

    # validate required
    for col in [bcode_col, date_col]:
        if col not in sales.columns:
            raise KeyError(f"sales_lines missing required column: {col}")

    sales[bcode_col] = _clean_bcode(sales[bcode_col])
    sales[date_col] = pd.to_datetime(sales[date_col], errors="coerce")

    sales = sales.dropna(subset=[bcode_col, date_col]).copy()
    sales = sales[sales[bcode_col] != ""].copy()

    # ---- NEW_BILLNO logic (same pattern as your original)
    sales["__DATEKEY__"] = sales[date_col].dt.normalize()
    sales["__MONTHKEY__"] = sales["__DATEKEY__"].dt.to_period("M")

    sales = sales.sort_values(["__DATEKEY__", bcode_col], kind="mergesort").reset_index(drop=True)

    day_chunk = sales.groupby("__DATEKEY__", sort=False).cumcount() // int(chunk_size)

    day_tbl = (
        pd.DataFrame({
            "__MONTHKEY__": sales["__MONTHKEY__"],
            "__DATEKEY__": sales["__DATEKEY__"],
            "day_chunk": day_chunk
        })
        .groupby(["__MONTHKEY__", "__DATEKEY__"], sort=False)["day_chunk"]
        .max()
        .rename("MAX_CHUNK")
        .reset_index()
        .sort_values(["__MONTHKEY__", "__DATEKEY__"], kind="mergesort")
    )

    day_tbl["DAY_START_SEQ0"] = (
        day_tbl.groupby("__MONTHKEY__")["MAX_CHUNK"]
        .transform(lambda x: (x + 1).cumsum() - (x + 1))
    )

    start_map = day_tbl.set_index(["__MONTHKEY__", "__DATEKEY__"])["DAY_START_SEQ0"]
    sales["__DAY_START_SEQ0__"] = start_map.loc[
        list(zip(sales["__MONTHKEY__"], sales["__DATEKEY__"]))
    ].to_numpy()

    seq = (sales["__DAY_START_SEQ0__"] + day_chunk + 1).astype(int)

    bd_yy = ((sales["__DATEKEY__"].dt.year + 543) % 100).astype(int)
    mm = sales["__DATEKEY__"].dt.month.astype(int)

    sales["NEW_BILLNO"] = (
        bill_prefix
        + bd_yy.map(lambda x: f"{x:02d}")
        + mm.map(lambda x: f"{x:02d}")
        + "-"
        + seq.map(lambda x: f"{x:03d}")
    )

    sales.drop(columns=["__DATEKEY__", "__MONTHKEY__", "__DAY_START_SEQ0__"], inplace=True)

    # ensure output cols exist
    for col in output_cols:
        if col not in sales.columns:
            sales[col] = pd.NA

    return sales[list(output_cols) + ["NEW_BILLNO"]].copy()


In [11]:
#!apt-get -y install libpango-1.0-0 libpangoft2-1.0-0 libcairo2 libgdk-pixbuf2.0-0 libffi-dev shared-mime-info
!pip -q install weasyprint

In [12]:
import os
import pandas as pd
from weasyprint import HTML

COMPANY_INFO = {
    "hq": {
        "name": "‡∏ö‡∏£‡∏¥‡∏©‡∏±‡∏ó ‡πÄ‡∏Å‡∏µ‡∏¢‡∏£‡∏ï‡∏¥‡∏ä‡∏±‡∏¢‡∏≠‡∏∞‡πÑ‡∏´‡∏•‡πà‡∏¢‡∏ô‡∏ï‡πå 2007 ‡∏à‡∏≥‡∏Å‡∏±‡∏î (‡∏™‡∏≥‡∏ô‡∏±‡∏Å‡∏á‡∏≤‡∏ô‡πÉ‡∏´‡∏ç‡πà)",
        "address": "‡∏ó‡∏µ‡πà‡∏≠‡∏¢‡∏π‡πà 305 ‡∏°.1 ‡∏ï.‡∏ä‡∏∏‡∏°‡πÅ‡∏™‡∏á ‡∏≠.‡∏ß‡∏±‡∏á‡∏à‡∏±‡∏ô‡∏ó‡∏£‡πå ‡∏à.‡∏£‡∏∞‡∏¢‡∏≠‡∏á 21210",
        "phone": "‡πÇ‡∏ó‡∏£. 038-666-078",
        "tax": "‡πÄ‡∏•‡∏Ç‡∏õ‡∏£‡∏∞‡∏à‡∏≥‡∏ï‡∏±‡∏ß‡∏ú‡∏π‡πâ‡πÄ‡∏™‡∏µ‡∏¢‡∏†‡∏≤‡∏©‡∏µ 0215560000262"
    },
    "syp": {
        "name": "‡∏ö‡∏£‡∏¥‡∏©‡∏±‡∏ó ‡πÄ‡∏Å‡∏µ‡∏¢‡∏£‡∏ï‡∏¥‡∏ä‡∏±‡∏¢‡∏≠‡∏∞‡πÑ‡∏´‡∏•‡πà‡∏¢‡∏ô‡∏ï‡πå 2007 ‡∏à‡∏≥‡∏Å‡∏±‡∏î (‡∏™‡∏≤‡∏Ç‡∏≤‡∏™‡∏µ‡πà‡πÅ‡∏¢‡∏Å‡∏û‡∏±‡∏í‡∏ô‡∏≤)",
        "address": "‡∏ó‡∏µ‡πà‡∏≠‡∏¢‡∏π‡πà 16/2 ‡∏°.2 ‡∏ï.‡∏´‡πâ‡∏ß‡∏¢‡∏ó‡∏±‡∏ö‡∏°‡∏≠‡∏ç ‡∏≠.‡πÄ‡∏Ç‡∏≤‡∏ä‡∏∞‡πÄ‡∏°‡∏≤ ‡∏à.‡∏£‡∏∞‡∏¢‡∏≠‡∏á 21110",
        "phone": "‡πÇ‡∏ó‡∏£. 063-2655387, 038-015818",
        "tax": "‡πÄ‡∏•‡∏Ç‡∏õ‡∏£‡∏∞‡∏à‡∏≥‡∏ï‡∏±‡∏ß‡∏ú‡∏π‡πâ‡πÄ‡∏™‡∏µ‡∏¢‡∏†‡∏≤‡∏©‡∏µ 0215560000262 (‡∏™‡∏≤‡∏Ç‡∏≤‡∏ó‡∏µ‡πà 00003)"
    }
}

TH_MONTHS_ABBR = [
    "‡∏°.‡∏Ñ.", "‡∏Å.‡∏û.", "‡∏°‡∏µ.‡∏Ñ.", "‡πÄ‡∏°.‡∏¢.", "‡∏û.‡∏Ñ.", "‡∏°‡∏¥.‡∏¢.",
    "‡∏Å.‡∏Ñ.", "‡∏™.‡∏Ñ.", "‡∏Å.‡∏¢.", "‡∏ï.‡∏Ñ.", "‡∏û.‡∏¢.", "‡∏ò.‡∏Ñ."
]

def get_company_info(new_billno: str):
    if str(new_billno).startswith("3"):
        return COMPANY_INFO["syp"]
    return COMPANY_INFO["hq"]

def thai_date(d) -> str:
    dt = pd.to_datetime(d).to_pydatetime()
    return f"{dt.day} {TH_MONTHS_ABBR[dt.month - 1]} {dt.year + 543}"

def _money(x):
    try:
        return f"{float(x):,.2f}"
    except Exception:
        return ""

def build_one_receipt_weasy_vat(
    group_df: pd.DataFrame,
    pdf_path: str,
    *,
    font_regular_path: str,   # e.g. "/content/drive/MyDrive/kcw_analytics/00_fonts/THSarabunNew.ttf"
    font_bold_path: str,      # e.g. "/content/drive/MyDrive/kcw_analytics/00_fonts/THSarabunNew-Bold.ttf"
    signature_img_path: str | None = None,
    doc_title: str = "‡πÉ‡∏ö‡πÄ‡∏™‡∏£‡πá‡∏à‡∏£‡∏±‡∏ö‡πÄ‡∏á‡∏¥‡∏ô/‡πÉ‡∏ö‡∏Å‡∏≥‡∏Å‡∏±‡∏ö‡∏†‡∏≤‡∏©‡∏µ‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏¢‡πà‡∏≠",   # üëà NEW
):
    font_regular_path = Path(font_regular_path).resolve().as_uri()
    font_bold_path = Path(font_bold_path).resolve().as_uri()
    signature_img_path = Path(signature_img_path).resolve().as_uri()
    
    df = group_df.copy()

    new_billno = str(df["NEW_BILLNO"].iloc[0])
    billdate = thai_date(df["BILLDATE"].iloc[0])
    src_billno = str(df["BILLNO"].iloc[0]) if "BILLNO" in df.columns else ""
    ref_billno = str(df["REF"].iloc[0]) if "REF" in df.columns else ""

    branch_text = "‡∏™‡∏≥‡∏ô‡∏±‡∏Å‡∏á‡∏≤‡∏ô‡πÉ‡∏´‡∏ç‡πà"
    if new_billno.startswith("3"):
        branch_text = "‡∏™‡∏µ‡πà‡πÅ‡∏¢‡∏Å‡∏û‡∏±‡∏í‡∏ô‡∏≤"

    info = get_company_info(new_billno)

    # numeric safety
    # df["QTY"] = pd.to_numeric(df.get("QTY", 0), errors="coerce").fillna(0)
    # df["MTP"] = pd.to_numeric(df.get("MTP", 1), errors="coerce").fillna(1)
    # df["PRICE"] = pd.to_numeric(df.get("PRICE", 0), errors="coerce").fillna(0)
    # df["AMOUNT"] = pd.to_numeric(df.get("AMOUNT", 0), errors="coerce").fillna(0)

    for col, default in {
      "QTY": 0,
      "MTP": 1,
      "PRICE": 0,
      "AMOUNT": 0,
    }.items():
      if col not in df.columns:
          df[col] = default

    df[col] = pd.to_numeric(df[col], errors="coerce").fillna(default)
    # per-line VAT split (AMOUNT is VAT-inclusive)
    # VAT portion for inclusive amount at 7% = amount * 7/107
    df["VAT_PORTION"] = df["AMOUNT"] * (7.0 / 107.0)
    df["BASE_EXVAT"] = df["AMOUNT"] - df["VAT_PORTION"]

    total_amount = float(df["AMOUNT"].sum())
    total_vat = float(df["VAT_PORTION"].sum())
    total_base = float(df["BASE_EXVAT"].sum())

    # rows HTML
    rows_html = []
    for _, r in df.iterrows():
        bcode = str(r.get("BCODE", ""))
        detail = str(r.get("DETAIL", ""))
        unit_price = _money(r.get("PRICE", 0))  # PRICE as UNIT_PRICE
        qty_val = r.get("QTY", 0)
        qty = _money(qty_val) if (qty_val % 1) else str(int(qty_val))
        unit = str(r.get("UI", ""))

        amount_incl = _money(r.get("AMOUNT", 0))
        vat_part = _money(r.get("VAT_PORTION", 0))

        rows_html.append(f"""
          <tr>
            <td class="c">{bcode}</td>
            <td class="l">{detail}</td>
            <td class="r">{unit_price}</td>
            <td class="r">{qty}</td>
            <td class="c">{unit}</td>
            <td class="r">{amount_incl}</td>
          </tr>
        """)

    html = f"""
<!doctype html>
<html>
<head>
  <meta charset="utf-8"/>
  <style>
    @page {{
      size: A4;
      margin: 18px 24px;
    }}

    @font-face {{
      font-family: "THSarabunNew";
      src: url("{font_regular_path}");
    }}
    @font-face {{
      font-family: "THSarabunNew";
      src: url("{font_bold_path}");
      font-weight: bold;
    }}

    body {{
      font-family: "THSarabunNew";
      font-size: 12pt;
      line-height: 1.35;
    }}

    .title {{
      margin-bottom: 6px;
      text-align:left;
      font-weight:700;
      font-size:20px;
    }}

    .right {{
      text-align: right;
    }}

    .kv b {{
      font-weight: bold;
    }}

    table {{
      width: 100%;
      border-collapse: collapse;
      margin-top: 8px;
    }}
    th, td {{
      border: 1px solid #000;
      padding: 4px 6px;
      vertical-align: top;
    }}
    th {{
      font-weight: bold;
      background: #f5f5f5;
      text-align: center;
    }}

    .l {{ text-align: left; }}
    .c {{ text-align: center; }}
    .r {{ text-align: right; }}

    .totals {{
      margin-top: 10px;
      width: 100%;
    }}
    .totals .row {{
      display: flex;
      justify-content: flex-end;
      gap: 10px;
    }}
    .totals .label {{
      min-width: 140px;
      text-align: right;
      font-weight: bold;
    }}
    .totals .val {{
      min-width: 120px;
      text-align: right;
    }}

    .sign-block {{
      margin-top: 18px;
      display: flex;
      flex-direction: column;
      align-items: flex-end;
      gap: 12px;
    }}
    .sign-row {{
      display: flex;
      align-items: center;
      gap: 10px;
    }}
    .sign-label {{
      width: 80px;
      text-align: center;
      font-weight: bold;
    }}
    .sign-box {{
      width: 200px;
      height: 60px;
      border: 1px solid #000;
      display: flex;
      align-items: center;
      justify-content: center;
    }}
    .sig-img {{
      max-width: 180px;
      max-height: 50px;
    }}

    .note {{
      margin-top: 10px;
      text-align: right;
    }}

    .header-row{{
        display:flex;
        justify-content:space-between;   /* push apart */
        align-items:flex-start;
        width:100%;
    }}

    .company{{
        text-align:left;
        font-size:14px;
        line-height:1.4;
        grid-column:1;
        grid-row:1; }}
    .company-name{{ font-weight:700; font-size:16px; }}
    .company-line{{ font-size:14px; line-height:1.35; }}
    .company-line.tax{{ margin-top:6px; }}
  </style>
</head>

<body>

  <div class="header-row">

    <div class="company">
      <div class="company-name">{info['name']}</div>
      <div class="company-line">{info['address']}</div>
      <div class="company-line">{info['phone']}</div>
      <div class="company-line tax">{info['tax']}</div>
    </div>

    <div>
      <div class="title">
        {doc_title}
      </div>
      <div class="right kv">
        <div><b>‡πÄ‡∏•‡∏Ç‡∏ó‡∏µ‡πà:</b> {new_billno}</div>
        <div><b>‡∏ß‡∏±‡∏ô‡∏ó‡∏µ‡πà:</b> {billdate}</div>
        <div><b>‡∏≠‡πâ‡∏≤‡∏á‡∏≠‡∏¥‡∏á:</b> {ref_billno}</div>
      </div>
    </div>

  </div>

  <table>
    <thead>
      <tr>
        <th style="width: 12%">‡∏£‡∏´‡∏±‡∏™‡∏™‡∏¥‡∏ô‡∏Ñ‡πâ‡∏≤</th>
        <th style="width: 36%">‡∏£‡∏≤‡∏¢‡∏Å‡∏≤‡∏£</th>
        <th style="width: 10%">‡∏£‡∏≤‡∏Ñ‡∏≤/‡∏´‡∏ô‡πà‡∏ß‡∏¢</th>
        <th style="width: 8%">‡∏à‡∏≥‡∏ô‡∏ß‡∏ô</th>
        <th style="width: 8%">‡∏´‡∏ô‡πà‡∏ß‡∏¢</th>
        <th style="width: 13%">‡∏£‡∏ß‡∏°‡∏¢‡∏≠‡∏î‡πÄ‡∏á‡∏¥‡∏ô<br/>(‡∏£‡∏ß‡∏° VAT)</th>
      </tr>
    </thead>
    <tbody>
      {''.join(rows_html)}
    </tbody>
  </table>

  <div class="totals">
    <div class="row"><div class="label">‡∏¢‡∏≠‡∏î‡∏Å‡πà‡∏≠‡∏ô VAT:</div><div class="val">{_money(total_base)}</div></div>
    <div class="row"><div class="label">VAT 7%:</div><div class="val">{_money(total_vat)}</div></div>
    <div class="row"><div class="label">‡∏£‡∏ß‡∏°‡∏ó‡∏±‡πâ‡∏á‡∏™‡∏¥‡πâ‡∏ô (‡∏£‡∏ß‡∏° VAT):</div><div class="val">{_money(total_amount)}</div></div>
  </div>

</body>
</html>
"""

    HTML(string=html, base_url="/").write_pdf(pdf_path)


def build_receipts_by_new_billno_weasy_vat(
    df: pd.DataFrame,
    out_dir: str,
    *,
    font_regular_path: str,
    font_bold_path: str,
    signature_img_path: str | None = None,
    doc_title: str = "‡πÉ‡∏ö‡πÄ‡∏™‡∏£‡πá‡∏à‡∏£‡∏±‡∏ö‡πÄ‡∏á‡∏¥‡∏ô/‡πÉ‡∏ö‡∏Å‡∏≥‡∏Å‡∏±‡∏ö‡∏†‡∏≤‡∏©‡∏µ‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏¢‡πà‡∏≠",   # üëà NEW
):
    os.makedirs(out_dir, exist_ok=True)

    if "NEW_BILLNO" not in df.columns:
        raise ValueError("df must contain NEW_BILLNO column")

    groups = list(df.groupby("NEW_BILLNO", sort=True))
    total = len(groups)

    print(f"Generating {total} receipts...\n")

    for i, (new_billno, g) in enumerate(groups, start=1):

        # ---- Progress Line ----
        pct = (i / total) * 100
        print(f"\rProgress: {i}/{total}  ({pct:6.2f}%)  -> {new_billno}", end="")

        pdf_path = os.path.join(out_dir, f"{new_billno}.pdf")

        build_one_receipt_weasy_vat(
            g,
            pdf_path,
            font_regular_path=font_regular_path,
            font_bold_path=font_bold_path,
            signature_img_path=signature_img_path,
            doc_title=doc_title,
        )

    print("\nDone.")
    return out_dir



In [13]:
import pandas as pd

def get_last_two_years_nonvat_sales_lines_last_purchase_vat(
    data,
    *,
    source: str,
    date_col: str = "BILLDATE",
    copy: bool = True,
    verbose: bool = True,
) -> pd.DataFrame:
    """
    Run get_nonvat_sales_lines_last_purchase_vat() for the latest year found in `data`
    and the previous year, then concat results.

    Assumes `data` is a dict-like object of dataframes (your KCW pattern).
    """

    # --- find any dataframe in `data` that has date_col
    candidate_years = []
    for _, obj in data.items():
        if isinstance(obj, pd.DataFrame) and date_col in obj.columns:
            y = pd.to_datetime(obj[date_col], errors="coerce").dt.year.max()
            if pd.notna(y):
                candidate_years.append(int(y))

    if not candidate_years:
        raise ValueError(f"Could not find any DataFrame in `data` with a valid {date_col}")

    latest_year = max(candidate_years)
    years = [latest_year - 1, latest_year]

    # --- loop and concat
    outs = []
    for y in years:
        out = get_nonvat_sales_lines_last_purchase_vat(data, year=y, source=source)
        out = out.copy() if copy else out
        out["YEAR"] = y  # optional but very useful
        outs.append(out)

    result = pd.concat(outs, ignore_index=True)

    if verbose:
        print(f"[last_two_years_nonvat] source={source} years={years} rows={len(result):,}")

    return result

In [14]:
nonvat_sales_lines_last_purchase_vat_hq = get_last_two_years_nonvat_sales_lines_last_purchase_vat(
    data, source="hq"
)

nonvat_sales_lines_last_purchase_vat_syp = get_last_two_years_nonvat_sales_lines_last_purchase_vat(
    data, source="syp"
)

[last_two_years_nonvat] source=hq years=[2025, 2026] rows=72,677
[last_two_years_nonvat] source=syp years=[2025, 2026] rows=15,171


In [15]:
nonvat_sales_lines_last_purchase_vat_hq = filter_last_year(nonvat_sales_lines_last_purchase_vat_hq, "BILLDATE")
nonvat_sales_lines_last_purchase_vat_syp = filter_last_year(nonvat_sales_lines_last_purchase_vat_syp, "BILLDATE")

[filter_last_year] latest=2026-02-26 | cutoff=2025-02-26 | rows=66,828/72,677
[filter_last_year] latest=2026-02-25 | cutoff=2025-02-25 | rows=15,171/15,171


In [16]:
mask = nonvat_sales_lines_last_purchase_vat_hq["BILLNO"].astype("string").str.contains("TF", na=False)

removed_tf = nonvat_sales_lines_last_purchase_vat_hq.loc[mask].copy()
nonvat_sales_lines_last_purchase_vat_hq = nonvat_sales_lines_last_purchase_vat_hq.loc[~mask].copy()

print(f"Removed TF/TFV lines: {len(removed_tf)}")

Removed TF/TFV lines: 11761


In [17]:
mask = nonvat_sales_lines_last_purchase_vat_syp["BILLNO"].astype("string").str.contains("TF", na=False)

removed_tf = nonvat_sales_lines_last_purchase_vat_syp.loc[mask].copy()
nonvat_sales_lines_last_purchase_vat_syp = nonvat_sales_lines_last_purchase_vat_syp.loc[~mask].copy()

print(f"Removed TF/TFV lines: {len(removed_tf)}")

Removed TF/TFV lines: 0


In [18]:
out_syp = nonvat_sales_lines_last_purchase_vat_syp
out_hq = nonvat_sales_lines_last_purchase_vat_hq

In [19]:
out_hq = filter_last_year(out_hq, "BILLDATE")
out_syp = filter_last_year(out_syp, "BILLDATE")

[filter_last_year] latest=2026-02-25 | cutoff=2025-02-25 | rows=55,067/55,067
[filter_last_year] latest=2026-02-25 | cutoff=2025-02-25 | rows=15,171/15,171


In [20]:
import pandas as pd

def split_negative_amount(
    df: pd.DataFrame,
    amount_col: str = "AMOUNT",
    *,
    copy: bool = True,
    verbose: bool = True,
):
    """
    Split dataframe into negative AMOUNT rows and non-negative rows.

    Returns
    -------
    df_negative, df_positive
    """

    if amount_col not in df.columns:
        raise ValueError(f"{amount_col} not found in dataframe")

    # ensure numeric (legacy POS safe)
    amount = pd.to_numeric(df[amount_col], errors="coerce")

    mask_neg = amount < 0

    df_negative = df.loc[mask_neg]
    df_positive = df.loc[~mask_neg]

    if copy:
        df_negative = df_negative.copy()
        df_positive = df_positive.copy()

    if verbose:
        print(
            f"[split_negative_amount] negative={len(df_negative):,} | "
            f"non_negative={len(df_positive):,} | total={len(df):,}"
        )

    return df_negative, df_positive


def join_po_from_simas(
    df_target: pd.DataFrame,
    df_simas: pd.DataFrame,
    *,
    key: str = "BILLNO",
    po_col: str = "PO",
    copy: bool = True,
    verbose: bool = True,
) -> pd.DataFrame:

    if key not in df_target.columns:
        raise ValueError(f"{key} not in df_target")
    if key not in df_simas.columns:
        raise ValueError(f"{key} not in df_simas")
    if po_col not in df_simas.columns:
        raise ValueError(f"{po_col} not in df_simas")

    # --- normalize join keys (non-destructive)
    tgt = df_target.copy()
    sim = df_simas.copy()

    tgt["_JOIN_KEY"] = tgt[key].astype("string").str.strip().str.upper()
    sim["_JOIN_KEY"] = sim[key].astype("string").str.strip().str.upper()

    # lookup table
    simas_lookup = (
        sim[["_JOIN_KEY", po_col]]
        .drop_duplicates(subset=["_JOIN_KEY"])
    )

    result = tgt.merge(
        simas_lookup,
        on="_JOIN_KEY",
        how="left"
    ).drop(columns=["_JOIN_KEY"])

    if copy:
        result = result.copy()

    if verbose:
        matched = result[po_col].notna().sum()
        print(f"[join_po_from_simas] matched PO rows: {matched:,}/{len(result):,}")

    return result

def join_new_billno_to_neg(
    out_hq_neg: pd.DataFrame,
    out_hq_pos: pd.DataFrame,
    *,
    pos_key: str = "BILLNO",
    neg_key: str = "PO",
    new_col: str = "NEW_BILLNO",
    copy: bool = True,
    verbose: bool = True,
) -> pd.DataFrame:

    neg = out_hq_neg.copy()
    pos = out_hq_pos.copy()

    # --- normalize join keys
    neg["_JOIN_KEY"] = neg[neg_key].astype("string").str.strip().str.upper()
    pos["_JOIN_KEY"] = pos[pos_key].astype("string").str.strip().str.upper()

    lookup = (
        pos[["_JOIN_KEY", new_col]]
        .dropna(subset=["_JOIN_KEY"])
        .drop_duplicates(subset=["_JOIN_KEY"])
    )

    result = neg.merge(
        lookup,
        on="_JOIN_KEY",
        how="left"
    ).drop(columns=["_JOIN_KEY"])

    if copy:
        result = result.copy()

    if verbose:
        matched = result[new_col].notna().sum()
        print(f"[join_new_billno_to_neg] matched {matched:,}/{len(result):,}")

    return result

In [21]:
import pandas as pd

def assign_neg_billno_cntar_be(
    df_neg: pd.DataFrame,
    *,
    billdate_col: str = "BILLDATE",
    group_col: str = "NEW_BILLNO",
    out_col: str = "NEG_BILLNO",
    prefix: str = "CNTAR",
    seq_width: int = 3,             # 001, 002, ...
    date_pick: str = "min",         # "min" or "first" date per group
    normalize_group: bool = True,   # normalize NEW_BILLNO (strip+upper)
    copy: bool = True,
    verbose: bool = True,
) -> pd.DataFrame:
    """
    Create negative bill no like: CNTAR6902-001
      - 69 = Buddhist year (AD+543) last 2 digits
      - 02 = month from BILLDATE
      - 001 = sequence per (BBYY,MM), one per NEW_BILLNO group
    """

    if billdate_col not in df_neg.columns:
        raise ValueError(f"{billdate_col} not in df_neg")
    if group_col not in df_neg.columns:
        raise ValueError(f"{group_col} not in df_neg")

    df = df_neg.copy() if copy else df_neg
    df[billdate_col] = pd.to_datetime(df[billdate_col], errors="coerce")

    # group key (stable even with case/spacing issues)
    gkey = df[group_col].astype("string")
    if normalize_group:
        gkey = gkey.str.strip().str.upper()
    df["_GKEY"] = gkey

    # one row per NEW_BILLNO group with the date to base YYMM on
    base = df.dropna(subset=["_GKEY", billdate_col]).copy()
    if date_pick == "first":
        grp = (
            base.sort_values(billdate_col)
                .groupby("_GKEY", as_index=False)
                .first()[["_GKEY", billdate_col]]
        )
    else:  # "min"
        grp = (
            base.groupby("_GKEY", as_index=False)[billdate_col].min()
        )

    # Buddhist year (AD + 543), last 2 digits
    be_year = grp[billdate_col].dt.year + 543
    grp["_BBYY"] = (be_year % 100).astype(int).astype(str).str.zfill(2)

    # month 01-12
    grp["_MM"] = grp[billdate_col].dt.month.astype(int).astype(str).str.zfill(2)

    # sequence per (BBYY,MM) across NEW_BILLNO groups
    grp["_YM"] = grp["_BBYY"] + grp["_MM"]
    grp = grp.sort_values(["_YM", "_GKEY"]).reset_index(drop=True)
    grp["_SEQ"] = grp.groupby("_YM").cumcount() + 1

    grp[out_col] = (
        prefix
        + grp["_YM"]
        + "-"
        + grp["_SEQ"].astype(int).astype(str).str.zfill(seq_width)
    )

    # map back to all rows
    df = df.merge(grp[["_GKEY", out_col]], on="_GKEY", how="left")
    df.drop(columns=["_GKEY"], inplace=True)

    if verbose:
        print(
            f"[assign_neg_billno_cntar_be] groups={len(grp):,} "
            f"assigned_rows={df[out_col].notna().sum():,}/{len(df):,}"
        )

    return df

In [22]:
# 1) split negative lines
out_hq_neg, out_hq_pos = split_negative_amount(out_hq)

# 2) enrich bill numbers to positive lines
out_hq_pos = enrich_sales_with_newbillno_only(
    out_hq_pos,
    source="hq"
)

# 3) getting reference bill from simas PO
df_simas = data['raw_hq_simas_sales_bills.csv'].copy()

out_hq_neg = join_po_from_simas(out_hq_neg, df_simas)

# step 4 ‚Äî map NEW_BILLNO from df_pos
out_hq_neg = join_new_billno_to_neg(out_hq_neg, out_hq_pos)

# step 5 - assign the neg billno
out_hq_neg = assign_neg_billno_cntar_be(out_hq_neg)

[split_negative_amount] negative=1,123 | non_negative=53,944 | total=55,067
[join_po_from_simas] matched PO rows: 46/1,123
[join_new_billno_to_neg] matched 12/1,123
[assign_neg_billno_cntar_be] groups=9 assigned_rows=12/1,123


In [23]:
# 1) split negative lines
out_syp_neg, out_syp_pos = split_negative_amount(out_syp)

# 2) enrich bill numbers to positive lines
out_syp_pos = enrich_sales_with_newbillno_only(
    out_syp_pos,
    source="hq"
)

# 3) getting reference bill from simas PO
df_simas = data['raw_hq_simas_sales_bills.csv'].copy()

out_syp_neg = join_po_from_simas(out_syp_neg, df_simas)

# step 4 ‚Äî map NEW_BILLNO from df_pos
out_syp_neg = join_new_billno_to_neg(out_syp_neg, out_syp_pos)

# step 5 - assign the neg billno
out_syp_neg = assign_neg_billno_cntar_be(out_syp_neg)

[split_negative_amount] negative=372 | non_negative=14,799 | total=15,171
[join_po_from_simas] matched PO rows: 0/372
[join_new_billno_to_neg] matched 0/372
[assign_neg_billno_cntar_be] groups=0 assigned_rows=0/372


In [24]:
import pandas as pd
import numpy as np

def build_bill_summary(
    df: pd.DataFrame,
    *,
    billno_col: str = "NEW_BILLNO",
    neg_billno_col: str = "NEG_BILLNO",   # <-- added
    billdate_col: str = "BILLDATE",
    detail_col: str = "DETAIL",
    amount_col: str = "AMOUNT",
    tax_rate: float = 0.07,
    tax_id_value: str = "0000000000000",
):
    """
    Thai-style VAT rounding:
    TOTAL_AMOUNT = sum(AMOUNT)  (VAT included)
    BEFORE_VAT   = round(TOTAL / (1+tax_rate), 2)
    VAT_AMOUNT   = TOTAL - BEFORE_VAT
    """

    out = df.copy()
    out[billno_col] = out[billno_col].astype("string")
    if neg_billno_col in out.columns:
        out[neg_billno_col] = out[neg_billno_col].astype("string")
    out[amount_col] = pd.to_numeric(out[amount_col], errors="coerce").fillna(0)

    # ===== pick DETAIL from row with highest AMOUNT =====
    idx_max_amt = out.groupby(billno_col)[amount_col].idxmax()
    detail_pick = (
        out.loc[idx_max_amt, [billno_col, detail_col]]
        .set_index(billno_col)[detail_col]
    )

    # ===== group totals + billdate + NEG_BILLNO =====
    agg_dict = {
        "TOTAL_AMOUNT": (amount_col, "sum"),
        "BILLDATE": (billdate_col, "first"),
    }
    # include NEG_BILLNO if present
    if neg_billno_col in out.columns:
        agg_dict["NEG_BILLNO"] = (neg_billno_col, "first")

    totals = out.groupby(billno_col, as_index=False).agg(**agg_dict)

    # ===== Thai VAT calculation =====
    divisor = 1 + tax_rate
    totals["BEFORE_VAT"] = (totals["TOTAL_AMOUNT"] / divisor).round(2)
    totals["VAT_AMOUNT"] = (totals["TOTAL_AMOUNT"] - totals["BEFORE_VAT"]).round(2)

    # attach DETAIL
    totals[detail_col] = totals[billno_col].map(detail_pick)

    # TAX ID (13 zeros)
    totals["TAX_ID"] = str(tax_id_value).zfill(13)[:13]

    # SEQ starting from 1
    totals = totals.sort_values(billno_col).reset_index(drop=True)
    totals["SEQ"] = np.arange(1, len(totals) + 1)

    return totals

In [25]:
hq_bill_pos_summary = build_bill_summary(out_hq_pos)
hq_bill_pos_summary.head()


Unnamed: 0,NEW_BILLNO,TOTAL_AMOUNT,BILLDATE,BEFORE_VAT,VAT_AMOUNT,DETAIL,TAX_ID,SEQ
0,TAR6802-001,14700.0,2025-02-26,13738.32,961.68,‡πÅ‡∏Ç‡∏ô‡∏Å‡∏•‡∏≤‡∏á ‡∏¢‡∏≤‡∏ß 57 ‡πÄ‡∏ã‡∏ô F/6640-TS90,0,1
1,TAR6802-002,9878.0,2025-02-26,9231.78,646.22,"‡∏´‡∏±‡∏ß‡∏´‡∏°‡∏π‡∏Ñ‡∏•‡∏±‡∏ä 12"" ‡∏£‡∏∏‡πà‡∏ô‡∏ï‡∏µ‡∏ô‡∏ú‡∏µ F/6600",0,2
2,TAR6802-003,28180.0,2025-02-26,26336.45,1843.55,‡πÅ‡∏ö‡∏ï‡πÄ‡∏ï‡∏≠‡∏£‡∏µ‡πà FB HYBRID N120 (‡∏ù‡∏≤‡∏î‡∏≥),0,3
3,TAR6802-004,11480.0,2025-02-26,10728.97,751.03,‡∏ô.‡∏°.‡∏Æ. ptt 18LT Hydraulic HLP68,0,4
4,TAR6802-005,24910.0,2025-02-26,23280.37,1629.63,‡∏ô.‡∏°.‡∏Æ. #7884 18LT #7884,0,5


In [26]:
hq_bill_neg_summary = build_bill_summary(out_hq_neg)
hq_bill_neg_summary.head()


Unnamed: 0,NEW_BILLNO,TOTAL_AMOUNT,BILLDATE,NEG_BILLNO,BEFORE_VAT,VAT_AMOUNT,DETAIL,TAX_ID,SEQ
0,TAR6809-031,-2650.0,2026-02-24,CNTAR6902-001,-2476.64,-173.36,‡πÅ‡∏ö‡∏ï‡πÄ‡∏ï‡∏≠‡∏£‡∏µ‡πà FB GOLD DIN 45 L - SMF FB,0,1
1,TAR6809-211,-820.0,2025-10-15,CNTAR6810-001,-766.36,-53.64,CATCH PLATE DELPHI,0,2
2,TAR6901-190,-130.0,2026-02-25,CNTAR6902-002,-121.5,-8.5,‡πÑ‡∏™‡πâ‡∏Å‡∏£‡∏≠‡∏á‡πÇ‡∏ã‡∏•‡πà‡∏≤ ‡∏Å‡∏£‡∏∞‡∏î‡∏≤‡∏© T/T VIGO D-MAX AISIN,0,3
3,TAR6902-179,-3400.0,2026-02-23,CNTAR6902-003,-3177.57,-222.43,‡∏•‡∏π‡∏Å‡∏õ‡∏∑‡∏ô‡∏Ñ‡∏•‡∏±‡∏ä ‡∏Ç‡∏≤‡πÄ‡∏ï‡∏µ‡πâ‡∏¢ DS BIG-M BDI 9. NTN,0,4
4,TAR6902-182,-225.0,2026-02-23,CNTAR6902-004,-210.28,-14.72,‡πÅ‡∏´‡∏ß‡∏ô‡∏•‡πá‡∏≠‡∏Ñ‡πÉ‡∏ô ‡∏ß‡∏á‡∏ô‡∏≠‡∏Å 79.5 ‡∏°‡∏¥‡∏• MCP,0,5


In [27]:
syp_bill_pos_summary = build_bill_summary(out_syp_pos)
syp_bill_pos_summary.head()

Unnamed: 0,NEW_BILLNO,TOTAL_AMOUNT,BILLDATE,BEFORE_VAT,VAT_AMOUNT,DETAIL,TAX_ID,SEQ
0,TAR6806-001,1555.0,2025-06-23,1453.27,101.73,‡∏ô.‡∏°.‡∏Å. PZ GL5 5LT 90 GL5 PZ,0,1
1,TAR6806-002,2097.0,2025-06-24,1959.81,137.19,‡∏ô.‡∏°.‡∏Ñ.‡πÄ‡∏û‡∏≤‡∏ã‡πà‡∏≤‡∏£‡πå‡∏•‡∏≠‡∏á‡πÑ‡∏•‡πâ‡∏ó‡πå Z-7 5+1LT20W-50,0,2
2,TAR6806-003,3260.0,2025-06-25,3046.73,213.27,‡πÅ‡∏ö‡∏ï‡πÄ‡∏ï‡∏≠‡∏£‡∏µ‡πà FB F135 R (WET CHARGE),0,3
3,TAR6806-004,8435.0,2025-06-26,7883.18,551.82,‡πÑ‡∏î‡∏ä‡∏≤‡∏£‡πå‡∏ó 75A 12V ‡∏ô‡∏¥‡∏™‡∏™‡∏±‡∏ô BIG M ‡∏°‡∏µ‡∏ï‡∏π‡∏î‡∏õ‡∏±,0,4
4,TAR6806-005,8280.0,2025-06-26,7738.32,541.68,‡∏Ñ‡∏≤‡∏™‡∏ï‡∏£‡∏≠‡∏• ‡πÅ‡∏°‡πá‡∏Å‡∏ô‡∏≤‡πÄ‡∏ó‡πá‡∏ÅPU 6+1LT 10W30 CI,0,5


In [28]:
syp_bill_neg_summary = build_bill_summary(out_syp_neg)
syp_bill_neg_summary.head()

Unnamed: 0,NEW_BILLNO,TOTAL_AMOUNT,BILLDATE,NEG_BILLNO,BEFORE_VAT,VAT_AMOUNT,DETAIL,TAX_ID,SEQ


In [29]:
import logging

# WeasyPrint
logging.getLogger("weasyprint").setLevel(logging.ERROR)
logging.getLogger("weasyprint.progress").setLevel(logging.ERROR)
logging.getLogger("weasyprint.CSS").setLevel(logging.ERROR)
logging.getLogger("weasyprint.HTML").setLevel(logging.ERROR)

# fontTools (the spam you're seeing)
logging.getLogger("fontTools").setLevel(logging.ERROR)
logging.getLogger("fontTools.subset").setLevel(logging.ERROR)
logging.getLogger("fontTools.ttLib").setLevel(logging.ERROR)

# Optional: also silence warnings from fontTools tables
logging.getLogger("fontTools.ttLib.tables").setLevel(logging.ERROR)

In [30]:
dt = pd.to_datetime(hq_bill_pos_summary["BILLDATE"], errors="coerce")

YEAR  = dt.dt.year.max()
MONTH = dt[dt.dt.year == YEAR].dt.month.max()

print(YEAR, MONTH)

2026 2


In [31]:
hq_bill_pos_summary = filter_year_month(hq_bill_pos_summary, YEAR, MONTH )
hq_bill_neg_summary = filter_year_month(hq_bill_neg_summary, YEAR, MONTH )
syp_bill_pos_summary = filter_year_month(syp_bill_pos_summary, YEAR, MONTH )
syp_bill_neg_summary = filter_year_month(syp_bill_neg_summary, YEAR, MONTH )

In [32]:
import os

kcwdir = os.path.join(BASE_FOLDER, "KCW-Data")
print(kcwdir)

G:\Shared drives\KCW-Data


In [33]:
out_hq_neg = out_hq_neg.rename(columns={
    "NEW_BILLNO": "REF",
    "NEG_BILLNO": "NEW_BILLNO"
})

out_syp_neg = out_syp_neg.rename(columns={
    "NEW_BILLNO": "REF",
    "NEG_BILLNO": "NEW_BILLNO"
})

out_hq_pos_current_month = filter_year_month(out_hq_pos, YEAR, MONTH )
out_hq_neg_current_month = filter_year_month(out_hq_neg, YEAR, MONTH )
out_syp_pos_current_month = filter_year_month(out_syp_pos, YEAR, MONTH )
out_syp_neg_current_month = filter_year_month(out_syp_neg, YEAR, MONTH )

In [34]:
from pathlib import Path

font_regular_path = f"{kcwdir}/kcw_analytics/00_fonts/THSarabunNew/THSarabunNew.ttf"
font_bold_path = f"{kcwdir}/kcw_analytics/00_fonts/THSarabunNew/THSarabunNew-Bold.ttf"
signature_img_path = f"{kcwdir}/kcw_analytics/00_fonts/Signature.jpg"

In [35]:
out_dir = Path(
    kcwdir,
    "kcw_analytics",
    "04_outputs",
    "3TAR",
    f"3TAR_{YEAR}_{MONTH:02d}",
    "PDF"
)

out = build_receipts_by_new_billno_weasy_vat(
    out_syp_pos_current_month,
    out_dir,
    font_regular_path=font_regular_path,
    font_bold_path=font_bold_path,
    signature_img_path=signature_img_path,
)
print("Saved to:", out)

Generating 113 receipts...

Progress: 113/113  (100.00%)  -> TAR6902-113
Done.
Saved to: G:\Shared drives\KCW-Data\kcw_analytics\04_outputs\3TAR\3TAR_2026_02\PDF


In [36]:
out_dir = Path(
    kcwdir,
    "kcw_analytics",
    "04_outputs",
    "3TAR",
    f"3TAR_{YEAR}_{MONTH:02d}",
    "PDF"
)

out = build_receipts_by_new_billno_weasy_vat(
    out_syp_neg_current_month,
    out_dir,
    font_regular_path=font_regular_path,
    font_bold_path=font_bold_path,
    signature_img_path=signature_img_path,
    doc_title="‡πÉ‡∏ö‡∏•‡∏î‡∏´‡∏ô‡∏µ‡πâ",
)
print("Saved to:", out)

Generating 0 receipts...


Done.
Saved to: G:\Shared drives\KCW-Data\kcw_analytics\04_outputs\3TAR\3TAR_2026_02\PDF


In [37]:
out_dir = Path(
    kcwdir,
    "kcw_analytics",
    "04_outputs",
    "TAR",
    f"TAR_{YEAR}_{MONTH:02d}",
    "PDF"
)

out = build_receipts_by_new_billno_weasy_vat(
    out_hq_pos_current_month,
    out_dir,
    font_regular_path=font_regular_path,
    font_bold_path=font_bold_path,
    signature_img_path=signature_img_path,
)
print("Saved to:", out)

Generating 216 receipts...

Progress: 216/216  (100.00%)  -> TAR6902-216
Done.
Saved to: G:\Shared drives\KCW-Data\kcw_analytics\04_outputs\TAR\TAR_2026_02\PDF


In [38]:
out_dir = Path(
    kcwdir,
    "kcw_analytics",
    "04_outputs",
    "TAR",
    f"TAR_{YEAR}_{MONTH:02d}",
    "PDF"
)

out = build_receipts_by_new_billno_weasy_vat(
    out_hq_neg_current_month,
    out_dir,
    font_regular_path=font_regular_path,
    font_bold_path=font_bold_path,
    signature_img_path=signature_img_path,
    doc_title="‡πÉ‡∏ö‡∏•‡∏î‡∏´‡∏ô‡∏µ‡πâ",
)
print("Saved to:", out)

Generating 8 receipts...

Progress: 8/8  (100.00%)  -> CNTAR6902-008
Done.
Saved to: G:\Shared drives\KCW-Data\kcw_analytics\04_outputs\TAR\TAR_2026_02\PDF


In [39]:
import os

output_dir = Path(
    kcwdir,
    "kcw_analytics",
    "04_outputs",
    "TAR"
)
out_hq_pos.to_csv(os.path.join(output_dir, "out_hq_pos.csv"), index=False, encoding="utf-8-sig")
out_hq_neg.to_csv(os.path.join(output_dir, "out_hq_neg.csv"), index=False, encoding="utf-8-sig")

output_dir = f"{kcwdir}/kcw_analytics/04_outputs/3TAR"

out_syp_pos.to_csv(os.path.join(output_dir, "out_syp_pos.csv"), index=False, encoding="utf-8-sig")
out_syp_neg.to_csv(os.path.join(output_dir, "out_syp_neg.csv"), index=False, encoding="utf-8-sig")

output_dir_hq = Path(
    kcwdir,
    "kcw_analytics",
    "04_outputs",
    "TAR",
    f"TAR_{YEAR}_{MONTH:02d}",
    "CSV"
)
os.makedirs(output_dir_hq, exist_ok=True)

hq_bill_pos_summary.to_csv(os.path.join(output_dir_hq, "TAR_{YEAR}_{MONTH}_summary.csv"), index=False, encoding="utf-8-sig")
hq_bill_neg_summary.to_csv(os.path.join(output_dir_hq, "CNTAR_{YEAR}_{MONTH}_summary.csv"), index=False, encoding="utf-8-sig")

output_dir_syp = Path(
    kcwdir,
    "kcw_analytics",
    "04_outputs",
    "3TAR",
    f"3TAR_{YEAR}_{MONTH:02d}",
    "CSV"
)

os.makedirs(output_dir_syp, exist_ok=True)

syp_bill_pos_summary.to_csv(os.path.join(output_dir_syp, "3TAR_{YEAR}_{MONTH}_summary.csv"), index=False, encoding="utf-8-sig")
syp_bill_neg_summary.to_csv(os.path.join(output_dir_syp, "3CNTAR_{YEAR}_{MONTH}_summary.csv"), index=False, encoding="utf-8-sig")
