In [8]:
import pandas as pd
import csv
from sqlalchemy import create_engine
import re

# Build Supabase engine
DATABASE_DSN = "postgresql+psycopg://postgres.avcznjglmqhmzqtsrlfg:Czheyuan0227@aws-0-us-east-2.pooler.supabase.com:6543/postgres?sslmode=require"
engine = create_engine(DATABASE_DSN, pool_pre_ping=True)

#SO
SO_INV = pd.read_sql_table("wo_structured", con=engine, schema="public")
SO = SO_INV[['Order Date', 'Ship Date', 'QB Num', "P. O. #", "Name",'Qty(+)', 'Qty(-)', 'Item', 'Pre/Bare', 'On Hand', "On Hand - WIP"]]
# SO.to_csv('open sales2.csv',index=False,columns =SO)

#"POD"
pod = pd.read_sql_table("Open_Purchase_Orders", con=engine, schema="public")
# pod.to_csv('open purchase2.csv', index=False)

print(f"Loaded POD: {len(pod)} rows")
print(f"Loaded SO : {len(SO)} rows")

Loaded POD: 375 rows
Loaded SO : 866 rows


In [9]:
# Expand Pre-installed items into components
print("=== EXPANDING PRE-INSTALLED ITEMS ===")

NAV= pd.read_sql_table("NT Shipping Schedule", con=engine, schema="public")
# NAV.to_csv('NAV1.csv', index=False)

# ---- helpers ---------------------------------------------------------------

INCL_SPLIT = re.compile(r"\bincluding\b", re.IGNORECASE)
QTYX_RE = re.compile(r"^\s*(\d+)\s*x\s*(.+)\s*$", re.IGNORECASE) # "2x SSD-1TB"

def clean_space(s: str) -> str:
    if not isinstance(s, str):
        return ""
    # Normalize NBSP etc.
    return s.replace('\u00A0', ' ').replace('\u3000', ' ').strip()

def parse_description(desc: str) -> tuple[str, list[str]]:
    """
    Returns (parent_code, component_tokens[])
    e.g. "SEMIL-2047GC-CRL, including i9-13900E, 2x SSD-1TB"
    -> ("SEMIL-2047GC-CRL", ["i9-13900E", "2x SSD-1TB"])
    """
    s = clean_space(desc)
    parts = INCL_SPLIT.split(s, maxsplit=1)
    # parent part may have a trailing ", ..." — keep only before first comma
    parent = clean_space(parts[0].split(",")[0])
    comps = []
    if len(parts) > 1:
        comps = [clean_space(x) for x in parts[1].split(",") if clean_space(x)]
    return parent, comps

def parse_component_token(token: str) -> tuple[str, float]:
    """
    Parses a component token possibly with 'Nx ' prefix.
    Returns (item_code, qty_per_parent).
    """
    m = QTYX_RE.match(token)
    if m:
        qty = float(m.group(1))
        item = clean_space(m.group(2))
        return item, qty
    return clean_space(token), 1.0

def expand_preinstalled_row(row: pd.Series) -> pd.DataFrame:
    """
    For a single NAV row (Pre-installed), expand into multiple rows:
    - one row per component with Qty(+) multiplied by qty_per_parent
    - (optional) one row for the parent itself if you want to track it too
    """
    parent, tokens = parse_description(row.get("Description", ""))
    base_qty = float(row.get("Qty(+)", 0) or 0)
    parent_item = parent or clean_space(str(row.get("Item", "")))

    # Build component rows
    comp_rows = []
    for tok in tokens:
        item, qty_per = parse_component_token(tok)
        out = row.copy()
        out["Parent_Item"] = parent_item
        out["Item"] = item
        out["Qty_per_parent"] = qty_per
        out["Qty(+)"] = base_qty * qty_per  # multiply
        out["IsParent"] = False
        comp_rows.append(out)

    # Always include the parent row too (helps tracing/diagnostics).
    parent_row = row.copy()
    parent_row["Parent_Item"] = parent_item
    parent_row["Item"] = parent_item
    parent_row["Qty_per_parent"] = 1.0
    parent_row["IsParent"] = True

    if comp_rows:
        return pd.concat([pd.DataFrame(comp_rows), pd.DataFrame([parent_row])],
                         ignore_index=True)
    else:
        # No components in description → only the parent
        return pd.DataFrame([parent_row])

# ---- pipeline --------------------------------------------------------------

def expand_nav_preinstalled(NAV: pd.DataFrame) -> pd.DataFrame:
    NAV = NAV.copy()

    # --- sanity columns ---
    for col in ["Pre/Bare", "Qty(+)", "Item"]:
        if col not in NAV.columns:
            raise ValueError(f"NAV must contain '{col}' column.")
    if "Description" not in NAV.columns:
        NAV["Description"] = ""

    NAV["Description"] = NAV["Description"].astype(str).apply(clean_space)

    # --- split (make copies to avoid SettingWithCopyWarning) ---
    pre_mask = NAV["Pre/Bare"].astype(str).str.strip().str.casefold().eq("pre")
    nav_pre   = NAV.loc[pre_mask].copy()
    nav_other = NAV.loc[~pre_mask].copy()

    # --- expand all Pre rows ---
    expanded_parts = [expand_preinstalled_row(r) for _, r in nav_pre.iterrows()]
    expanded_pre = (
        pd.concat(expanded_parts, ignore_index=True)
        if expanded_parts else nav_pre.copy()
    )

    # --- ensure consistent columns via reindex (no chained assigns) ---
    needed_cols = list(NAV.columns) + ["Parent_Item", "Qty_per_parent", "IsParent"]

    expanded_pre = expanded_pre.reindex(columns=needed_cols, fill_value=pd.NA)
    nav_other    = nav_other.reindex(columns=needed_cols, fill_value=pd.NA)

    # --- set natural parent and flags for non-Pre ---
    nav_other.loc[:, "Parent_Item"]     = nav_other["Item"]
    nav_other.loc[:, "Qty_per_parent"]  = 1.0
    nav_other.loc[:, "IsParent"]        = True  # single line is its own parent

    # --- merge back ---
    expanded_all = pd.concat([expanded_pre, nav_other], ignore_index=True)

    # --- enforce dtypes ---
    expanded_all["Qty(+)"]         = pd.to_numeric(expanded_all["Qty(+)"], errors="coerce").fillna(0.0)
    expanded_all["Qty_per_parent"] = pd.to_numeric(expanded_all["Qty_per_parent"], errors="coerce").fillna(1.0)
    expanded_all["IsParent"]       = expanded_all["IsParent"].astype(bool)
    expanded_all["Date"] = pd.to_datetime(expanded_all["Ship Date"], errors="coerce") + pd.Timedelta(days=5)


    return expanded_all

NAV_EXP = expand_nav_preinstalled(NAV)

print(f"Original NAV: {len(NAV)} rows")
print(f"Expanded to: {len(NAV_EXP)} total rows")
print(f"Expanded pre-installed compoents: {len(NAV_EXP.loc[(NAV_EXP['Pre/Bare'] == 'Pre') & (NAV_EXP['IsParent'] == False) ])} total rows")
print(f"Pre: {len(NAV_EXP[NAV_EXP['Pre/Bare'] == 'Pre'])}")
print(f"Bare: {len(NAV_EXP[NAV_EXP['Pre/Bare'] == 'Bare'])}")

=== EXPANDING PRE-INSTALLED ITEMS ===
Original NAV: 349 rows
Expanded to: 481 total rows
Expanded pre-installed compoents: 132 total rows
Pre: 194
Bare: 287


In [6]:
NAV_EXP.loc[(NAV_EXP['Pre/Bare'] == 'Pre')].to_excel('Pre-installed items.xlsx')

## ENHANCE POD

In [10]:
# Enhanced POD Processing with Pre/Bare Logic
print("=== ENHANCED POD PROCESSING ===")

import pandas as pd
from pandas.api.types import is_datetime64_any_dtype as is_dt

# --- 0) column names you want to match on ---
KEYS = ["QB Num", "Item", "Qty(+)"]

def _norm_qty(s: pd.Series, ndigits: int = 3) -> pd.Series:
    """Round qty for stable joins (24 vs 24.0)."""
    s = pd.to_numeric(s, errors="coerce")
    return s.round(ndigits)

def _norm_str(s: pd.Series) -> pd.Series:
    return s.astype(str).str.strip()

# --- 1) Normalize both frames ---
nav = NAV_EXP.copy()
pod_ = pod.copy()

# Ensure Date/Deliv Date are datetime
if not is_dt(nav.get("Date", pd.Series([], dtype="datetime64[ns]"))):
    nav["Date"] = pd.to_datetime(nav["Date"], errors="coerce")
if not is_dt(pod_.get("Deliv Date", pd.Series([], dtype="datetime64[ns]"))):
    pod_["Deliv Date"] = pd.to_datetime(pod_["Deliv Date"], errors="coerce")

# Normalize join keys
for c in ["QB Num", "Item"]:
    nav[c] = _norm_str(nav[c])
    pod_[c] = _norm_str(pod_[c])

nav["Qty(+)"] = _norm_qty(nav["Qty(+)"])
pod_["Qty(+)"] = _norm_qty(pod_["Qty(+)"])

# --- 2) Collapse NAV to one row per key (earliest Date) ---
nav_keyed = (
    nav.dropna(subset=["Date"])[KEYS + ["Date"]]
       .groupby(KEYS, as_index=False)["Date"].min()
       .rename(columns={"Date": "Date_from_NAV"})
)

# --- 3) Join and update ---
merged = pod_.merge(nav_keyed, on=KEYS, how="left")

# keep a backup if you want to audit
merged["Deliv Date (old)"] = merged["Deliv Date"]

# overwrite when we have a NAV date
mask = merged["Date_from_NAV"].notna()
merged.loc[mask, "Deliv Date"] = merged.loc[mask, "Date_from_NAV"]

# optional: report what changed / unmatched
updated_rows = int(mask.sum())
unmatched = merged.loc[~mask, KEYS].copy()

print(f"Updated Deliv Date for {updated_rows} rows.")
if not unmatched.empty:
    print("No NAV date match for these key rows:")
    print(unmatched.to_string(index=False))

# final result
pod_updated = merged.drop(columns=["Date_from_NAV"])

=== ENHANCED POD PROCESSING ===
Updated Deliv Date for 235 rows.
No NAV date match for these key rows:
    QB Num                                           Item  Qty(+)
POD-251404            Cbl-M12A5F-OT2-Black-Red-Fuse-100CM     4.0
POD-251474                     Cbl-W4M-M12A5F-40CM-PK-CAN     4.0
POD-251261                                     DtC-M12-WP    18.0
POD-251474                                     DtC-M12-WP     4.0
POD-251463                      TmP-XR-HL-29.5x29.5x5.0mm    10.0
POD-251465                                    FAN-CPU-RM1     1.0
POD-251351                                       i5-14500     8.0
POD-251470                                       i7-12700     2.0
POD-251465                                       i7-14700     1.0
POD-251406                                      i7-9700TE     5.0
POD-251348                                       i9-13900     4.0
POD-251446                                      i9-13900E    37.0
POD-251474                             

In [11]:
pod_updated

Unnamed: 0,Order Date,QB Num,Name,Item,Deliv Date,Qty(+),Deliv Date (old)
0,2025/07/24,POD-251047,Neousys Technology Incorp.,AccsyBx-6AntiVG-POC-551VTC,2025-11-26,24.0,2025-11-21
1,2025/07/24,POD-251048,Neousys Technology Incorp.,AccsyBx-6AntiVG-POC-551VTC,2025-12-24,29.0,2025-12-19
2,2025/10/13,POD-251444,Neousys Technology Incorp.,AccsyBx-Cardholder-10108GC-5080_70_70Ti,2025-10-27,5.0,2025-10-22
3,2025/10/03,POD-251418,Neousys Technology Incorp.,AccsyBx-FAN-NRU-100,2025-10-27,5.0,2025-10-22
4,2025/10/13,POD-251444,Neousys Technology Incorp.,AccsyBx-FAN-Nuvo9531_9501,2025-10-27,15.0,2025-10-22
...,...,...,...,...,...,...,...
370,2025/04/21,POD-250574,Neousys Technology Incorp.,,2025-11-07,1.0,2025-11-07
371,2025/04/21,POD-250575,Neousys Technology Incorp.,Certification,2025-11-07,1.0,2025-11-07
372,2025/08/28,POD-251228,Neousys Technology Incorp.,Engineer,2025-09-12,1.0,2025-09-12
373,2025/09/04,POD-251262,Neousys Technology Incorp.,Engineer,2025-09-19,1.0,2025-09-19


In [16]:
import pandas as pd
from pandas.api.types import is_datetime64_any_dtype as is_dt

def split_pod_by_nav(pod: pd.DataFrame, nav_exp: pd.DataFrame, tol=1e-6):
    """
    Returns:
      pod_splitted : POD split into NAV-covered partial rows + residual rows
      pod_unmatched: POD rows with no NAV rows for the same (QB Num, Item)
    """
    def _norm_str(s): return s.astype(str).str.strip()
    def _norm_qty(s): return pd.to_numeric(s, errors="coerce")
    def _norm_dt(x, col):
        if not is_dt(x.get(col, pd.Series([], dtype="datetime64[ns]"))):
            x[col] = pd.to_datetime(x[col], errors="coerce")
        return x

    pod_ = pod.copy()
    nav_ = nav_exp.copy()

    # Normalize
    for df in (pod_, nav_):
        for c in ["QB Num", "Item"]:
            if c in df: df[c] = _norm_str(df[c])
        if "Qty(+)" in df: df["Qty(+)"] = _norm_qty(df["Qty(+)"])

    pod_ = _norm_dt(pod_, "Deliv Date")
    nav_ = _norm_dt(nav_, "Date")

    # NAV rows we can allocate against
    nav_trim = nav_.loc[nav_["Qty(+)"] > 0, ["QB Num", "Item", "Qty(+)", "Date"]].copy()
    nav_trim.sort_values(["QB Num", "Item", "Date"], inplace=True)

    # ---- UNMATCHED POD (no NAV rows for this (QB Num, Item)) ----
    nav_pairs = nav_trim[["QB Num", "Item"]].drop_duplicates()
    pod_unmatched = (
        pod_.merge(nav_pairs, on=["QB Num", "Item"], how="left", indicator=True)
            .query('_merge == "left_only"')
            .drop(columns="_merge")
            .assign(Split_Note="Unmatched (no NAV receipts)")
    )

    # Build lookup for matched pairs
    nav_groups = {
        k: g[["Date", "Qty(+)"]].reset_index(drop=True)
        for k, g in nav_trim.groupby(["QB Num", "Item"])
    }

    out_rows = []
    # Allocate for POD rows that DO have at least one NAV row
    pod_matched_mask = pod_.merge(nav_pairs, on=["QB Num", "Item"], how="left", indicator=True)["_merge"] == "both"
    for _, r in pod_.loc[pod_matched_mask].iterrows():
        qb = r["QB Num"]; it = r["Item"]
        qty_needed = float(r.get("Qty(+)", 0) or 0)
        orig_deliv = r.get("Deliv Date")

        nav_list = nav_groups.get((qb, it))
        if nav_list is not None:
            for i in range(len(nav_list)):
                if qty_needed <= tol: break
                take = min(qty_needed, float(nav_list.loc[i, "Qty(+)"] or 0))
                if take > tol:
                    row = r.copy()
                    row["Qty(+)"] = take
                    row["Deliv Date"] = pd.to_datetime(nav_list.loc[i, "Date"])
                    row["Split Note"] = "Allocated from NAV"
                    out_rows.append(row)
                    qty_needed -= take

        # Residual not covered by NAV
        if qty_needed > tol:
            row = r.copy()
            row["Qty(+)"] = qty_needed
            row["Deliv Date"] = orig_deliv
            row["Split Note"] = "Residual (not yet covered by NAV)"
            out_rows.append(row)

    pod_splitted = pd.DataFrame(out_rows)

    # Column order and sort
    prefer = ["Order Date", "QB Num", "Name", "Item", "Qty(+)", "Deliv Date", "Split Note"]
    cols = [c for c in prefer if c in pod_splitted.columns] + [c for c in pod_splitted.columns if c not in prefer]
    if len(pod_splitted):
        pod_splitted = pod_splitted[cols].sort_values(["QB Num", "Item", "Deliv Date"], kind="mergesort")

    return pod_splitted, pod_unmatched



In [18]:
pod_splitted, pod_unmatched = split_pod_by_nav(pod, NAV_EXP)

# 1) All split/allocated/residual rows:
display(pod_splitted)

# 2) POD rows with zero NAV receipts (what you asked for):
display(pod_unmatched)

# 3) Only residuals (partially covered but still short):
residuals = pod_splitted[pod_splitted["Split Note"] == "Residual (not yet covered by NAV)"]
display(residuals)

pod_unmatched.to_excel('Unmatched items.xlsx')


Unnamed: 0,Order Date,QB Num,Name,Item,Qty(+),Deliv Date,Split Note
22,2025/05/05,POD-250648,Neousys Technology Incorp.,Cbl-M12S4F-OW4-180CM1,10.0,2025-11-24,Allocated from NAV
308,2025/05/05,POD-250648,Neousys Technology Incorp.,SEMIL-1708-FF,10.0,2025-11-24,Allocated from NAV
67,2025/05/05,POD-250648,Neousys Technology Incorp.,i7-9700TE,10.0,2025-11-24,Allocated from NAV
23,2025/05/05,POD-250649,Neousys Technology Incorp.,Cbl-M12S4F-OW4-180CM1,10.0,2025-12-22,Allocated from NAV
309,2025/05/05,POD-250649,Neousys Technology Incorp.,SEMIL-1708-FF,10.0,2025-12-22,Allocated from NAV
...,...,...,...,...,...,...,...
138,2025/10/17,POD-251468,Neousys Technology Incorp.,DDR5-8GB-48-SM,10.0,NaT,Allocated from NAV
278,2025/10/17,POD-251468,Neousys Technology Incorp.,POC-715-UL,10.0,NaT,Allocated from NAV
232,2025/10/17,POD-251469,Neousys Technology Incorp.,Nuvo-9160GC-PoE,8.0,NaT,Allocated from NAV
77,2025/10/17,POD-251472,Neousys Technology Incorp.,AccsyBx-Pnl-Nuvo-9208VTC-CST,25.0,NaT,Allocated from NAV


Unnamed: 0,Order Date,QB Num,Name,Item,Deliv Date,Qty(+),Split_Note
19,2025/10/02,POD-251404,Neousys Technology Incorp.,Cbl-M12A5F-OT2-Black-Red-Fuse-100CM,2025-10-29,4.0,Unmatched (no NAV receipts)
39,2025/10/20,POD-251474,Neousys Technology Incorp.,Cbl-W4M-M12A5F-40CM-PK-CAN,2025-11-12,4.0,Unmatched (no NAV receipts)
43,2025/09/04,POD-251261,Neousys Technology Incorp.,DtC-M12-WP,2025-10-08,18.0,Unmatched (no NAV receipts)
45,2025/10/20,POD-251474,Neousys Technology Incorp.,DtC-M12-WP,2025-11-12,4.0,Unmatched (no NAV receipts)
59,2025/10/16,POD-251465,"Newegg Business, Inc.",FAN-CPU-RM1,2025-10-24,1.0,Unmatched (no NAV receipts)
...,...,...,...,...,...,...,...
370,2025/04/21,POD-250574,Neousys Technology Incorp.,,2025-11-07,1.0,Unmatched (no NAV receipts)
371,2025/04/21,POD-250575,Neousys Technology Incorp.,Certification,2025-11-07,1.0,Unmatched (no NAV receipts)
372,2025/08/28,POD-251228,Neousys Technology Incorp.,Engineer,2025-09-12,1.0,Unmatched (no NAV receipts)
373,2025/09/04,POD-251262,Neousys Technology Incorp.,Engineer,2025-09-19,1.0,Unmatched (no NAV receipts)


Unnamed: 0,Order Date,QB Num,Name,Item,Qty(+),Deliv Date,Split Note


In [237]:
# Normalize
nav_items = set(Bare_NAV['Item'].unique())
pod_items = set(pod['Item'].unique())

# Common + uncommon
common_bare_items = nav_items & pod_items
only_in_nav = nav_items - pod_items          # items in NAV Bare, not in POD
only_in_pod = pod_items - nav_items          # items in POD, not in NAV
uncommon = nav_items ^ pod_items             # in exactly one side

print(f"Common: {len(common_bare_items)}")
print(f"Only in NAV: {len(only_in_nav)}")
print(f"Only in POD: {len(only_in_pod)}")
print(f"Uncommon (symmetric diff): {len(uncommon)}")

only_in_nav


Common: 113
Only in NAV: 5
Only in POD: 130
Uncommon (symmetric diff): 135


{'AccsyBx-Cardholder-9160GC-2000EAda',
 'Cbl-M12A17M-VGA-180CM3',
 'Cbl-M12A8M-2DB9M_OW2-180CM1',
 'PA-280W-CW6P-2P-1',
 'RGS-8805GC'}