## Pdf_WO VS. Open sales order.xlsx

In [2]:
mappings = {
    'M.280-SSD-256GB-PCIe44-TLC5WT-T': 'M.280-SSD-256GB-PCIe44-TLC5WT-TD',
    'M.280-SSD-512GB-PCIe44-TLC5WT-T': 'M.280-SSD-512GB-PCIe44-TLC5WT-TD',
    'M.242-SSD-256GB-PCIe34-TLC5WT-T': 'M.242-SSD-256GB-PCIe34-TLC5WT-TD',
    'M.242-SSD-512GB-PCIe34-TLC5WT-T': 'M.242-SSD-512GB-PCIe34-TLC5WT-TD',
    'M.242-SSD-128GB-PCIe34-TLC5WT-T': 'M.242-SSD-128GB-PCIe34-TLC5WT-TD',
    'Cblkit-FP-NRU-230V-AWP_NRU-240S': 'Cblkit-FP-NRU-230V-AWP_NRU-240S-AWP',
}

In [14]:
import logging
from config import DATABASE_DSN, DB_SCHEMA, TBL_INVENTORY, TBL_STRUCTURED, TBL_SALES_ORDER, TBL_POD, TBL_Shipping, TBL_LEDGER, TBL_ITEM_SUMMARY
from io_ops import (
    extract_inputs, write_to_db, write_final_sales_order_to_gsheet,
    save_not_assigned_so, fetch_word_files_df, fetch_pdf_orders_df_from_supabase
)
from core import (
    transform_sales_order, transform_inventory, transform_pod, transform_shipping,
    build_structured_df, prepare_erp_view, add_onhand_minus_wip
)
from ledger import build_ledger, expand_nav_preinstalled
import pandas as pd

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

def transform_sales_order(df_sales_order: pd.DataFrame) -> pd.DataFrame:
    df = df_sales_order.copy()
    df["partial"] = df["Qty"] != df["Backordered"]
    df = df.drop(columns = ['Qty', 'Item'])
    df = df.rename(columns={"Unnamed: 0": "Item", "Num": "QB Num", "Backordered": "Qty(-)", "Date":"Order Date"})
    df["Item"] = df["Item"].ffill().astype(str).str.strip()
    df = df[~df["Item"].str.startswith("total", na=False)]
    df = df[~df["Item"].str.lower().isin(["forwarding charge", "tariff (estimation)"])]
    df = df[df["Inventory Site"] == "WH01S-NTA"]
    return df

so_raw, inv_raw, ship_raw, pod_raw = extract_inputs()
so_full = transform_sales_order(so_raw)
inv     = transform_inventory(inv_raw)
pod     = transform_pod(pod_raw)
ship    = transform_shipping(ship_raw)
word_files_df = fetch_word_files_df("http://192.168.60.133:5001/api/word-files")
pdf_orders_df = fetch_pdf_orders_df_from_supabase(DATABASE_DSN)
# Structured output
structured, final_sales_order = build_structured_df(so_full, word_files_df, inv, pdf_orders_df, pod)

so_full[so_full['QB Num'] == 'SO-20251368']

Unnamed: 0,Item,Type,Order Date,Ship Date,Deliv Date,Terms,Due Date,QB Num,P. O. #,Name,Invoiced,Qty(-),Amount,Rep,Open Balance,Inventory Site,partial
624,DDR4-32GB-WT32-SM,Sales Order,09/26/2025,01/09/2026,,Prepay,09/26/2025,SO-20251368,PO-16732,Maritime Applied Physics Corporation,0.0,30.0,6210.0,A777,6210.0,WH01S-NTA,False
754,Wmkit-POC465AWP,Sales Order,09/26/2025,01/09/2026,,Prepay,09/26/2025,SO-20251368,PO-16732,Maritime Applied Physics Corporation,0.0,30.0,360.0,A777,360.0,WH01S-NTA,False
1097,Cblkit-M12-POC-465AWP,Sales Order,09/26/2025,01/09/2026,,Prepay,09/26/2025,SO-20251368,PO-16732,Maritime Applied Physics Corporation,0.0,30.0,3900.0,A777,3900.0,WH01S-NTA,False
1117,POC-465AWP,Sales Order,09/26/2025,01/09/2026,,Prepay,09/26/2025,SO-20251368,PO-16732,Maritime Applied Physics Corporation,0.0,30.0,22170.0,A777,22170.0,WH01S-NTA,False
1432,M.280-SSD-1TB-SATA-TLC5WT-TD,Sales Order,09/26/2025,01/09/2026,,Prepay,09/26/2025,SO-20251368,PO-16732,Maritime Applied Physics Corporation,0.0,30.0,5400.0,A777,5400.0,WH01S-NTA,False


In [3]:
import os
import pandas as pd
from sqlalchemy import create_engine

# 1. Build engine
DATABASE_DSN = (
    "postgresql://postgres.avcznjglmqhmzqtsrlfg:Czheyuan0227@"
    "aws-0-us-east-2.pooler.supabase.com:6543/postgres?sslmode=require"
)
engine = create_engine(DATABASE_DSN, pool_pre_ping=True)


def fetch_pdf_orders_df_from_supabase(dsn: str) -> pd.DataFrame:
    """
    Reads order_id + extracted_data from public.pdf_file_log and returns a
    two-column DataFrame with ['WO','Product Number'] rows, one per item in JSON.
    """
    eng = create_engine(dsn, pool_pre_ping=True)
    rows = pd.read_sql('SELECT order_id, extracted_data FROM public.pdf_file_log', eng)

    def rows_from_json(extracted_data, order_id=""):
        # extracted_data may be JSON string or dict
        if isinstance(extracted_data, str):
            try:
                extracted_data = json.loads(extracted_data)
            except Exception:
                extracted_data = {}
        data = extracted_data or {}
        wo = data.get("wo", order_id)
        items = data.get("items") or []

        # one row per item; if none, keep a placeholder
        if not items:
            return [{"WO": wo, "Product Number": ""}]
        out = []
        for it in items:
            pn = (
                it.get("product_number")
                or it.get("part_number")
                or it.get("product")
                or it.get("part")
                or ""
            )
            out.append({"WO": wo, "Product Number": pn})
        return out

    all_rows = []
    for _, r in rows.iterrows():
        all_rows.extend(rows_from_json(r.get("extracted_data"), r.get("order_id")))

    return pd.DataFrame(all_rows, columns=["WO", "Product Number"])

pdf_orders_df = fetch_pdf_orders_df_from_supabase(DATABASE_DSN)

pdf_orders_df[pdf_orders_df['WO'] == 'SO-20251329']

Unnamed: 0,WO,Product Number
14587,SO-20251329,POC-410
14588,SO-20251329,DDR4-16GB-32-SM
14589,SO-20251329,M.280-SSD-256GB-SATA-TLC5WT-TD
14590,SO-20251329,Win11IoT24-Entry
14591,SO-20251329,SSD-512GB-TLC5ET-PN


In [4]:
ref = pdf_orders_df.copy()
ref['__pos_out'] = ref.groupby('WO').cumcount()                 # position within WO
ref['__occ'] = ref.groupby(['WO','Product Number']).cumcount()  # occurrence index for duplicates
ref_key = ref[['WO','Product Number','__occ','__pos_out']]
ref[ref['WO'] == 'SO-20251329']

Unnamed: 0,WO,Product Number,__pos_out,__occ
14587,SO-20251329,POC-410,0,0
14588,SO-20251329,DDR4-16GB-32-SM,1,0
14589,SO-20251329,M.280-SSD-256GB-SATA-TLC5WT-TD,2,0
14590,SO-20251329,Win11IoT24-Entry,3,0
14591,SO-20251329,SSD-512GB-TLC5ET-PN,4,0


In [5]:
import json, re, numpy as np, pandas as pd
df_sales_order = pd.read_sql_table("open_sales_orders", con=engine, schema="public")
# df_sales_order = df_sales_order = pd.read_csv(r"C:\Users\Admin\OneDrive - neousys-tech\Share NTA Warehouse\Daily Update\Open Sales Order 9_22_2025.CSV", encoding="ISO-8859-1")

def transform_sales_order(df_sales_order: pd.DataFrame) -> pd.DataFrame:
    df = df_sales_order.copy()
    df = df.rename(columns={"Unnamed: 0": "Component", "Num": "WO_Number", "Backordered": "Qty"})
    df["Component"] = df["Component"].ffill().astype(str).str.strip()
    df = df[~df["Component"].str.startswith("total", na=False)]
    df = df[~df["Component"].str.lower().isin(["forwarding charge", "tariff (estimation)"])]
    if "Inventory Site" in df.columns:
        df = df[df["Inventory Site"] == "WH01S-NTA"]
    df['Component'] = df['Component'].replace(mappings)
    return df

df_sales_order = transform_sales_order(df_sales_order)

# Build df_out from Sales Order 
needed_cols = {
    "Name": "Customer",
    "P. O. #": "Customer PO",
    "WO_Number": "WO",
    "Component": "Product Number",
    "Backordered": "Qty",
    "Ship Date": "Lead Time"
}
for c in ["Customer","PO"]:
    if c not in df_sales_order.columns:
        df_sales_order[c] = ""

df_out = df_sales_order.rename(columns=needed_cols)[list(needed_cols.values())]

# Sort to group visually by WO, then by Product Number
df_out = df_out.sort_values(['WO', 'Product Number']).reset_index(drop=True)

tgt = df_out.copy()
tgt['__occ'] = tgt.groupby(['WO','Product Number']).cumcount()

# 3) Merge positions from output_df to df_out rows (match by WO + Product + occurrence)
merged = tgt.merge(ref_key, on=['WO','Product Number','__occ'], how='left')

# 4) For rows not present in output_df, keep their original within-WO order but push them after the matched ones
merged['__fallback'] = merged.groupby('WO').cumcount()
merged['__pos_out'] = merged['__pos_out'].fillna(np.inf)

df_sales_order[df_sales_order['WO_Number'] == 'SO-20251329']

Unnamed: 0,Component,Type,Date,Ship Date,Deliv Date,Terms,Due Date,WO_Number,P. O. #,Name,Invoiced,Qty,Amount,Item,Rep,Open Balance,Inventory Site,Customer,PO
440,DDR4-16GB-32-SM,Sales Order,09/18/2025,10/03/2025,,Net 30,10/18/2025,SO-20251329,00505698(2),LASERAX INC,0.0,3.0,306.0,Memory Module:DDR4-16GB-32-SM,A702,306.0,WH01S-NTA,,
665,POC-410,Sales Order,09/18/2025,10/03/2025,,Net 30,10/18/2025,SO-20251329,00505698(2),LASERAX INC,0.0,3.0,1536.0,POC-400 Series:POC-410,A702,1536.0,WH01S-NTA,,
829,M.280-SSD-256GB-SATA-TLC5WT-TD,Sales Order,09/18/2025,10/03/2025,,Net 30,10/18/2025,SO-20251329,00505698(2),LASERAX INC,0.0,3.0,183.0,Storage:M.280-SSD-256GB-SATA-TLC5WT-TD,A702,183.0,WH01S-NTA,,
909,SSD-512GB-TLC5ET-PN,Sales Order,09/18/2025,10/03/2025,,Net 30,10/18/2025,SO-20251329,00505698(2),LASERAX INC,0.0,1.0,84.0,Storage:SSD-512GB-TLC5ET-PN,A702,84.0,WH01S-NTA,,
935,Win11IoT24-Entry,Sales Order,09/18/2025,10/03/2025,,Net 30,10/18/2025,SO-20251329,00505698(2),LASERAX INC,0.0,3.0,153.0,Windows OS:Win11IoT24-Entry,A702,153.0,WH01S-NTA,,


In [6]:
merged[merged['WO'] == 'SO-20251329']

Unnamed: 0,Customer,Customer.1,Customer PO,WO,Product Number,Qty,Lead Time,__occ,__pos_out,__fallback
917,LASERAX INC,,00505698(2),SO-20251329,DDR4-16GB-32-SM,3.0,10/03/2025,0,1.0,0
918,LASERAX INC,,00505698(2),SO-20251329,M.280-SSD-256GB-SATA-TLC5WT-TD,3.0,10/03/2025,0,2.0,1
919,LASERAX INC,,00505698(2),SO-20251329,POC-410,3.0,10/03/2025,0,0.0,2
920,LASERAX INC,,00505698(2),SO-20251329,SSD-512GB-TLC5ET-PN,1.0,10/03/2025,0,4.0,3
921,LASERAX INC,,00505698(2),SO-20251329,Win11IoT24-Entry,3.0,10/03/2025,0,3.0,4


In [7]:
inf =merged[merged['__pos_out'] == np.inf]
inf.to_excel(r"C:\Users\Admin\OneDrive - neousys-tech\Desktop\Output.xlsx", sheet_name="Sheet3", index=False)

In [8]:
def reorder_df_out_by_output(output_df: pd.DataFrame, df_out: pd.DataFrame) -> pd.DataFrame:
    # 1) output_df is from PDFs, df_out is from Excel open sales order
    ref = output_df.copy()
    ref['__pos_out'] = ref.groupby('WO').cumcount()                 # position within WO
    ref['__occ'] = ref.groupby(['WO','Product Number']).cumcount()  # occurrence index for duplicates
    ref_key = ref[['WO','Product Number','__occ','__pos_out']]

    # 2) On df_out, tag each duplicate with its own occurrence index
    tgt = df_out.copy()
    tgt['__occ'] = tgt.groupby(['WO','Product Number']).cumcount()

    # 3) Merge positions from output_df to df_out rows (match by WO + Product + occurrence)
    merged = tgt.merge(ref_key, on=['WO','Product Number','__occ'], how='left')

    # 4) For rows not present in output_df, keep their original within-WO order but push them after the matched ones
    merged['__fallback'] = merged.groupby('WO').cumcount()
    merged['__pos_out'] = merged['__pos_out'].fillna(np.inf)

    # 5) Final order: by WO, then by output_df position; if missing, by original order
    ordered = (merged
               .sort_values(['WO','__pos_out','__fallback'])
               .drop(columns=['__occ','__pos_out','__fallback'])
               .reset_index(drop=True))
    return ordered


final_sales_order = reorder_df_out_by_output(pdf_orders_df, df_out)

final_sales_order['Product Number'] = final_sales_order['Product Number'].replace(mappings)

final_sales_order = final_sales_order.loc[:, ~final_sales_order.columns.duplicated()]


In [9]:
final_sales_order[final_sales_order['WO'] == 'SO-20251329']

Unnamed: 0,Customer,Customer PO,WO,Product Number,Qty,Lead Time
917,LASERAX INC,00505698(2),SO-20251329,POC-410,3.0,10/03/2025
918,LASERAX INC,00505698(2),SO-20251329,DDR4-16GB-32-SM,3.0,10/03/2025
919,LASERAX INC,00505698(2),SO-20251329,M.280-SSD-256GB-SATA-TLC5WT-TD,3.0,10/03/2025
920,LASERAX INC,00505698(2),SO-20251329,Win11IoT24-Entry,3.0,10/03/2025
921,LASERAX INC,00505698(2),SO-20251329,SSD-512GB-TLC5ET-PN,1.0,10/03/2025


In [10]:
wo = "SO-20251329"
pdf_set = set(ref.loc[ref['WO'] == wo, 'Product Number'])
xls_set = set(tgt.loc[tgt['WO'] == wo, 'Product Number'])
print("Only in Excel:", xls_set - pdf_set)
print("Only in PDF:",  pdf_set - xls_set)


Only in Excel: set()
Only in PDF: set()


In [1]:
from __future__ import annotations
import re
import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype
from core import _norm_cols

## 1) NAV (shipping) → expand pre-installed components
INCL_SPLIT = re.compile(r"\bincluding\b", re.IGNORECASE)
QTYX_RE = re.compile(r"^\s*(\d+)\s*x\s*(.+)\s*$", re.IGNORECASE)  # "2x SSD-1TB"

def clean_space(s: str) -> str:
    if not isinstance(s, str):
        return ""
    return s.replace("\u00A0", " ").replace("\u3000", " ").strip()

def parse_description(desc: str) -> tuple[str, list[str]]:
    s = clean_space(desc)
    parts = INCL_SPLIT.split(s, maxsplit=1)
    parent = clean_space(parts[0].split(",")[0])
    comps = []
    if len(parts) > 1:
        comps = [clean_space(x) for x in parts[1].split(",") if clean_space(x)]
    return parent, comps


parse_description("FLYC-300-EC, including GC-Jetson-NX16G-Orin-Nvidia-JetPack 6.0, M.230-SSD-1TB-PCIe4-TLC-TD")   

('FLYC-300-EC',
 ['GC-Jetson-NX16G-Orin-Nvidia-JetPack 6.0', 'M.230-SSD-1TB-PCIe4-TLC-TD'])

In [8]:
pattern_mappings = [
    (
        re.compile(
            r"^GC-Jetson-AGX64GB-Orin-Nvidia(?:[- ]?JetPack[-_ ]?[\d\.]+)?$",
            re.IGNORECASE,
        ),
        "GC-Jetson-AGX64GB-Orin-Nvidia",
    ),
    (
        re.compile(
            r"^GC-Jetson-AGX32GB-Orin-Nvidia(?:[- ]?JetPack[-_ ]?[\d\.]+)?$",
            re.IGNORECASE,
        ),
        "GC-Jetson-AGX32GB-Orin-Nvidia",
    ),
    (
        re.compile(
            r"^GC-Jetson-NX16G-Orin-Nvidia(?:[- ]?JetPack[-_ ]?[\d\.]+)?$",
            re.IGNORECASE,
        ),
        "GC-Jetson-NX16G-Orin-Nvidia",
    ),
]

def normalize_item(name: str) -> str:
    """
    1. Try direct dictionary mapping.
    2. If not found, try pattern-based normalization.
    """
    # # direct match
    # if name in mappings:
    #     return mappings[name]

    # regex match
    for pattern, replacement in pattern_mappings:
        if pattern.match(name):
            return replacement

    # no match → return original
    return name


def transform_shipping(df_shipping_schedule: pd.DataFrame) -> pd.DataFrame:

    df = df_shipping_schedule[df_shipping_schedule['Ship to'] == 'Neousys Technology America, Inc.'].copy()

    # --- make sure the columns exist (create empty ones if missing) ---
    need = ['SO NO.', 'Customer PO No.', 'Model Name', 'Ship Date', 'Qty', 'Description']
    for c in need:
        if c not in df.columns:
            df[c] = np.nan

    # --- select and rename ---
    Ship = df.loc[:, need].copy()
    Ship.rename(columns={
        "Customer PO No.": "QB Num",
        "Model Name": "Item",
        "Qty": "Qty(+)"
    }, inplace=True)

    # --- basic cleaning ---
    # QB Num: strip anything after '('
    Ship["QB Num"] = Ship["QB Num"].astype(str).str.split("(").str[0].str.strip()

    # types
    Ship["Item"] = Ship["Item"].astype(str).str.strip()
    Ship["Description"] = Ship["Description"].astype(str)

    Ship["Ship Date"] = pd.to_datetime(Ship["Ship Date"], errors="coerce")

    # Qty(+) numeric
    Ship["Qty(+)"] = pd.to_numeric(Ship["Qty(+)"], errors="coerce").fillna(0).astype(int)

    # --- Pre/Bare logic ---
    model_ok = Ship["Item"].str.upper().str.startswith(("N", "SEMIL", "POC"), na=False)
    # accept English or Chinese comma: ", including" or "， including"
    including_ok = Ship["Description"].str.contains(r"[，,]\s*including\b", case=False, na=False)

    pre_mask = model_ok & including_ok
    Ship["Pre/Bare"] = np.where(pre_mask, "Pre", "Bare")

    # optional: tidy column order
    desired = ["SO NO.", "QB Num", "Item", "Description", "Ship Date", "Qty(+)", "Pre/Bare"]
    Ship = Ship.reindex(columns=[c for c in desired if c in Ship.columns] +
                               [c for c in Ship.columns if c not in desired])
    
    Ship["Item"] = Ship["Item"].astype(str).map(normalize_item)

    return Ship

In [None]:
ship_raw=pd.read_excel(r"C:\Users\Admin\OneDrive - neousys-tech\Share NTA Warehouse\Daily Update\NTA_Shipping schedule_20251125.xlsx")

ship  = transform_shipping(ship_raw)


PermissionError: [Errno 13] Permission denied: 'C:\\Users\\Admin\\OneDrive - neousys-tech\\Share NTA Warehouse\\Daily Update\\NTA_Shipping schedule_20251125.xlsx'

In [15]:
pattern_mappings = [
    (
        re.compile(
            r"^GC-Jetson-AGX64GB-Orin-Nvidia(?:[- ]?JetPack[-_ ]?[\d\.]+)?$",
            re.IGNORECASE,
        ),
        "GC-Jetson-AGX64GB-Orin-Nvidia",
    ),
    (
        re.compile(
            r"^GC-Jetson-AGX32GB-Orin-Nvidia(?:[- ]?JetPack[-_ ]?[\d\.]+)?$",
            re.IGNORECASE,
        ),
        "GC-Jetson-AGX32GB-Orin-Nvidia",
    ),
    (
        re.compile(
            r"^GC-Jetson-NX16G-Orin-Nvidia(?:[- ]?JetPack[-_ ]?[\d\.]+)?$",
            re.IGNORECASE,
        ),
        "GC-Jetson-NX16G-Orin-Nvidia",
    ),
]


def normalize_item(name: str) -> str:
    """
    1. Try direct dictionary mapping.
    2. If not found, try pattern-based normalization.
    """

    # regex match
    for pattern, replacement in pattern_mappings:
        if pattern.match(name):
            return replacement

    # no match → return original
    return name



## 1) NAV (shipping) → expand pre-installed components
INCL_SPLIT = re.compile(r"\bincluding\b", re.IGNORECASE)
QTYX_RE = re.compile(r"^\s*(\d+)\s*x\s*(.+)\s*$", re.IGNORECASE)  # "2x SSD-1TB"

def clean_space(s: str) -> str:
    if not isinstance(s, str):
        return ""
    return s.replace("\u00A0", " ").replace("\u3000", " ").strip()

def parse_description(desc: str) -> tuple[str, list[str]]:
    s = clean_space(desc)
    parts = INCL_SPLIT.split(s, maxsplit=1)
    parent = clean_space(parts[0].split(",")[0])
    comps = []
    if len(parts) > 1:
        comps = [clean_space(x) for x in parts[1].split(",") if clean_space(x)]
    return parent, comps

def parse_component_token(token: str) -> tuple[str, float]:
    m = QTYX_RE.match(token)
    if m:
        qty = float(m.group(1))
        item = clean_space(m.group(2))
        return item, qty
    return clean_space(token), 1.0

def expand_preinstalled_row(row: pd.Series) -> pd.DataFrame:
    parent, tokens = parse_description(row.get("Description", ""))
    base_qty = float(row.get("Qty(+)", 0) or 0)
    parent_item = parent or clean_space(str(row.get("Item", "")))

    comp_rows = []
    for tok in tokens:
        item, qty_per = parse_component_token(tok)
        out = row.copy()
        out["Parent_Item"] = parent_item
        out["Item"] = item
        out["Qty_per_parent"] = qty_per
        out["Qty(+)"] = base_qty * qty_per
        out["IsParent"] = False
        comp_rows.append(out)

    parent_row = row.copy()
    parent_row["Parent_Item"] = parent_item
    parent_row["Item"] = parent_item
    parent_row["Qty_per_parent"] = 1.0
    parent_row["IsParent"] = True

    if comp_rows:
        return pd.concat([pd.DataFrame(comp_rows), pd.DataFrame([parent_row])], ignore_index=True)
    return pd.DataFrame([parent_row])


def expand_nav_preinstalled(NAV: pd.DataFrame) -> pd.DataFrame:
    NAV = NAV.copy()
    for col in ["Pre/Bare", "Qty(+)", "Item"]:
        if col not in NAV.columns:
            raise ValueError(f"NAV must contain '{col}' column.")
    if "Description" not in NAV.columns:
        NAV["Description"] = ""

    NAV["Description"] = NAV["Description"].astype(str).apply(clean_space)

    pre_mask = NAV["Pre/Bare"].astype(str).str.strip().str.casefold().eq("pre")
    nav_pre   = NAV.loc[pre_mask].copy()
    nav_other = NAV.loc[~pre_mask].copy()

    expanded_parts = [expand_preinstalled_row(r) for _, r in nav_pre.iterrows()]
    expanded_pre = (pd.concat(expanded_parts, ignore_index=True) if expanded_parts else nav_pre.copy())

    needed_cols = list(NAV.columns) + ["Parent_Item", "Qty_per_parent", "IsParent"]
    expanded_pre = expanded_pre.reindex(columns=needed_cols, fill_value=pd.NA)
    nav_other    = nav_other.reindex(columns=needed_cols, fill_value=pd.NA)

    nav_other.loc[:, "Parent_Item"]    = nav_other["Item"]
    nav_other.loc[:, "Qty_per_parent"] = 1.0
    nav_other.loc[:, "IsParent"]       = True

    expanded_all = pd.concat([expanded_pre, nav_other], ignore_index=True)

    expanded_all["Qty(+)"]         = pd.to_numeric(expanded_all["Qty(+)"], errors="coerce").fillna(0.0)
    expanded_all["Qty_per_parent"] = pd.to_numeric(expanded_all["Qty_per_parent"], errors="coerce").fillna(1.0)
    expanded_all["IsParent"]       = expanded_all["IsParent"].astype(bool)
    expanded_all["Date"] = pd.to_datetime(expanded_all["Ship Date"], errors="coerce") + pd.Timedelta(days=5)
    expanded_all["Item"] = expanded_all["Item"].astype(str).map(normalize_item)
    return expanded_all



NAV = expand_nav_preinstalled(ship)

In [17]:
NAV.loc[NAV["QB Num"] == "POD-251584"]

Unnamed: 0,SO NO.,QB Num,Item,Description,Ship Date,Qty(+),Pre/Bare,Parent_Item,Qty_per_parent,IsParent,Date
256,SO25110177,POD-251584,FLYC-300-EC-JON16-NS,"FLYC-300-EC, including GC-Jetson-NX16G-Orin-N...",2025-11-28,1.0,Bare,FLYC-300-EC-JON16-NS,1.0,True,2025-12-03
257,SO25110177,POD-251584,Cblkit-FLYC-300,"FLYC-300 cable kit, including 1xCbl-W1.25F4P-U...",2025-11-28,1.0,Bare,Cblkit-FLYC-300,1.0,True,2025-12-03
258,SO25110177,POD-251584,PA-60W-FLYC300,"60W AC/DC power adapter(GST60A12-JD) with 12V,...",2025-11-28,1.0,Bare,PA-60W-FLYC300,1.0,True,2025-12-03
259,SO25110177,POD-251584,CUSTOMER SERVICES,To產線：請協助將機器與930-PA000004-000|930-CK000009-000配對包裝,2025-11-28,1.0,Bare,CUSTOMER SERVICES,1.0,True,2025-12-03


In [19]:
a = parse_description('FLYC-300-EC, including  GC-Jetson-NX16G-Orin-Nvidia-JetPack 6.0, M.230-SSD-1TB-PCIe4-TLC-TD')
a

('FLYC-300-EC',
 ['GC-Jetson-NX16G-Orin-Nvidia-JetPack 6.0', 'M.230-SSD-1TB-PCIe4-TLC-TD'])