In [5]:
import pdfplumber
import pandas as pd
import re
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill
from tabulate import tabulate

# === TARIFF LOGIC ===
def calculate_tariff(hs_code: str, origin: str) -> float:
    """Return the tariff rate, in percent, for an HS code and origin country.

    Args:
        hs_code: HS code text; dots are removed and surrounding whitespace
            is stripped before the prefix comparison.
        origin: Country name; compared after stripping and upper-casing.

    Returns:
        The percentage rate for the first matching six-digit HS prefix and
        origin, or 0.0 when the prefix or the origin has no entry.
    """
    # Six-digit HS prefix -> {origin: rate in percent}. An origin that is
    # absent from a prefix's mapping is charged 0.0.
    rates_by_prefix = (
        ('847141', {}),
        ('847330', {'CHINA': 45.0}),
        ('850440', {'TAIWAN': 10.0, 'CHINA': 55.0}),
        ('854442', {'TAIWAN': 12.6, 'CHINA': 57.6}),
        ('852910', {'TAIWAN': 10.0, 'CHINA': 30.0}),
        ('852589', {'TAIWAN': 10.0, 'CHINA': 55.0}),
    )

    normalized_code = hs_code.replace('.', '').strip()
    normalized_origin = origin.strip().upper()

    for prefix, rates in rates_by_prefix:
        if normalized_code.startswith(prefix):
            return rates.get(normalized_origin, 0.0)
    return 0.0

# === BLOCK PARSER ===
def parse_item_block(block_lines):
    """Parse the text lines of one invoice item into a row dictionary.

    The first line must look like
    ``POD-<6-7 digits>[suffix] <item name> <qty> PCS <amount>``; the
    remaining lines are scanned for "Country of Origin", "HS Code" and
    "HTSUS" labels (an ASCII or full-width colon after the label is
    optional).

    Args:
        block_lines: Sequence of text lines, header line first.

    Returns:
        A dict with keys "Item", "Amount" (digit string, thousands
        separators removed), "Country of Origin", "HS Code", "HTSUS" and
        "Tariff (%)"; fields that are not found stay "NA". Returns None
        when the block is empty or its first line is not an item header.
    """
    # An empty block has no header line to inspect; report "not an item"
    # instead of raising IndexError.
    if not block_lines:
        return None

    # The optional "(?:\.\d+)?" keeps a decimal part of the amount instead
    # of silently truncating it at the decimal point.
    header = re.search(
        r'POD-\d{6,7}[A-Z()]*\s+(.*?)\s+\d+\s+PCS\s+([\d,]+(?:\.\d+)?)',
        block_lines[0],
    )
    if header is None:
        return None

    # Drop a trailing standalone number left at the end of the item name.
    item_name = re.sub(r'\s+\d+$', '', header.group(1).strip())
    amount = header.group(2).replace(",", "")

    country = "NA"
    hs_code = "NA"
    htsus = "NA"

    for line in block_lines:
        if "Country of Origin" in line:
            origin_match = re.search(r"Country of Origin\s*[:：]?\s*([A-Z]+)", line, re.IGNORECASE)
            if origin_match:
                country = origin_match.group(1).upper()
        if "HS Code" in line:
            # Accept dotted codes such as "8504.40": a digits-only pattern
            # would stop at the first dot and the shortened code would match
            # no tariff prefix. A trailing sentence period is trimmed.
            hs_match = re.search(r"HS Code\s*[:：]?\s*(\d[\d.]*)", line)
            if hs_match:
                hs_code = hs_match.group(1).rstrip('.')
        if "HTSUS" in line:
            htsus_match = re.search(r"HTSUS\s*[:：]?\s*([\d\.]+)", line)
            if htsus_match:
                htsus = htsus_match.group(1)

    tariff = calculate_tariff(hs_code, country)

    return {
        "Item": item_name,
        "Amount": amount,
        "Country of Origin": country,
        "HS Code": hs_code,
        "HTSUS": htsus,
        "Tariff (%)": tariff
    }

# === PDF PARSER ===
def extract_items_from_invoice(pdf_path):
    """Extract item rows from an invoice PDF and compute tariff totals.

    Each page's text is split into lines. A line containing "PCS" starts a
    new item block and the following lines are attached to it until the
    next "PCS" line or the end of the page; lines before the first "PCS"
    line of a page are ignored. Blocks never continue across pages.

    Args:
        pdf_path: Path of the PDF file to open with pdfplumber.

    Returns:
        A tuple ``(df, total_tariff, service_fee, total_fee)``. ``df`` has
        one row per parsed item plus a "Tariff Amount" column. When no item
        is parsed, ``df`` is empty and the three totals are 0.0.
    """
    # NOTE(review): 0.3464% is applied to the summed amounts with no
    # minimum or maximum; confirm the rate and whether caps should apply.
    service_fee_rate = 0.003464

    items = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # Extract once per page; the result is reused for both the
            # emptiness check and the line split.
            text = page.extract_text()
            if not text:
                continue

            current_block = []
            for line in text.split('\n'):
                if 'PCS' in line:
                    # A new item header closes the block collected so far.
                    if current_block:
                        item = parse_item_block(current_block)
                        if item:
                            items.append(item)
                    current_block = [line]
                elif current_block:
                    current_block.append(line)

            # Flush the last block of the page.
            if current_block:
                item = parse_item_block(current_block)
                if item:
                    items.append(item)

    df = pd.DataFrame(items)
    if df.empty:
        return df, 0.0, 0.0, 0.0

    # Unparseable amounts are counted as 0 rather than aborting the run.
    df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce').fillna(0)
    df['Tariff Amount'] = df['Amount'] * df['Tariff (%)'] / 100
    total_tariff = df['Tariff Amount'].sum()
    service_fee = df['Amount'].sum() * service_fee_rate
    total_fee = total_tariff + service_fee
    return df, total_tariff, service_fee, total_fee

# === EXCEL WRITER ===
def write_tariff_to_excel(df, total_tariff, service_fee, total_fee, output_path):
    """Write the item table to an Excel file and append highlighted totals.

    Args:
        df: DataFrame of item rows; written without its index.
        total_tariff: Value written next to the "TOTAL TARIFF" label.
        service_fee: Value written next to "MERCHANDISE SERVICE FEE".
        total_fee: Value written next to "TOTAL FEE".
        output_path: Destination workbook path; the file is written by
            pandas, reopened with openpyxl, and saved again in place.
    """
    df.to_excel(output_path, index=False)

    workbook = load_workbook(output_path)
    sheet = workbook.active

    # Leave one empty row between the item table and the summary lines.
    first_summary_row = sheet.max_row + 2
    highlight = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
    summary_rows = (
        ("TOTAL TARIFF", total_tariff),
        ("MERCHANDISE SERVICE FEE", service_fee),
        ("TOTAL FEE", total_fee),
    )

    for row_idx, (label, value) in enumerate(summary_rows, start=first_summary_row):
        label_cell = sheet.cell(row=row_idx, column=1, value=label)
        label_cell.font = Font(bold=True)
        label_cell.fill = highlight

        value_cell = sheet.cell(row=row_idx, column=2, value=value)
        value_cell.font = Font(bold=True)
        value_cell.number_format = '"$"#,##0.00'
        value_cell.fill = highlight

    workbook.save(output_path)
    print(f"✅ Full tariff table with summary saved to: {output_path}")

# === USAGE ===
# Input invoice and destination workbook for this run.
pdf_path = r"C:\Users\Admin\OneDrive - neousys-tech\Desktop\Invoice\IN.PL_IN250716003_NTA.pdf"
output_path = r"C:\Users\Admin\OneDrive - neousys-tech\Share NTA Warehouse\06 Payment\APCC-W250700_Entry Summary_IN250_traiff excel.xlsx"

df_items, total_tariff, service_fee, total_fee = extract_items_from_invoice(pdf_path)
# Excel export is switched off for this run; uncomment the next line to write the workbook.
# write_tariff_to_excel(df_items, total_tariff, service_fee, total_fee, output_path)

# Console report: ANSI-coloured banner, the three totals, then the item table.
rule = "=" * 60

print("\n" + rule)
print("\033[1;44m\033[1;37m     🚢  TARIFF CALCULATION SUMMARY  📦     \033[0m")
print(rule)

print(f"\033[1;33m🔶 Total Tariff :\033[0m   \033[1m${total_tariff:,.2f}\033[0m")
print(f"\033[1;36m🔷 Service Fee  :\033[0m   \033[1m${service_fee:,.2f}\033[0m")
print(f"\033[1;32m🟩 Entry Fee    :\033[0m   \033[1m${total_fee:,.2f}\033[0m")

print(rule + "\n")

print("\033[1;34m📄 Tariff Breakdown Table:\033[0m")
print(tabulate(df_items, headers='keys', tablefmt='fancy_grid', showindex=False))



[1;44m[1;37m     🚢  TARIFF CALCULATION SUMMARY  📦     [0m
[1;33m🔶 Total Tariff :[0m   [1m$2,288.64[0m
[1;36m🔷 Service Fee  :[0m   [1m$448.46[0m
[1;32m🟩 Entry Fee    :[0m   [1m$2,737.10[0m

[1;34m📄 Tariff Breakdown Table:[0m
╒═════════════════════════════════════════╤══════════╤═════════════════════╤═══════════╤════════════╤══════════════╤═════════════════╕
│ Item                                    │   Amount │ Country of Origin   │   HS Code │ HTSUS      │   Tariff (%) │   Tariff Amount │
╞═════════════════════════════════════════╪══════════╪═════════════════════╪═══════════╪════════════╪══════════════╪═════════════════╡
│ Ant-RP_SMAM-WiFi-196MM1                 │      210 │ TAIWAN              │    852910 │ NA         │         10   │          21     │
├─────────────────────────────────────────┼──────────┼─────────────────────┼───────────┼────────────┼──────────────┼─────────────────┤
│ Nuvo-9006E-IFCN-CF3                     │    23604 │ TAIWAN              │    847