In [None]:
from ipywidgets import widgets
import pandas as pd
from io import BytesIO

# Create the file upload widget
uploader = widgets.FileUpload(
    accept='.xlsx',
    multiple=False
)

# Create an output widget to display results
output = widgets.Output()

# Global variable to store the DataFrame
df = pd.DataFrame()

# Define a function to handle file upload
def on_upload_change(change):
    global df  # Use global to make df accessible outside the function
    with output:
        output.clear_output()
        # Check if a file was uploaded
        if uploader.value:
            # Get the uploaded file (it's a tuple)
            uploaded_file = uploader.value[0]
            # Read the file content
            content = uploaded_file['content']
            # Convert to DataFrame with UTF-8 encoding
            df = pd.read_excel(BytesIO(content), engine='openpyxl')
            print("File uploaded successfully!")
            print(f"DataFrame shape: {df.shape}")
            print("\nFirst few rows:")
            print(df.head())

# Attach the handler to the upload widget
uploader.observe(on_upload_change, names='value')

# Display the widgets
display(uploader)
display(output)

# Now you can use 'df' in subsequent cells after uploading



FileUpload(value=(), accept='.xlsx', description='Upload')

Output()

In [3]:
from shared.parser import parse_excel
from pydantic import TypeAdapter
from typing import TypedDict
from datetime import datetime

upload_df = parse_excel(df.fillna(''))

orders = upload_df[[
    'sub',
    'main',
    'date_created',
    'customer_no',
    'creator',
    'customer',
    'dept_sales',
    'dept_fulfillment',
    'dept_shipping',
    'dept_pickup',
    'transport',
    'thermo',
    'status',
    'time_delivery_start',
    'time_delivery_end',
    'date_delivery',
    'date_shipping',
    'comment_1',
    'comment_2',
    'comment_3',
    'telephone_day',
    'mobile',
    'recipient',
    'custom',
    'last_updated'
]]

order_address = upload_df.query('transport == "黑貓宅急便"')[[
    'sub',
    'address',
    'postnumber'
]]

full_order_address = upload_df[[
    'sub',
    'address',
    'postnumber'
]]

order_items = upload_df[[
    'sub',
    'sku',
    'product',
    'qty',
    'comment'
]]
order_custom = upload_df.query('custom == True')[[

    'sub',
    'custom_sku',
    'custom_product',
    'custom_qty',
]]
order_tracking = upload_df.query('transport == "黑貓宅急便"')[[
    'sub'
]]

print(len(orders['sub'].drop_duplicates()))
print(len(full_order_address.drop_duplicates()))

print(len(order_address.drop_duplicates()))
print(len(order_tracking.drop_duplicates()))

print(len(orders.query('custom == True')))
print(len(order_custom))



                sub customer creator dept_sales date_created  \
13194  O26003747001      簡綺萱  網購公共帳號    9000 網購   2026-01-25   
13195  O26003902001      陳語珮  網購公共帳號    9000 網購   2026-01-26   
13196  O26003902001      陳語珮  網購公共帳號    9000 網購   2026-01-26   
13197  O26003907001    Amber  網購公共帳號    9000 網購   2026-01-26   
13198  O25011517001      蔡昊辰  網購公共帳號    9000 網購   2025-12-30   

       dept_fulfillment dept_shipping transport thermo date_delivery  ...  \
13194  0001 總倉(2F廠務辦公室)     0006 台中門市      門市自取           2026-02-15  ...   
13195  0001 總倉(2F廠務辦公室)     0006 台中門市      門市自取           2026-02-15  ...   
13196  0001 總倉(2F廠務辦公室)     0006 台中門市      門市自取           2026-02-15  ...   
13197  0001 總倉(2F廠務辦公室)     0006 台中門市      門市自取           2026-02-15  ...   
13198  0001 總倉(2F廠務辦公室)     0006 台中門市      門市自取           2026-02-16  ...   

            main customer_no dept_pickup date_shipping time_delivery_start  \
13194  O26003747     0050790   0006 台中門市          None                0900

In [4]:

import asyncio
from shared.tcat_api import *
order_address = order_address.drop_duplicates().reset_index(drop=True)

batch_size = 100
batch_list = []


for i in range(0, len(order_address), batch_size):
    batch_addresses = order_address.iloc[i : i + batch_size]
    batch_list.append(batch_addresses)

async with httpx.AsyncClient() as client:
    tasks = [Batch_Label_PostNumber(client, batch) for batch in batch_list]
    results = await asyncio.gather(*tasks, return_exceptions=True)
    

[{'Search': '811高雄市楠梓區壽民路68巷1號', 'PostNumber': '83-834-05-G'}, {'Search': '831高雄市大寮區鳳屏一路189巷20號', 'PostNumber': '81-803-24-H'}, {'Search': '330桃園市桃園區大興西路一段166號14樓之1', 'PostNumber': '30-397-73-B'}, {'Search': '831高雄市大寮區義和里義和路175之2號', 'PostNumber': '81-802-24-E'}, {'Search': '737台南市鹽水區三明里舊營81-11號', 'PostNumber': '76-740-16-D'}, {'Search': '265宜蘭縣羅東鎮忠孝路90巷13號', 'PostNumber': '92-964-14-C'}, {'Search': '235新北市中和區民享街92巷38號5樓', 'PostNumber': '40-692-33-C'}, {'Search': '220新北市板橋區大觀路二段287號4樓', 'PostNumber': '40-667-31-A'}, {'Search': '832高雄市林園區港埔里港埔三路56號', 'PostNumber': '81-801-21-G'}, {'Search': '270宜蘭縣蘇澳鎮隘丁里城東路68巷11號', 'PostNumber': '92-967-21-D'}, {'Search': '100台北市中正區武昌街一段58號2樓', 'PostNumber': '11-105-51-C'}, {'Search': '330桃園市桃園區龍安街167-1號13樓', 'PostNumber': '30-393-31-C'}, {'Search': '510彰化縣員林市永興街125號5樓', 'PostNumber': '66-512-33-H'}, {'Search': '504彰化縣秀水鄉安溪村彰水路一段466號', 'PostNumber': '64-501-85-P'}, {'Search': '242新北市新莊區八德街206號7樓 管理室收取', 'PostNumber': '40-214-44-A'}, {'Search': '702台南市南區美

In [None]:
result_list = []
for i in results:
    if isinstance(i, list):
        result_list = result_list.extend(i)

print(len(result_list))
# flat_list = sum(results_list, [])
postnumber_df = pd.DataFrame(result_list)
postnumber_df = postnumber_df.rename(columns = {'Search' : 'address', 'PostNumber' : 'postnumber'})


3643


NameError: name 'results_list' is not defined

In [None]:

final_order_address = pd.merge(full_order_address.drop(columns=['postnumber']), postnumber_df.drop_duplicates(), how='left', on='address')
print(final_order_address.fillna('').drop_duplicates().reset_index(drop=True))


In [None]:
from manager import *

session = DatabaseManager()

await session.connect()

try:    
    print(orders.head())
    await session.upsert_dataframe(
        df=orders.drop_duplicates(),
        table_name='orders',
        key_columns=['sub']
    )
    await session.upsert_dataframe(
        df=full_order_address.drop_duplicates(),
        table_name='order_address',
        key_columns=['sub']
    )
    await session.upsert_dataframe(
        df=order_custom.drop_duplicates(),
        table_name='order_custom',
        key_columns=['sub', 'sku', 'custom_sku']
    )
    await session.upsert_dataframe(
        df=order_items.drop_duplicates(),
        table_name='order_items',
        key_columns=['sub', 'sku']
    )
finally:
    # Always close the connection pool when done
    await session.close()
