In [1]:
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import os

# Load variables from a .env file into the process environment
# (presumably this is where the OpenAI API key comes from - confirm).
load_dotenv()
# Shared API client, built with default settings; main() passes it to every lookup.
client = OpenAI()

def get_australian_barcode_via_api(product_name, api_client, model="o4-mini"):
    """Ask a chat model for the Australian barcode of a single product.

    Args:
        product_name: Name of the product to look up. Anything that is not a
            non-blank string is rejected without calling the API.
        api_client: Object exposing ``chat.completions.create(model=..., messages=...)``.
        model: Identifier of the chat model sent with the request.

    Returns:
        str: The reply text with surrounding whitespace removed (the prompt
        asks for a numeric barcode or 'Not Found'); 'Not Found' when the reply
        carries no text; "无效的产品名称" for an invalid product name;
        "API调用失败" when the request or response handling raises.

    Note:
        The returned value is whatever the model answered. It is not checked
        against any barcode registry or even validated as numeric.
    """
    if not isinstance(product_name, str) or not product_name.strip():
        return "无效的产品名称"

    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that provides Australian product barcodes. When asked for a barcode, return only the numerical barcode number or 'Not Found'. If you cannot find it, return 'Not Found'.",
        },
        {
            "role": "user",
            "content": f"What is the Australian barcode for the product '{product_name}'?",
        }
    ]
    try:
        completion = api_client.chat.completions.create(
            model=model,  # override through the `model` argument to try another model
            messages=messages,
        )
        content = completion.choices[0].message.content
        # A reply can come back without text (content is None). Treat that, and
        # an all-whitespace reply, as "no barcode" instead of letting .strip()
        # raise and be misreported as an API failure.
        barcode = (content or "").strip()
        return barcode or "Not Found"
    except Exception as e:
        # Best effort by design: one failed lookup must not abort the batch.
        print(f"调用API获取 '{product_name}' 的条形码时出错: {e}")
        return "API调用失败"

def main(excel_file='coles_data.xlsx', max_workers=1000):
    """Add a '条形码' column to a product workbook and save it as a new file.

    Reads ``excel_file``, calls ``get_australian_barcode_via_api`` once for
    every value in the '产品名称' column on a thread pool, and writes the frame
    plus the new column to ``<input name>_with_barcodes.xlsx``. Expected
    failures (client not configured, missing file, missing column, read or
    write errors) are reported with ``print`` and end the run early.

    Args:
        excel_file: Path of the input workbook.
        max_workers: Upper bound on the number of concurrent lookups.

    Returns:
        None.
    """
    # Function-scope import so this block works without touching the cell's import list.
    from concurrent.futures import as_completed

    if not client or not client.api_key or client.api_key.startswith("sk-xxx"):
        print("API客户端未正确配置，程序即将退出。")
        return

    if not os.path.exists(excel_file):
        print(f"错误: 文件 '{excel_file}' 不存在。请确保文件与脚本在同一目录下。")
        return

    try:
        df = pd.read_excel(excel_file)
    except Exception as e:
        print(f"读取Excel文件 '{excel_file}' 时出错: {e}")
        return
    if '产品名称' not in df.columns:
        print(f"错误: Excel文件 '{excel_file}' 中缺少 '产品名称' 列。")
        return

    product_names = df['产品名称'].tolist()
    # Results are stored by row position so they line up with df regardless of
    # the order in which lookups finish.
    barcodes = [None] * len(product_names)

    # NOTE(review): a very high max_workers may exceed the API's rate limits;
    # lower it if many rows come back as "API调用失败".
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_index = {
            executor.submit(get_australian_barcode_via_api, name, client): i
            for i, name in enumerate(product_names)
        }
        # as_completed yields each future when it finishes, so the progress bar
        # tracks real progress instead of waiting on the oldest pending request.
        for future in tqdm(as_completed(future_to_index), total=len(product_names), desc="正在获取条形码"):
            index = future_to_index[future]
            try:
                barcodes[index] = future.result()
            except Exception as e:
                product_name = product_names[index]
                print(f"处理 '{product_name}' 时发生意外错误: {e}")
                barcodes[index] = "处理异常"

    df['条形码'] = barcodes
    # Derive the output name from the path stem rather than str.replace, which
    # returns the input path unchanged when the extension is not exactly '.xlsx'.
    root, _ext = os.path.splitext(excel_file)
    output_file_path = f"{root}_with_barcodes.xlsx"
    try:
        df.to_excel(output_file_path, index=False, engine='openpyxl')
        print("\n" + "="*50)
        print("🎉 文件处理完成！")
        print(f"已将条形码成功写入到 '{output_file_path}'。")
        print("="*50)
    except Exception as e:
        print(f"保存到Excel文件 '{output_file_path}' 时出错: {e}")

# Run the pipeline only when this code executes as the top-level module.
if __name__ == "__main__":
    main()

正在获取条形码: 100%|██████████| 1479/1479 [01:24<00:00, 17.60it/s]


🎉 文件处理完成！
已将条形码成功写入到 'coles_data_with_barcodes.xlsx'。





In [13]:
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import os

# Load variables from a .env file into the process environment
# (presumably this is where the OpenAI API key comes from - confirm).
load_dotenv()
# Shared API client, built with default settings; main() passes it to every lookup.
client = OpenAI()

def get_australian_barcode_via_api(product_name, api_client, model="gpt-4.1-mini"):
    """Ask a chat model for the Australian barcode of a single product.

    Args:
        product_name: Name of the product to look up. Anything that is not a
            non-blank string is rejected without calling the API.
        api_client: Object exposing
            ``chat.completions.create(model=..., messages=..., temperature=...)``.
        model: Identifier of the chat model sent with the request.

    Returns:
        str: The reply text with surrounding whitespace removed (the prompt
        asks for a numeric barcode or 'Not Found'); 'Not Found' when the reply
        carries no text; "无效的产品名称" for an invalid product name;
        "API调用失败" when the request or response handling raises.

    Note:
        The returned value is whatever the model answered. It is not checked
        against any barcode registry or even validated as numeric.
    """
    if not isinstance(product_name, str) or not product_name.strip():
        return "无效的产品名称"

    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that provides Australian product barcodes. When asked for a barcode, return only the numerical barcode number or 'Not Found'. If you cannot find it, return 'Not Found'.",
        },
        {
            "role": "user",
            "content": f"What is the Australian barcode for the product '{product_name}'?",
        }
    ]
    try:
        completion = api_client.chat.completions.create(
            model=model,  # override through the `model` argument to try another model
            messages=messages,
            temperature=0,
        )
        content = completion.choices[0].message.content
        # A reply can come back without text (content is None). Treat that, and
        # an all-whitespace reply, as "no barcode" instead of letting .strip()
        # raise and be misreported as an API failure.
        barcode = (content or "").strip()
        return barcode or "Not Found"
    except Exception as e:
        # Best effort by design: one failed lookup must not abort the batch.
        print(f"调用API获取 '{product_name}' 的条形码时出错: {e}")
        return "API调用失败"

def main(excel_file='woolworths_data.xlsx', max_workers=10):
    """Add a '条形码' column to a product workbook and save it as a new file.

    Reads ``excel_file``, calls ``get_australian_barcode_via_api`` once for
    every value in the '产品名称' column on a thread pool, and writes the frame
    plus the new column to ``<input name>_with_barcodes.xlsx``. Expected
    failures (client not configured, missing file, missing column, read or
    write errors) are reported with ``print`` and end the run early.

    Args:
        excel_file: Path of the input workbook.
        max_workers: Upper bound on the number of concurrent lookups.

    Returns:
        None.
    """
    # Function-scope import so this block works without touching the cell's import list.
    from concurrent.futures import as_completed

    if not client or not client.api_key or client.api_key.startswith("sk-xxx"):
        print("API客户端未正确配置，程序即将退出。")
        return

    if not os.path.exists(excel_file):
        print(f"错误: 文件 '{excel_file}' 不存在。请确保文件与脚本在同一目录下。")
        return

    try:
        df = pd.read_excel(excel_file)
    except Exception as e:
        print(f"读取Excel文件 '{excel_file}' 时出错: {e}")
        return
    if '产品名称' not in df.columns:
        print(f"错误: Excel文件 '{excel_file}' 中缺少 '产品名称' 列。")
        return

    product_names = df['产品名称'].tolist()
    # Results are stored by row position so they line up with df regardless of
    # the order in which lookups finish.
    barcodes = [None] * len(product_names)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_index = {
            executor.submit(get_australian_barcode_via_api, name, client): i
            for i, name in enumerate(product_names)
        }
        # as_completed yields each future when it finishes, so the progress bar
        # tracks real progress instead of waiting on the oldest pending request.
        for future in tqdm(as_completed(future_to_index), total=len(product_names), desc="正在获取条形码"):
            index = future_to_index[future]
            try:
                barcodes[index] = future.result()
            except Exception as e:
                product_name = product_names[index]
                print(f"处理 '{product_name}' 时发生意外错误: {e}")
                barcodes[index] = "处理异常"

    df['条形码'] = barcodes
    # Derive the output name from the path stem rather than str.replace, which
    # returns the input path unchanged when the extension is not exactly '.xlsx'.
    root, _ext = os.path.splitext(excel_file)
    output_file_path = f"{root}_with_barcodes.xlsx"
    try:
        df.to_excel(output_file_path, index=False, engine='openpyxl')
        print("\n" + "="*50)
        print("🎉 文件处理完成！")
        print(f"已将条形码成功写入到 '{output_file_path}'。")
        print("="*50)
    except Exception as e:
        print(f"保存到Excel文件 '{output_file_path}' 时出错: {e}")

# Run the pipeline only when this code executes as the top-level module.
if __name__ == "__main__":
    main()

正在获取条形码: 100%|██████████| 2650/2650 [03:42<00:00, 11.91it/s]


🎉 文件处理完成！
已将条形码成功写入到 'woolworths_data_with_barcodes.xlsx'。



