In [1]:
import time

def process_dataset(dataset_name: str, delay: float = 2.0) -> None:
    """Simulate a slow, blocking job on a single dataset.

    Prints a start message, blocks the calling thread for ``delay`` seconds
    with ``time.sleep`` (a stand-in for real work such as training a model or
    cleaning data), then prints a finish message.

    Args:
        dataset_name: Label interpolated into both progress messages.
        delay: Seconds to sleep between the two messages. Defaults to 2.0,
            the duration that used to be hard-coded, so existing calls behave
            exactly as before.
    """
    print(f"Start processing {dataset_name}...")
    time.sleep(delay)  # simulated time-consuming work, e.g. model training or data cleaning
    print(f"Finished processing {dataset_name}!")

if __name__ == "__main__":
    # Sequential baseline: each dataset is processed to completion before the
    # next one starts, so the calls run strictly in list order.
    datasets = [
        "dataset_A",
        "dataset_B",
        "dataset_C",
    ]
    for ds in datasets:
        process_dataset(ds)


Start processing dataset_A...
Finished processing dataset_A!
Start processing dataset_B...
Finished processing dataset_B!
Start processing dataset_C...
Finished processing dataset_C!


In [2]:
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

def process_dataset(dataset_name: str, delay: float = 2.0) -> str:
    """Simulate a slow, blocking job on a single dataset and report its name.

    Prints a start message, blocks the calling thread for ``delay`` seconds
    with ``time.sleep`` (a stand-in for real work), then prints a finish
    message.

    Args:
        dataset_name: Label interpolated into both progress messages.
        delay: Seconds to sleep between the two messages. Defaults to 2.0,
            the duration that used to be hard-coded, so existing calls behave
            exactly as before.

    Returns:
        ``dataset_name`` unchanged, so a caller holding only the result can
        tell which dataset it belongs to.
    """
    print(f"Start processing {dataset_name}...")
    time.sleep(delay)  # simulated time-consuming work
    print(f"Finished processing {dataset_name}!")
    return dataset_name

if __name__ == "__main__":
    datasets = [
        "dataset_A",
        "dataset_B",
        "dataset_C",
    ]

    # Run the datasets concurrently on a thread pool (at most three worker threads).
    with ThreadPoolExecutor(max_workers=3) as executor:
        # Queue every dataset first; each submit() call hands back a Future.
        futures = [
            executor.submit(process_dataset, name)
            for name in datasets
        ]

        # as_completed yields futures as they finish, which need not match the
        # order in which they were submitted.
        for future in as_completed(futures):
            result = future.result()  # value returned by process_dataset
            print(f"{result} done.")


Start processing dataset_A...
Start processing dataset_B...
Start processing dataset_C...
Finished processing dataset_A!
Finished processing dataset_C!
dataset_A done.
dataset_C done.
Finished processing dataset_B!
dataset_B done.


In [None]:
from concurrent.futures import ProcessPoolExecutor, as_completed
import time

def process_dataset(dataset_name: str, delay: float = 2.0) -> str:
    """Simulate a slow, blocking job on a single dataset and report its name.

    Prints a start message, blocks for ``delay`` seconds with ``time.sleep``
    (a stand-in for real work), then prints a finish message.

    Args:
        dataset_name: Label interpolated into both progress messages.
        delay: Seconds to sleep between the two messages. Defaults to 2.0,
            the duration that used to be hard-coded, so existing calls behave
            exactly as before.

    Returns:
        ``dataset_name`` unchanged, so a caller holding only the result can
        tell which dataset it belongs to.
    """
    # flush=True: this function is submitted to a process pool in this cell,
    # and stdout inside a worker process may be block-buffered; flushing keeps
    # the progress lines from being held back until the worker exits.
    print(f"Start processing {dataset_name}...", flush=True)
    time.sleep(delay)  # simulated time-consuming work
    print(f"Finished processing {dataset_name}!", flush=True)
    return dataset_name

if __name__ == "__main__":
    datasets = [
        "dataset_A",
        "dataset_B",
        "dataset_C",
    ]

    # NOTE(review): the concurrent.futures docs state that worker processes
    # must be able to import __main__. When this cell runs inside a notebook
    # kernel with a spawn/forkserver start method, process_dataset may not be
    # resolvable in the workers -- confirm on the target platform, and move the
    # function into an importable .py module if the pool breaks.
    with ProcessPoolExecutor(max_workers=3) as executor:
        # Queue every dataset first; each submit() call hands back a Future.
        futures = [
            executor.submit(process_dataset, name)
            for name in datasets
        ]

        # as_completed yields futures as they finish rather than in
        # submission order.
        for future in as_completed(futures):
            result = future.result()  # value returned by process_dataset
            print(f"{result} done.")
