In [1]:
import multiprocessing
from multiprocessing import Process

In [3]:
# Report how many logical CPU cores multiprocessing sees on this machine;
# useful context for choosing a worker count in the cells below.
cpu_core_count = multiprocessing.cpu_count()
print("number of cpu cores: ", cpu_core_count)

number of cpu cores:  2


In [4]:
import pandas as pd
from time import sleep, time

def process_dataframe(chunk_id, chunk_data: pd.DataFrame, delay_seconds: float = 5):
    """Simulate a slow processing job for a single chunk of data.

    This is a placeholder workload: it prints a start message, blocks for
    ``delay_seconds`` to stand in for real work, then prints a completion
    message. ``chunk_data`` is accepted but never inspected.

    Args:
        chunk_id: Identifier interpolated into the progress messages.
        chunk_data: The chunk to process. Currently unused.
        delay_seconds: How long to sleep to simulate work. Defaults to 5,
            which was previously hard-coded, so existing two-argument
            callers behave exactly as before.

    Returns:
        None.
    """
    print(f"Processing chunk {chunk_id}...")
    # Stand-in for the real per-chunk computation.
    sleep(delay_seconds)
    print(f"Chunk {chunk_id} has been processed successfully.")

In [8]:
# Toy dataset: ten [name, age] records.
data = [
    ['tom', 10],
    ['nick', 15],
    ['john', 20],
    ['jane', 25],
    ['jill', 30],
    ['jack', 35],
    ['joseph', 40],
    ['james', 45],
    ['jerry', 50],
    ['jasmine', 55]
]

df = pd.DataFrame(data, columns=['name', 'age'])

# Number of rows per chunk; the last chunk is shorter when len(df) is not
# an exact multiple of chunk_size.
chunk_size = 2

# Split df into consecutive row slices. Each chunk is kept as a DataFrame
# (rather than converted with .to_numpy()) so it matches the
# `chunk_data: pd.DataFrame` annotation on process_dataframe and keeps its
# column labels. .iloc makes the positional row slicing explicit.
chunks = [
    df.iloc[offset:offset + chunk_size]
    for offset in range(0, len(df), chunk_size)
]

# Wall-clock reference point for the whole fan-out / join cycle.
start = time()

procs = []

# Launch one OS process per chunk; all of them run concurrently.
for chunk_id, chunk in enumerate(chunks):
    worker = Process(target=process_dataframe, args=(chunk_id, chunk))
    worker.start()

    # Keep a handle so the process can be joined later.
    procs.append(worker)

    # The pid is only available once the process has been started.
    print(f"processID: {worker.pid}")

# Block until every child process has exited.
for worker in procs:
    worker.join()

elapsed = time() - start
print(f"total time taken: {elapsed:.2f} seconds")

processID: 8286
Processing chunk 0...processID: 8289


Processing chunk 1...processID: 8294
processID: 8301
Processing chunk 2...
Processing chunk 3...
Processing chunk 4...
processID: 8306
Chunk 0 has been processed successfully.
Chunk 1 has been processed successfully.
Chunk 2 has been processed successfully.Chunk 3 has been processed successfully.

Chunk 4 has been processed successfully.
total time taken: 5.06 seconds


In [10]:
from multiprocessing import Pool

# Size of the worker pool: chunks are processed by this many worker
# processes instead of one process per chunk.
NUM_PROCESSES = 2

# NOTE: the pool is deliberately left open here because a later cell calls
# pool.starmap on it. Call pool.close() and pool.join() once it is no
# longer needed so the worker processes are released.
pool = Pool(processes=NUM_PROCESSES)

# Submit every chunk without blocking; each call returns an AsyncResult
# handle immediately.
procs = [
    pool.apply_async(process_dataframe, args=(chunk_id, chunk))
    for chunk_id, chunk in enumerate(chunks)
]

# Wait for each task in submission order; .get() also re-raises any
# exception raised inside the worker.
for pending in procs:
    pending.get()

Processing chunk 0...Processing chunk 1...



Chunk 1 has been processed successfully.
Chunk 0 has been processed successfully.Processing chunk 2...

Processing chunk 3...
Chunk 2 has been processed successfully.
Processing chunk 4...
Chunk 3 has been processed successfully.
Chunk 4 has been processed successfully.


Processing chunk 1...Processing chunk 0...

Chunk 1 has been processed successfully.Chunk 0 has been processed successfully.


Processing chunk 2...Processing chunk 3...
Chunk 2 has been processed successfully.
Processing chunk 4...
Chunk 3 has been processed successfully.
Chunk 4 has been processed successfully.
Processing chunk 1...Processing chunk 0...

Chunk 0 has been processed successfully.Chunk 1 has been processed successfully.

Processing chunk 2...Processing chunk 3...

Chunk 2 has been processed successfully.
Processing chunk 4...Chunk 3 has been processed successfully.

Chunk 4 has been processed successfully.


In [12]:
# Pair every chunk with its index: one (chunk_id, chunk) argument tuple
# per task.
inputs = list(enumerate(chunks))

# starmap unpacks each tuple into process_dataframe's positional arguments,
# blocks until all tasks finish, and returns the results in input order.
outputs = pool.starmap(process_dataframe, inputs)

print(outputs)

[None, None, None, None, None]
