In [None]:
from dask.distributed import Client, progress, get_worker
import numpy as np
import pyarrow as pa
import pyfletcher as pf

# Address of the running Dask scheduler. Replace the placeholder with the IP
# that is shown after running the "dask-scheduler" command.
SCHEDULER_ADDRESS = "tcp://XXX.XXX.XXX.XXX:8786"

# Connect to the scheduler; the bare expression on the last line makes the
# notebook display the client.
client = Client(SCHEDULER_ADDRESS)
client

In [None]:
def run_on_worker(batch):
    """Run the FPGA kernel on one RecordBatch and return the kernel's result.

    Intended to be executed on a Dask worker: it sets up a Fletcher platform,
    hands `batch` to it, runs the kernel to completion and reads back the
    kernel's return value.

    Args:
        batch: The RecordBatch to queue on the Fletcher context.

    Returns:
        The value obtained from ``kernel.get_return`` using an int32 dtype.
    """
    # Bring up the (auto-detected) FPGA platform.
    fpga = pf.Platform()
    fpga.init()

    # Register the data with a context on that platform. Enabling the context
    # potentially transfers the data to the FPGA.
    ctx = pf.Context(fpga)
    ctx.queue_record_batch(batch)
    ctx.enable()

    # Drive the kernel: start it and block until it reports completion.
    fpga_kernel = pf.Kernel(ctx)
    fpga_kernel.start()
    fpga_kernel.wait_for_finish()

    # Read the kernel's return value as a 32-bit integer.
    return_dtype = np.dtype(np.int32)
    kernel_sum = fpga_kernel.get_return(return_dtype)

    # Print the result on the worker (it shows up in the worker's output,
    # not in the notebook).
    print("Sum: *****************************************" + str(kernel_sum))
    return kernel_sum

In [None]:
import time

# Start the wall-clock timer for this cell.
t0 = time.time()

# Schema metadata meant to tell Fletchgen that the FPGA kernel
# is allowed to read from this schema.
metadata = {b'fletcher_mode': b'read',
            b'fletcher_name': b'ExampleBatch'}

# The batch has a single column: a non-nullable int64 field called "number".
number_field = pa.field('number', pa.int64(), nullable=False)
schema_fields = [number_field]

# Build the schema from the field list and attach the metadata in one go.
schema = pa.schema(schema_fields).with_metadata(metadata)

# One PyArrow Array per column of the RecordBatch; here a single
# column holding 10, 20, ..., 80.
data = [pa.array(list(range(10, 81, 10)))]

# Assemble the RecordBatch from the column arrays and the schema.
recordbatch = pa.RecordBatch.from_arrays(data, schema=schema)


# Split the input rows across the workers currently registered with the scheduler.
num_of_workers = len(client.scheduler_info()["workers"])
if num_of_workers == 0:
    # Without this guard the division below fails with an unhelpful ZeroDivisionError.
    raise RuntimeError(
        "No Dask workers are connected to the scheduler; start at least one worker first."
    )

# Every chunk gets `chunk_size` rows and the first `remainder` chunks get one
# extra row, so no rows are dropped when the number of rows is not a multiple
# of the number of workers.
chunk_size, remainder = divmod(len(recordbatch), num_of_workers)
data_split = []
start = 0
for i in range(num_of_workers):
    rows_in_chunk = chunk_size + (1 if i < remainder else 0)
    if rows_in_chunk == 0:
        # Fewer rows than workers: all remaining chunks would be empty, so
        # stop here instead of dispatching empty batches.
        break
    data_split.append(recordbatch[start:start + rows_in_chunk])
    start += rows_in_chunk
print(f"Split recordbatch of size {len(recordbatch)} into {len(data_split)} chunk(s)")
    

# Ship the chunks to the workers first, then call run_on_worker once per
# scattered chunk.
distributed_data = client.scatter(data_split)
futures = client.map(run_on_worker, distributed_data)

# Collect the value returned for each chunk and show the partial results
# together with their total.
results = client.gather(futures)
print("Received from workers: ", results)
total_sum = sum(results)
print("Total sum ", total_sum)

# Stop the timer and report the time elapsed since t0 was taken.
t1 = time.time()
elapsed = t1 - t0

print("TOTAL EXECUTION TIME: ", elapsed)