In [None]:
from dask.distributed import Client, progress, protocol
from distributed.protocol import serialize,deserialize
import pyarrow as pa
import pyfletcher as pf
import numpy as np


# Address of the Dask scheduler this notebook submits work to.
SCHEDULER_ADDRESS = "tcp://172.31.84.37:8786"

# Connect to the scheduler; the bare `client` expression on the last line makes
# the notebook display the client as the cell output.
client = Client(SCHEDULER_ADDRESS)
client

In [None]:
# function which executes on the workers
def run_on_worker(batch):
    """Run the Fletcher FPGA user core over one record batch and return its result.

    Intended to be executed on a Dask worker. The function builds a pyfletcher
    platform and context for ``batch``, runs the user core over every row, and
    returns the core's return value read as an int64 (the caller treats it as
    the sum of the batch).

    Args:
        batch: Record batch to process. Must support ``len()`` and expose
            ``num_rows``.

    Returns:
        The value of ``uc.get_return(np.dtype(np.int64))``, or ``0`` when the
        batch contains no rows.
    """
    print("Received data of length: ", len(batch))

    # Determine size of table
    last_index = batch.num_rows

    # An empty batch contributes nothing to the sum, so skip the FPGA entirely
    # instead of asking the user core to process the empty range [0, 0).
    # NOTE(review): the Fletcher runtime presumably rejects an empty row range
    # in set_range -- confirm against the installed pyfletcher version.
    if last_index == 0:
        return 0

    # NOTE(review): the meaning of the second Platform argument (False) is not
    # visible here -- confirm against the pyfletcher documentation.
    platform = pf.Platform("aws", False)

    # Create a context
    context = pf.Context(platform)

    # Initialize the platform
    platform.init()

    # Prepare the recordbatch
    context.queue_record_batch(batch)
    context.enable()

    # Create UserCore
    uc = pf.UserCore(context)

    # Reset it
    uc.reset()

    # Process every row of the batch: [0, num_rows)
    uc.set_range(0, last_index)

    # Start the FPGA user function
    uc.start()
    # NOTE(review): 1000 is presumably a polling interval -- confirm its unit.
    uc.wait_for_finish(1000)

    # Get the sum from UserCore
    sum_fpga = uc.get_return(np.dtype(np.int64))
    return sum_fpga

In [None]:
# Schema-level metadata attached for Fletcher.
# NOTE(review): presumably 'fletcher_mode' marks the batch as read-only input
# and 'fletcher_name' names it for the hardware design -- confirm against the
# Fletcher documentation.
metadata = {
    b'fletcher_mode': b'read',
    b'fletcher_name': b'ExampleBatch',
}

# The batch has a single non-nullable int64 column called 'number'.
number_field = pa.field('number', pa.int64(), nullable=False)
schema_fields = [number_field]

# Build the schema and attach the metadata in one step.
schema = pa.schema(schema_fields).add_metadata(metadata)

NUM_INTEGERS = 1000000

# Random input values in [0, 1000). The dtype is pinned to int64 so the data
# always matches the schema's int64 'number' field instead of depending on
# NumPy's platform-specific default integer type.
input_arr = np.random.randint(0, 1000, NUM_INTEGERS, dtype=np.int64)

# One chunk of the input per worker currently known to the scheduler.
num_of_workers = len(client.scheduler_info()["workers"])
if num_of_workers == 0:
    raise RuntimeError(
        "No Dask workers are connected to the scheduler; cannot distribute the data."
    )

# np.array_split spreads any remainder over the leading chunks, so every
# integer lands in exactly one batch even when NUM_INTEGERS is not a multiple
# of the worker count (equal fixed-size slices would drop the tail and make
# the total sum wrong). Empty chunks, which occur when there are more workers
# than integers, are skipped so no worker receives an empty batch.
# NOTE(review): the schema is passed positionally as before; newer pyarrow
# releases may expect it as `schema=` -- confirm against the installed version.
data_split = [
    pa.RecordBatch.from_arrays([pa.array(chunk)], schema)
    for chunk in np.array_split(input_arr, num_of_workers)
    if len(chunk) > 0
]

# Invariant: the batches together cover the whole input.
assert sum(batch.num_rows for batch in data_split) == NUM_INTEGERS

# Submit one run_on_worker task per record batch; client.map returns one
# future per batch.
futures = client.map(run_on_worker, data_split)

# Block on each future in submission order and add up the per-batch results.
total_sum = sum(future.result() for future in futures)

print(f"Total sum: {total_sum}")
