# Parallel dataflows

##### Parallel dataflows can be developed by passing data between Apps. 
###### This example is taken from the Parsl documentation (section "Parallel Dataflow").
###### In this example we create a set of files, each containing a random number;
###### we then concatenate these files into a single file and compute the sum of all numbers in that file.

###### The files will be created with the sandbox_app.
###### The transfer of files between apps is done through the use of the workflow:// schema.

In [1]:
import parsl
from parsl.app.app import sandbox_app, python_app
from parsl.data_provider.files import File
import os

# Print the version of the Parsl package this notebook is running against.
print(parsl.version.VERSION)

1.1.0a1


In [2]:
# Name of the sandbox project: the apps below take it as the default value of
# their `project` argument, and it is the leading component of the path from
# which the final result file is read.
project = "helloSandbox"

# No configuration object is passed to load().
parsl.load()

In [3]:
# The first sandbox_app():
# this app generates a pseudo-random number (between 0 and 32,767) and saves it to a file.

@sandbox_app()
def generate(project=project):
    """Return the shell command that writes one random number to ``out.txt``.

    The command echoes the shell's ``$RANDOM`` value and redirects the output
    with ``&>`` into ``out.txt``.

    Args:
        project: Defaults to the notebook-level ``project`` name. It is not
            used in the body; presumably it is consumed by ``sandbox_app``
            (confirm against the decorator).

    Returns:
        The command line as a string.
    """
    command = "echo $(( RANDOM )) &> out.txt"
    return command


In [4]:
# The second sandbox_app():
# this app concatenates the input files into a single output file.
# Only the workflow:// schema strings are passed to the app.
@sandbox_app()
def concat(inputs=[], project=project):
    """Return the shell command that concatenates ``inputs`` into ``out.txt``.

    Args:
        inputs: Strings naming the files to concatenate, in order. They are
            joined with single spaces and handed to ``cat`` unchanged.
        project: Defaults to the notebook-level ``project`` name. It is not
            used in the body; presumably it is consumed by ``sandbox_app``
            (confirm against the decorator).

    Returns:
        The command line as a string.
    """
    # str.join accepts any iterable of strings, so no intermediate list is
    # needed.
    sources = " ".join(inputs)
    return "cat {0} > out.txt".format(sources)

In [5]:
# App that calculates the sum of values in a list of input files
@python_app
def total(inputs=[]):
    """Sum the integers stored one per line in every file of ``inputs``.

    Args:
        inputs: Paths (or path-like objects accepted by ``open``) of text
            files holding one integer per line. The mutable default is kept
            so the signature stays unchanged; it is never modified here.

    Returns:
        The integer sum over all files; 0 when ``inputs`` is empty.

    Raises:
        ValueError: If a non-blank line is not a valid integer.
    """
    running_sum = 0
    # Walk every input rather than only the first, so the app matches its
    # description ("a list of input files").
    for path in inputs:
        with open(path, 'r') as handle:
            for line in handle:
                # Tolerate blank lines (e.g. a trailing empty line) instead
                # of letting int() raise on them.
                if line.strip():
                    running_sum += int(line)
    return running_sum

In [6]:
# Launch two generate() apps and keep the handle each call returns; the
# handles are used afterwards to locate each app's out.txt.
output_files = [generate() for _ in range(2)]

In [7]:
# Concatenate the files into a single file passing workflow_schema

# Each generate() handle exposes a workflow_schema; appending "/out.txt"
# names the file that app wrote.
cc = concat(
    inputs=[
        generated.workflow_schema + "/out.txt"
        for generated in output_files
    ]
)

In [8]:
# Calculate the sum of the random numbers.
# The result handle gets its own name: assigning it to `total` would replace
# the app with its result, and re-running this cell would then fail because
# the result handle is not callable.
# The path is the project name followed by concat's workflow_schema with the
# "workflow://" prefix stripped, then the output file name.
total_future = total(
    inputs=[
        File(project + "/" + cc.workflow_schema.replace("workflow://", "") + "/out.txt")
    ]
)
print(total_future.result())


48433
