In [1]:
# Clone the 'Hello World' template into a local folder.
from flowserv.client import Flowserv

# Create the client for the local '.flowserv' base directory.
# NOTE(review): clear=True presumably wipes an existing base directory so
# that the notebook starts from a clean state -- confirm in the flowserv docs.
client = Flowserv(
    basedir='.flowserv',
    clear=True
)
# Install the 'helloworld' template and keep the identifier of the new
# workflow; it is needed to create the submissions for the workflow.
workflow_id = client.install(
    'helloworld',
    multi_user=True,
    ignore_postproc=True
)

In [2]:
# Create one run result per greeting phrase (ten random names each) for the 'Hello World' workflow.
import random
from io import StringIO


# Greeting phrases and first names from which the random workflow inputs
# are generated.
GREETINGS = [
    'Hi',
    'Hello',
    'Hey',
    'Welcome',
    'Bye',
    'Adios',
    'See ya'
]
NAMES = [
    'Alice', 'Bob', 'Claire', 'Dave', 'Elizabeth-Anne', 'Francisco',
    'Georgia', 'Howard', 'Isabella', 'Jacob', 'Kristina', 'Luke',
    'Michelle', 'Norman', 'Olivia', 'Patrick', 'Qiana', 'Rafael', 'Sandy',
    'Tom', 'Ursula', 'Victor', 'Whitney', 'Xiong', 'Yvette', 'Zack'
]

# The post-processing workflow of the Hello World demo only considers the
# 'results/greetings.txt' file of a run, so this is the only result file
# that is tracked per run.
result_key = 'results/greetings.txt'
# Input for the preparation of the post-processing data: one 3-tuple per
# run of the form (run_id, run_id, [(file key, loaded result file)]).
runs = []

# Use a dedicated, seeded random number generator so that every execution
# of the notebook draws the same names for each run without touching the
# state of the global 'random' module.
RANDOM_SEED = 42
name_rng = random.Random(RANDOM_SEED)

# Run the workflow once per greeting phrase with a random selection of 10
# names. For each run we first create a new submission with a different user.
for i, greeting in enumerate(GREETINGS):
    # Create submission for new user.
    username = NAMES[i]
    client.register(username=username, password='mypwd')
    client.login(username=username, password='mypwd')
    submission_id = client.create_submission(workflow_id, name='Team {}'.format(username))
    submission = client.submission(workflow_id=workflow_id, group_id=submission_id)
    # Create random input set: 10 distinct names, one name per line.
    names = name_rng.sample(NAMES, 10)
    namesfile = StringIO('\n'.join(names))
    args = {
        'greeting': greeting,
        'sleeptime': 0,
        'names': namesfile
    }
    # Run the workflow and stop immediately if the run was not successful,
    # since the post-processing step needs the result file of every run.
    run = submission.start_run(args)
    assert run.is_success(), 'run for greeting {!r} did not succeed'.format(greeting)
    run_id = run.run_id
    # The run identifier is used for the first two tuple elements; the third
    # element lists the (file key, loaded file) pairs of the run.
    runs.append((run_id, run_id, [(result_key, run.get_file(result_key).load())]))

In [3]:
# Prepare the folder with run results that is passed as input
# to the post-processing workflow.
import os

from flowserv.service.postproc.util import copy_postproc_files

# Folder below the client's base directory that receives the result files
# of all runs; it is the input folder of the post-processing step.
datadir = os.path.join(client.basedir, 'postprocdata')
copy_postproc_files(runs=runs, outputdir=datadir)

# Target csv file for the n-gram counts that the post-processing step
# computes.
outputfile = os.path.join(client.basedir, 'ngrams.csv')


In [4]:
from collections import Counter

import argparse
import sys
import time

from flowserv.service.postproc.client import Runs


def main(rundir, k=25, timeout=10, outputfile=None):
    """Create a csv file containing the frequency of the k most frequent
    n-grams (of length three) in the greeting files of all runs. Counts only
    those n-grams that do not contain a whitespace character.

    Parameters
    ----------
    rundir: string
        Folder with the run results. It is passed to ``Runs`` to iterate
        over the individual runs.
    k: int, default=25
        Maximum number of n-grams (most frequent first) in the output.
    timeout: int or float, default=10
        Number of seconds to sleep after each run has been processed.
    outputfile: string, default=None
        Path of the csv file that is written. If None, the rows are written
        to standard output instead.
    """
    # Count frequency of n-grams for all runs.
    ngrams = Counter()
    for run in Runs(rundir):
        with open(run.get_file('results/greetings.txt'), 'r') as f:
            for line in f:
                line = line.strip()
                # The range is empty for lines with less than three
                # characters, i.e., such lines contribute no n-grams.
                for i in range(len(line) - 2):
                    ng = line[i:i + 3].upper()
                    # Skip n-grams with any kind of whitespace (blank, tab,
                    # ...), not only those with a blank.
                    if not any(c.isspace() for c in ng):
                        ngrams[ng] += 1
        # Delay execution to allow for testing running post-processing
        # workflows
        time.sleep(timeout)
    # Output csv rows with two columns: ngram,count
    rows = [
        '{},{}\n'.format(ngram, count)
        for ngram, count in ngrams.most_common(k)
    ]
    if outputfile is None:
        # No output file given: print the rows instead of failing in open().
        sys.stdout.writelines(rows)
    else:
        with open(outputfile, 'w') as f:
            f.writelines(rows)

# Count the n-grams over the prepared run folder, sleeping one second
# after each run, and write the result to the output file.
main(datadir, timeout=1, outputfile=outputfile)

In [5]:
# Print contents of the generated output file
with open(outputfile, 'r') as f:
    for line in f:
        # Split at the last comma only: the count never contains a comma,
        # whereas an n-gram may contain one (e.g. 'HI,').
        ngram, count = line.strip().rsplit(',', 1)
        print('{} {}'.format(ngram, count))

ELL 17
HEL 13
LLO 10
HEY 10
WEL 10
ELC 10
LCO 10
COM 10
OME 10
BYE 10
ADI 10
DIO 10
IOS 10
SEE 10
ABE 8
OLI 5
LIV 5
IVI 5
VIA 5
XIO 4
ION 4
ONG 4
ISA 4
SAB 4
BEL 4


In [6]:
# Remove all generated files.
# NOTE(review): presumably this deletes the client's base directory
# ('.flowserv'), which also holds the 'postprocdata' folder and the
# 'ngrams.csv' file created in the cells above -- confirm against the
# flowserv client documentation.
client.erase()
