In [1]:
# Clone the 'Hello World' example into a temporary directory.
# The returned path is kept in `workflowdir` and deleted again in the final
# clean-up cell.
from flowserv.tests.workflow import clone_helloworld

workflowdir = clone_helloworld()

In [2]:
# Pool of first names (one per letter of the alphabet) from which the random
# input for each workflow run is drawn.
NAMES = [
    'Alice', 'Bob', 'Claire', 'Dave', 'Elizabeth-Anne',
    'Francisco', 'Georgia', 'Howard', 'Isabella', 'Jacob',
    'Kristina', 'Luke', 'Michelle', 'Norman', 'Olivia',
    'Patrick', 'Qiana', 'Rafael', 'Sandy', 'Tom',
    'Ursula', 'Victor', 'Whitney', 'Xiong', 'Yvette', 'Zack',
]

# Greeting phrases; one workflow run is executed per phrase.
GREETINGS = [
    'Hi',
    'Hello',
    'Hey',
    'Welcome',
    'Bye',
    'Adios',
    'See ya',
]

In [3]:
# Run workflow for each greeting phrase with a random selection of 10 names.
import os
import random
import shutil

from flowserv.tests.workflow import run_workflow, INPUTFILE

# Start from an empty 'runs' directory so that files left behind by an earlier
# execution of this cell cannot mix with the new results.
runsdir = os.path.abspath('runs')
if os.path.exists(runsdir):
    shutil.rmtree(runsdir)
os.makedirs(runsdir)

# Collects the run directory of every successful workflow run.
runs = list()
for i, greeting in enumerate(GREETINGS):
    # Draw 10 different entries of NAMES in a single call. Unlike adding
    # random choices to a set until it holds 10 elements, random.sample()
    # raises a ValueError instead of looping forever when NAMES contains
    # fewer than 10 entries.
    names = random.sample(NAMES, 10)
    # Write the selected names, one per line, to the input file for this run.
    namesfile = os.path.join(runsdir, 'names{}.txt'.format(i))
    with open(namesfile, 'w') as f:
        f.writelines('{}\n'.format(name) for name in names)
    rundir = os.path.join(runsdir, 'helloworld{}'.format(i))
    args = {'greeting': greeting, 'sleeptime': 0, 'names': INPUTFILE(namesfile)}
    state = run_workflow(workflowdir, arguments=args, rundir=rundir)
    # Stop right away if a run did not succeed; later cells rely on the
    # result files of every run.
    assert state.is_success(), 'workflow run {} ({!r}) failed'.format(i, greeting)
    runs.append(rundir)

In [4]:
# Create the input folder for the post-processing code from the workflow's
# 'benchmark.yaml' file and the list of run directories collected in `runs`.

from flowserv.tests.workflow import prepare_postproc_data

datadir = prepare_postproc_data(os.path.join(workflowdir, 'benchmark.yaml'), runs)

In [5]:
# Output file for post-processing results (CSV with n-gram counts). It lives
# inside `runsdir`, so it is removed together with that directory.
outputfile = os.path.join(runsdir, 'ngrams.csv')

In [6]:
from collections import Counter

import argparse
import sys
import time

from flowserv.service.postproc.client import Runs


def main(rundir, k=25, timeout=10, outputfile=None):
    """Write the k most frequent 3-grams found in the greeting files of all
    runs as CSV rows.

    Every line of each run's 'results/greetings.txt' file is stripped of
    surrounding whitespace and split into overlapping, upper-cased character
    3-grams. 3-grams that contain a space character are not counted.

    Parameters
    ----------
    rundir: passed unchanged to Runs(); iterating over the result must yield
        one object per run, each providing get_file(<relative path>).
    k: int, default=25
        Number of most frequent 3-grams that are written.
    timeout: int or float, default=10
        Number of seconds to sleep after each run has been processed.
    outputfile: string, default=None
        Path of the CSV file to write (an existing file is overwritten). If
        None, the rows are written to standard output instead.
    """
    # Count frequency of 3-grams over the greeting files of all runs.
    ngrams = Counter()
    for run in Runs(rundir):
        with open(run.get_file('results/greetings.txt'), 'r') as f:
            for line in f:
                line = line.strip()
                # The range is empty for lines with fewer than three
                # characters, so no explicit length check is required.
                for i in range(len(line) - 2):
                    ng = line[i:i + 3].upper()
                    if ' ' not in ng:
                        ngrams[ng] += 1
        # Delay execution to allow for testing running post-processing
        # workflows
        time.sleep(timeout)
    # Output rows with two columns: ngram,count
    rows = ['{},{}\n'.format(ngram, count) for ngram, count in ngrams.most_common(k)]
    if outputfile is None:
        # The signature allows the output file to be omitted; open(None)
        # would raise a TypeError, so fall back to standard output.
        sys.stdout.writelines(rows)
    else:
        with open(outputfile, 'w') as f:
            f.writelines(rows)

# Run the post-processing step on the prepared data folder, sleeping 1 second
# per run instead of the default 10, and write the counts to `outputfile`.
main(rundir=datadir, timeout=1, outputfile=outputfile)

In [7]:
# Show the n-gram counts stored in the generated CSV file, one
# "<ngram> <count>" pair per line.
with open(outputfile, 'r') as csvfile:
    for record in csvfile:
        ngram, count = record.strip().split(',')
        print(ngram, count)

ELL 17
HEL 13
LLO 10
HEY 10
WEL 10
ELC 10
LCO 10
COM 10
OME 10
BYE 10
ADI 10
DIO 10
IOS 10
SEE 10
ABE 7
YVE 6
VET 6
ETT 6
TTE 6
ISA 4
SAB 4
BEL 4
LLA 4
DAV 4
AVE 4


In [8]:
# Clean up: delete every directory that was created by this notebook.
for generated_dir in (workflowdir, runsdir, datadir):
    shutil.rmtree(generated_dir)
