In [1]:
# Clone the 'Hello World' template into a local folder.
from flowserv import Flowserv

# All files for this notebook live under the local '.flowserv' folder.
# NOTE(review): clear=True presumably wipes any existing content of that
# folder so the notebook starts from a clean state — confirm.
flowenv = Flowserv(basedir='.flowserv', clear=True)
# NOTE(review): ignore_postproc=True presumably installs the template without
# its own post-processing step, since post-processing is run manually in the
# cells below — confirm against the flowserv documentation.
workflow = flowenv.install('helloworld', ignore_postproc=True)

In [2]:
# Create one run of the 'Hello World' workflow per greeting phrase, each greeting ten random names.
import random
from io import StringIO


# List of names and greetings for random input generation.
GREETINGS = ['Hi', 'Hello', 'Hey', 'Welcome', 'Bye', 'Adios', 'See ya']
NAMES = [
    'Alice', 'Bob', 'Claire', 'Dave', 'Elizabeth-Anne', 'Francisco', 'Georgia', 'Howard', 'Isabella', 'Jacob',
    'Kristina', 'Luke', 'Michelle', 'Norman', 'Olivia', 'Patrick', 'Qiana', 'Rafael', 'Sandy', 'Tom', 'Ursula',
    'Victor', 'Whitney', 'Xiong', 'Yvette', 'Zack'
]

# Number of distinct names that are greeted in each run.
NAMES_PER_RUN = 10
# Fixed seed and a dedicated generator so that re-running the notebook from a
# fresh kernel produces the same inputs without touching the global random
# state.
SEED = 42
rng = random.Random(SEED)


# Run workflow for each greeting phrase with a random selection of
# NAMES_PER_RUN distinct names.
runs = list()
for greeting in GREETINGS:
    # sample() draws without replacement and returns a list, so the names are
    # unique and their order in the input file is deterministic for a given
    # seed (unlike iterating over a set of strings).
    names = rng.sample(NAMES, NAMES_PER_RUN)
    namesfile = StringIO('\n'.join(names))
    args = {
        'greeting': greeting,
        'sleeptime': 0,
        'names': namesfile
    }
    run = workflow.start_run(args)
    # Every later cell relies on successful runs; fail early and say which
    # greeting was affected.
    assert run.is_success(), 'run for greeting {!r} failed'.format(greeting)
    runs.append(run)

In [3]:
# Prepare the folder with run results that is passed as input
# to the post-processing workflow.
import os

# The input folder is placed below the environment's base directory and is
# populated from the runs that were collected in the previous cell.
datadir = os.path.join(flowenv.basedir, 'postprocdata')
workflow.prepare_postproc_data(outputdir=datadir, runs=runs)

# Output file for post-processing results (also below the base directory).
outputfile = os.path.join(flowenv.basedir, 'ngrams.csv')

In [4]:
from collections import Counter

import argparse
import sys
import time

from flowserv.service.postproc.client import Runs


def main(rundir, k=25, timeout=10, outputfile=None, n=3):
    """Create a csv file containing the frequency of the k most frequent
    n-grams in the greeting files of all runs. Counts only those n-grams that
    do not contain a space character. All n-grams are converted to upper case
    before they are counted.

    Parameters
    ----------
    rundir: string
        Folder with the run results; it is passed to the post-processing
        client (``Runs``) to iterate over the individual runs.
    k: int, default=25
        Number of most frequent n-grams that are written to the output.
    timeout: int or float, default=10
        Number of seconds to sleep after each run has been processed.
    outputfile: string, default=None
        Path of the csv file that is written. If None, the rows are written to
        standard output instead.
    n: int, default=3
        Length of the counted n-grams (in characters).

    Raises
    ------
    ValueError
        If n is less than one.
    """
    if n < 1:
        raise ValueError('invalid n-gram length {}'.format(n))
    # Count frequency of n-grams for all runs.
    ngrams = Counter()
    for run in Runs(rundir):
        with open(run.get_file('results/greetings.txt'), 'r') as f:
            for line in f:
                line = line.strip()
                # The range is empty for lines shorter than n characters, so
                # no separate length check is required.
                candidates = (
                    line[i:i + n].upper() for i in range(len(line) - n + 1)
                )
                ngrams.update(ng for ng in candidates if ' ' not in ng)
        # Delay execution to allow for testing running post-processing
        # workflows
        time.sleep(timeout)
    # Output rows with two columns: ngram,count
    rows = [
        '{},{}\n'.format(ngram, count) for ngram, count in ngrams.most_common(k)
    ]
    if outputfile is None:
        # Without a target file open() would fail; print the result instead.
        sys.stdout.writelines(rows)
    else:
        with open(outputfile, 'w') as f:
            f.writelines(rows)

# Run the post-processing step on the prepared run results. The timeout value
# is the number of seconds main() sleeps after each processed run.
main(rundir=datadir, timeout=1, outputfile=outputfile)

In [5]:
# Print contents of the generated output file
with open(outputfile, 'r') as f:
    for line in f:
        line = line.strip()
        # Skip blank lines; they cannot be unpacked into two columns.
        if not line:
            continue
        # Split on the last comma only: the count never contains a comma, but
        # an n-gram may (only n-grams with spaces are filtered out).
        ngram, count = line.rsplit(',', 1)
        print('{} {}'.format(ngram, count))

ELL 15
HEL 12
LLO 10
HEY 10
WEL 10
ELC 10
LCO 10
COM 10
OME 10
BYE 10
ADI 10
DIO 10
IOS 10
SEE 10
ABE 5
TOM 4
XIO 4
ION 4
ONG 4
ALI 4
LIC 4
ICE 4
GEO 4
EOR 4
ORG 4


In [6]:
# Remove all generated files
# NOTE(review): the post-processing input folder and the csv output file were
# created below flowenv.basedir, so they are presumably removed by this call
# as well — confirm.
flowenv.erase()
