Skip to content

Commit

Permalink
[test/count-procs] Count syscalls too.
Browse files Browse the repository at this point in the history
- Print a table of syscalls
- Sort horizontally by shell performance
- Add more test cases
- Clean up log output

Addresses issue #254.
  • Loading branch information
Andy Chu committed Mar 27, 2020
1 parent b1d6de9 commit 49dc6da
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 58 deletions.
80 changes: 45 additions & 35 deletions test/count-procs.sh
Expand Up @@ -7,6 +7,15 @@ set -o nounset
set -o pipefail
set -o errexit

source build/dev-shell.sh

# TODO: Add yash to test/spec-bin.sh, since it starts the fewest
# processes!
readonly -a SHELLS=(dash bash mksh zsh ash osh yash)

readonly BASE_DIR='_tmp/syscall' # What we'll publish
readonly RAW_DIR='_tmp/syscall-raw' # Raw data

# Run it against the dev version of OSH
REPO_ROOT=$(cd $(dirname $(dirname $0)) && pwd)

Expand All @@ -25,58 +34,52 @@ count-procs() {
strace -ff -o $out_prefix -- $sh -c "$code"
}

readonly -a SHELLS=(dash bash mksh zsh osh)
readonly BASE_DIR='_tmp/count-procs'

run-case() {
### Run a test case with many shells

local num=$1
local code_str=$2

echo
echo "==="
echo "$code_str"
echo "==="

local base_dir=$BASE_DIR

for sh in "${SHELLS[@]}"; do
local out_prefix=$base_dir/$num-$sh
echo "--- $sh ---"
local out_prefix=$RAW_DIR/$num-$sh
echo "--- $sh"
count-procs $out_prefix $sh "$code_str"
done

return
echo "Process counts"

for sh in "${SHELLS[@]}"; do
echo "--- $sh ---"
ls $base_dir/$sh | wc -l
done
}

print-cases() {
# format: number, whitespace, then an arbitrary code string
egrep -v '^[[:space:]]*(#|$)' <<EOF
# 1 process of course
# builtin
echo hi
# external command
date
# two external commands
date; date
# dash and zsh somehow optimize this to 1
(echo hi)
# command sub
echo \$(ls)
echo \$(date)
# command sub with builtin
echo \$(echo hi)
# command sub with useless subshell (some scripts use this)
echo \$( ( date ) )
# command sub with other subshell
echo \$( ( date ); echo hi )
# 2 processes for all shells
( echo hi ); echo done
# 3 processes
ls | wc -l
date | wc -l
# every shell does 3
echo a | wc -l
Expand All @@ -85,14 +88,14 @@ echo a | wc -l
command echo a | wc -l
# bash does 4 here!
command ls / | wc -l
command date | wc -l
# 3 processes for all?
# osh gives FIVE??? But others give 3. That's bad.
( ls ) | wc -l
( date ) | wc -l
# 3 processes for all shells except zsh and osh, which have shopt -s lastpipe!
ls | read x
date | read x
# osh has 3, but should be 2 like zsh?
# hm how can zsh do 2 here? That seems impossible.
Expand All @@ -104,7 +107,7 @@ ls | read x
{ echo a; echo b; } | wc -l; echo done
# this is all over the map too. 3 4 4 2.
{ echo a; ls /; } | wc -l
{ echo a; date; } | wc -l
# osh does 4 when others do 3. So every shell optimizes this extra pipeline.
( echo a; echo b ) | wc -l
Expand All @@ -121,22 +124,26 @@ number-cases() {
}

readonly MAX_CASES=100
#readonly MAX_CASES=5

run-cases() {
mkdir -p $BASE_DIR
mkdir -p $RAW_DIR $BASE_DIR

shopt -s nullglob
rm -f -v $BASE_DIR/*
rm -f -v $RAW_DIR/* $BASE_DIR/*

number-cases > $BASE_DIR/cases.txt
cat $BASE_DIR/cases.txt | head -n $MAX_CASES | while read -r num code_str; do
echo $num
echo "[$code_str]"
echo
echo '==='
echo "$num $code_str"
echo

run-case $num "$code_str"
done

ls -1 $BASE_DIR | tee $BASE_DIR/listing.txt
# omit total line
( cd $RAW_DIR && wc -l * ) | head -n -1 > $BASE_DIR/counts.txt
summarize
}

Expand All @@ -145,11 +152,14 @@ print-table() {
}

summarize() {
cat $BASE_DIR/listing.txt \
| egrep -o '^[0-9]+-[a-z]+' \
| sed 's/-/ /g' \
cat $BASE_DIR/counts.txt \
| print-table $BASE_DIR/cases.txt | tee $BASE_DIR/table.txt
}

# TODO:
# - assert failures
# - clean up output -- spaces not tabs for numbers
# - publish to 'toil'


"$@"
124 changes: 101 additions & 23 deletions test/count_procs.py
Expand Up @@ -6,77 +6,155 @@
Input looks like
01 osh
01 osh
01 dash
01 dash
01-dash
01-dash
01-osh
01-osh
01-osh
...
"""
from __future__ import print_function

import collections
import re
import sys

from core import ansi


def Color(i):
#return str(i)
return '*' * i
s = '^' * i
return '%6s' % s


# Each stdin line is a line count, then a CASE-SHELL.N name:
#
# 554 01-osh.1234
# 553 01-osh.1235

WC_LINE = re.compile(r'''
\s*
(\d+) # number of lines
\s+
(\d{2}) # case ID
-
([a-z]+) # shell name
''', re.VERBOSE)

assert WC_LINE.match(' 68 01-ash.19610')

# TODO:
# - Print the test snippet somewhere

def main(argv):
code_strs = {}
with open(argv[1]) as f:
for line in f:
case_id, code_str = line.split(None, 1)
case_id, code_str = line.split(None, 1) # whitespace
code_strs[case_id] = code_str

cases = set()
shells = set()

d = collections.defaultdict(int)
num_procs = collections.defaultdict(int)
procs_by_shell = collections.defaultdict(int)

num_syscalls = collections.defaultdict(int)
syscalls_by_shell = collections.defaultdict(int)

for line in sys.stdin:
case, sh = line.split()
m = WC_LINE.match(line)
if not m:
raise RuntimeError('Invalid line %r' % line)
num_sys, case, sh = m.groups()
num_sys = int(num_sys)

cases.add(case)
shells.add(sh)

d[case, sh] += 1
num_procs[case, sh] += 1
num_syscalls[case, sh] += num_sys

procs_by_shell[sh] += 1
syscalls_by_shell[sh] += num_sys

f = sys.stdout

f.write("\t")
for sh in sorted(shells):
f.write(sh + "\t")
f.write('result\t')
f.write('code')
f.write("\n")
# Orders for shells
proc_sh = sorted(procs_by_shell, key=lambda sh: procs_by_shell[sh])
syscall_sh = sorted(syscalls_by_shell, key=lambda sh: syscalls_by_shell[sh])
#print(proc_sh)
#print(syscall_sh)

# Print Number of processes

f.write('Number of Processes Started, by shell and code string\n\n')

def WriteHeader(shells):
f.write("\t")
for sh in shells:
f.write("%6s\t" % sh)
f.write('osh>min\t')
f.write('code')
f.write("\n")

WriteHeader(proc_sh)

for case_id in sorted(cases):
for case_id in sorted(cases):
f.write(case_id + "\t")
min_procs = 20
for sh in sorted(shells):
num_procs = d[case_id, sh]
f.write(Color(num_procs) + "\t")
min_procs = min(num_procs, min_procs)
for sh in proc_sh:
n = num_procs[case_id, sh]
f.write(Color(n) + "\t")
min_procs = min(n, min_procs)

osh_count = d[case_id, 'osh']
osh_count = num_procs[case_id, 'osh']
if osh_count != min_procs:
f.write('%sx%s %d>%d\t' % (
ansi.RED + ansi.BOLD, ansi.RESET, osh_count, min_procs))
f.write('%d>%d\t' % (osh_count, min_procs))
else:
f.write('\t')

f.write(code_strs[case_id])
f.write("\n")

f.write("TOTAL\t")
for sh in proc_sh:
f.write('%6d\t' % procs_by_shell[sh])
f.write('\n\n')


#
# Print number of syscalls
#

f.write('Number of Syscalls\n\n')

WriteHeader(syscall_sh)

for case_id in sorted(cases):
f.write(case_id + "\t")
#min_procs = 20
for sh in syscall_sh:
n = num_syscalls[case_id, sh]
f.write('%6d\t' % n)
#min_procs = min(n, min_procs)

#osh_count = num_procs[case_id, 'osh']
if False: #osh_count != min_procs:
f.write('%sx%s %d>%d\t' % (L, R, osh_count, min_procs))
else:
f.write('\t')

f.write(code_strs[case_id])
f.write("\n")

f.write("TOTAL\t")
for sh in syscall_sh:
f.write('%6d\t' % syscalls_by_shell[sh])
f.write('\n\n')


if __name__ == '__main__':
try:
Expand Down

0 comments on commit 49dc6da

Please sign in to comment.