View
@@ -182,7 +182,7 @@ def OshMain(argv, login_shell):
spec.LongFlag('--help')
spec.LongFlag('--version')
spec.LongFlag('--ast-format',
['text', 'abbrev-text', 'html', 'abbrev-html', 'oheap'],
['text', 'abbrev-text', 'html', 'abbrev-html', 'oheap', 'none'],
default='abbrev-text')
spec.LongFlag('--show-ast') # execute and show
spec.LongFlag('--fix')
@@ -352,8 +352,11 @@ def OshMain(argv, login_shell):
if exec_opts.noexec:
do_exec = False
if exec_opts.noexec or opts.show_ast: # -n shows the AST
if opts.ast_format == 'oheap':
# -n prints AST, --show-ast prints and executes
if exec_opts.noexec or opts.show_ast:
if opts.ast_format == 'none':
print('AST not printed.', file=sys.stderr)
elif opts.ast_format == 'oheap':
# TODO: Make this a separate flag?
if sys.stdout.isatty():
raise RuntimeError('ERROR: Not dumping binary data to a TTY.')
View
@@ -1,4 +1,5 @@
#!/usr/bin/env python
from __future__ import print_function
# Copyright 2016 Andy Chu. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -33,16 +34,19 @@ def testTokens(self):
print(Kind.Eof)
print(Kind.Left)
print('--')
num_kinds = 0
for name in dir(Kind):
if name[0].isupper():
print(name, getattr(Kind, name))
num_kinds += 1
print('Number of Kinds:', num_kinds)
# 233 out of 256 tokens now
print('Number of IDs:', len(id_kind._ID_NAMES))
# Make sure we're not exporting too much
print(dir(id_kind))
# 206 out of 256 tokens now
print(len(id_kind._ID_NAMES))
t = ast.token(Id.Arith_Plus, '+')
self.assertEqual(Kind.Arith, LookupKind(t.id))
t = ast.token(Id.Arith_CaretEqual, '^=')
View
@@ -6,6 +6,25 @@
TODO:
- \0 should be Id.Op_Newline or Id.WS_Newline. And then the higher level Lexer
should return the Id.Eof_Real token, as it does now.
Remaining constructs:
Case terminators:
;;& Op_DSemiAmp for case
;& Op_Semi
Left Index:
_VAR_NAME_RE + '\[' Lit_LeftIndexLikeOpen
]= Lit_LeftIndexLikeClose
Indexed array and Associative array literals:
declare -A a=([key]=value [key2]=value2)
declare -a a=([1 + 2]=value [3 + 4]=value2) # parsed!
Lit_LBracket Lit_RBracketEqual
Left_Bracket, Right_BracketEqual?
Op_LBracket Op_RBracketEqual
"""
from core.id_kind import Id, Kind, ID_SPEC
@@ -248,7 +267,7 @@ def IsKeyword(name):
C('|', Id.Op_Pipe),
C(')', Id.Op_RParen), # may be translated to Id.ExtGlob_RParen
C('\0', Id.Eof_Real),
R('.', Id.Lit_Chars), # everything else is literal
R('.', Id.Lit_Other), # everything else is literal
]
LEXER_DEF[LexMode.BASH_REGEX] = [
View
@@ -15,10 +15,11 @@
-- * case fallthrough ;& and ;;&
-- Represented but Not Parsed:
-- * LeftIndex -- LHS of assignment
-- * LeftIndex -- a[foo]=bar (the arithmetic version is parsed)
-- * ArrayPair -- ([foo]=bar)
-- Parsed but Not Implemented
-- * extended glob
-- * Process sub -- <() and >()
-- * <>, >| redirects
View
@@ -64,8 +64,10 @@ all() {
# Just the parser
parser() {
# I'm counting brace detection/expansion here because it doesn't depend on
# runtime info.
echo 'Lexer/Parser'
wc -l osh/{*_parse.py,lex.py,parse_lib.py} core/word.py | sort -n
wc -l osh/{*_parse.py,lex.py,parse_lib.py} core/{word,braces}.py | sort -n
echo
echo 'AST and IDs'
@@ -74,6 +76,11 @@ parser() {
echo 'Common Algorithms'
wc -l core/{tdop,lexer}.py | sort -n
echo
echo 'Utilities'
wc -l core/{alloc,ui,reader}.py | sort -n
echo
}
# Stuff we might need to hand-port
@@ -83,9 +90,18 @@ parser-port() {
runtime() {
# NOTE: braces.py contains both parsing and runtime. It is a middle stage.
echo 'Core'
wc -l \
core/*_{exec,eval}.py core/{builtin,glob_,process,state}.py \
core/runtime.asdl | sort -n
core/*_{exec,eval}.py core/{process,state}.py core/runtime.asdl | sort -n
echo
echo 'Builtins'
wc -l core/{builtin,test_builtin}.py
echo
echo 'Libraries'
wc -l core/{args,glob_}.py | sort -n
echo
}
# count instructions, for fun
View
@@ -762,7 +762,7 @@ def WriteRow(self, i, line_num, row, desc):
def Options():
"""Returns an option parser instance."""
p = optparse.OptionParser('test_sh.py [options] TEST_FILE shell...')
p = optparse.OptionParser('sh_spec.py [options] TEST_FILE shell...')
p.add_option(
'-v', '--verbose', dest='verbose', action='store_true', default=False,
help='Show details about test execution')
View
@@ -1,7 +1,7 @@
#!/bin/bash
#
# Usage:
# ./shebang-is-shell.sh PATH
# ./shebang.sh is-shell PATH
# Test if the first line ends with 'sh'.
is-shell() {
View
@@ -189,6 +189,11 @@ all-manifests() {
# Operating Systems
#
# Skip the autoconf stuff here. Could skip it elsewhere too.
src=~/src/freebsd-11.1/usr/src
_manifest freebsd-11.1 $src \
$(find $src -name '*.sh' -a ! -name 'ltmain.sh' -a -printf '%P\n')
_sh-manifest ~/git/other/minix
_sh-manifest ~/git/other/illumos-gate
_sh-manifest ~/git/other/daemontools-encore
@@ -405,6 +410,30 @@ count-lines() {
tr '\n' '\0' | wc -l --files0-from - | sort -n
}
grep-features1() {
  # Search every path printed by 'abspaths' (except ltmain.sh) for lines that
  # start with an indexed assignment such as a[x]=.
  #
  # Findings recorded with the earlier, narrower pattern (names made only of
  # letters and digits):
  #   Hm only 608 files out of 10,000 use a[x]=
  #   But it is used in
  #     /home/andy/src/linux-4.8.7/scripts/decode_stacktrace.sh
  #   portage, bash-completion, uses it
  #
  # The name part now follows shell identifier rules (letters, digits and '_',
  # not starting with a digit), so my_array[i]= is matched too; the count above
  # predates that change.  'grep -E' replaces the obsolescent 'egrep'.
  time abspaths | grep -v ltmain.sh |
    xargs grep -E '^[[:space:]]*[a-zA-Z_][a-zA-Z0-9_]*\[.*\]='
}
grep-features2() {
  # Fixed-string search for '=([' (the start of an associative array literal)
  # across every path printed by 'abspaths', skipping ltmain.sh.
  #
  # Result: outside of illumos/ast/esoteric, there's only one real usage of
  # associative array literals!
  # /home/andy/git/other/tensorflow/tensorflow/tools/ci_build/builds/pip.sh: WHL_TAGS=(["2.7"]="cp27-none" ["3.4"]="cp34-cp34m" ["3.5"]="cp35-cp35m")
  local fixed_str='=(['

  time abspaths | grep -v ltmain.sh | xargs grep -F "$fixed_str"
}
grep-features3() {
  # Fixed-string search for ';&' across every path printed by 'abspaths',
  # skipping ltmain.sh.
  #
  # Result: wow, this is ONLY used in a handful of files in bash-completion!
  # And tests.  That might be enough to justify it.
  local fixed_str=';&'

  time abspaths | grep -v ltmain.sh | xargs grep -F "$fixed_str"
}
# Takes ~15 seconds for 8,000+ files.
#
# NOTE: APKBUILD don't have shebang lines! So there are a bunch of false
@@ -427,6 +456,57 @@ find-tracebacks() {
xargs grep -l 'Traceback'
}
find-with-shebang() {
  # Print executable regular files under a directory whose name has no
  # extension and which 'test/shebang.sh is-shell' accepts.  .git directories
  # are pruned.  The whole find is timed.
  #
  # Args:
  #   $1: directory to search.  Defaults to '.', which is what GNU find uses
  #       when it is given no starting point.
  #
  # NOTE: test/shebang.sh is a relative path, so this must be run from a
  # directory where that path resolves.
  local dir=${1:-.}

  # Look for files without an extension that have shell shebang lines.

  # Bad things about find:
  # * -regextype is part of the expression that always returns true, and has a
  #   side effect that only affects later clauses!
  # * there are TEN different kinds
  # * emacs is the default regex type!
  # * -regex matches the whole path, whereas -name only matches the name
  #   - should be name ~ /regex/ and path ~ /regex/
  #   - there is no way to search just the name for a regex
  # * no character classes in the default type
  #
  # https://www.gnu.org/software/findutils/manual/html_node/find_html/Regular-Expressions.html#Regular-Expressions

  # The regex matches the whole path, e.g. so freebsd-11.1 must be matched.

  # What might be faster here is to find all the executables first, then put
  # them in a text file.  test/shebang.sh can be invoked with stdin as a path
  # list and filter them.  It's not horribly slow though.

  # Looking for *.sh misses 590 files in FreeBSD.  There are 1088 .sh files.

  # NOTE: Should get rid of most 'configure' scripts?

  # "$dir" is quoted so a directory containing spaces or glob characters is
  # passed to find as a single starting point.
  time find "$dir" \
    \( -name .git -a -prune \) -o \
    \( -regex '.+/[a-zA-Z0-9_\-]+' -a \
       -type f -a \
       -executable -a \
       -exec test/shebang.sh is-shell {} ';' -a \
       -printf '%p\n' \)
}
gentoo() {
  # Count the lines of every *.ebuild file under the local Gentoo checkout and
  # print the per-file counts sorted ascending.  The pipeline is timed.
  #
  # 38,000 ebuild files
  local src
  src=~/git/gentoo/gentoo

  # 2M lines, because there are a lot of duplicate versions.
  # -print0 with --files0-from keeps odd file names intact.
  time find "$src" -name '*.ebuild' -a -print0 |
    wc -l --files0-from - | sort -n
  return

  # Disabled by the 'return' above.  When re-enabled this searches only $src
  # (not the current directory as well) and passes paths relative to $src,
  # mirroring the freebsd-11.1 _manifest call in all-manifests.
  # NOTE(review): confirm _manifest expects $src-relative paths.
  _manifest distro/gentoo "$src" \
    $(find "$src" -name '*.ebuild' -a -printf '%P\n')
}
#
# Find Biggest Shell Scripts in Aboriginal Source Tarballs
#
View
@@ -40,7 +40,7 @@ def CreateOptionsParser():
p.add_option(
'--def', dest='defs', metavar="'NAME VALUE'", type='str',
default=[], action='append',
help='Define varaibles for use in format strings')
help='Define variables for use in format strings')
p.add_option(
'--as-percent', dest='percent_cols', metavar="COLNAME", type='str',
@@ -97,7 +97,8 @@ def PrintRow(row, col_names, col_formats, defs, percent_cols):
except ValueError:
pass
# Does it look lik an int?
# Does it look like an int? Format like 3,000.
# TODO: Put this before the float test?
try:
cell_int = int(cell)
cell_str = '{:,}'.format(cell_int)