From 46dd613ecc3c4d2ff5495443cd60467e10a8c4b5 Mon Sep 17 00:00:00 2001 From: Andy Chu Date: Sat, 13 Oct 2018 10:42:17 -0700 Subject: [PATCH] [interactive] Initial implementation of $PS1. Also implement ${x@P} so we can test the PS1 evaluation. Co-authored-by: okay --- bin/oil.py | 15 +++-- core/id_kind.py | 14 ++++ core/lexer_gen.py | 1 + core/reader.py | 19 +++--- core/ui.py | 123 +++++++++++++++++++++++++++++++++- core/word_eval.py | 17 ++++- native/fastlex.c | 27 ++++++++ osh/lex.py | 16 ++++- osh/match.py | 7 ++ osh/osh.asdl | 3 +- osh/word_parse.py | 8 ++- spec/ps1-replacements.test.sh | 112 +++++++++++++++++++++++++++++++ test/spec.sh | 5 ++ 13 files changed, 343 insertions(+), 24 deletions(-) create mode 100644 spec/ps1-replacements.test.sh diff --git a/bin/oil.py b/bin/oil.py index 92b3cb30b7..50243d5474 100755 --- a/bin/oil.py +++ b/bin/oil.py @@ -86,10 +86,6 @@ def _tlog(msg): _tlog('after imports') -# bash --noprofile --norc uses 'bash-4.3$ ' -OSH_PS1 = 'osh$ ' - - def _ShowVersion(): util.ShowAppVersion('Oil') @@ -115,6 +111,9 @@ def _ShowVersion(): OSH_SPEC.LongFlag('--parser-mem-dump', args.Str) OSH_SPEC.LongFlag('--runtime-mem-dump', args.Str) +# For bash compatibility +OSH_SPEC.LongFlag('--norc') + builtin.AddOptionsToArgSpec(OSH_SPEC) @@ -197,6 +196,10 @@ def OshMain(argv0, argv, login_shell): if e.errno != errno.ENOENT: raise + # Needed in non-interactive shells for @P + prompt = ui.Prompt(arena, parse_ctx, ex) + ui.PROMPT = prompt + if opts.c is not None: arena.PushSource('') line_reader = reader.StringLineReader(opts.c, arena) @@ -204,7 +207,7 @@ def OshMain(argv0, argv, login_shell): exec_opts.interactive = True elif opts.i: # force interactive arena.PushSource('') - line_reader = reader.InteractiveLineReader(OSH_PS1, arena) + line_reader = reader.InteractiveLineReader(arena, prompt) exec_opts.interactive = True else: try: @@ -212,7 +215,7 @@ def OshMain(argv0, argv, login_shell): except IndexError: if sys.stdin.isatty(): arena.PushSource('') - line_reader = reader.InteractiveLineReader(OSH_PS1, arena) + line_reader = reader.InteractiveLineReader(arena, prompt) exec_opts.interactive = True else: arena.PushSource('') diff --git a/core/id_kind.py b/core/id_kind.py index 032cdb998c..d8a638415e 100755 --- a/core/id_kind.py +++ b/core/id_kind.py @@ -258,6 +258,15 @@ def AddKinds(spec): ('Plus', '+' ), ]) + # Statically parse @P, so @x etc. is an error. + spec.AddKindPairs('VOp0', [ + ('Q', '@Q'), # ${x@Q} for quoting + ('E', '@E'), + ('P', '@P'), # ${PS1@P} for prompt eval + ('A', '@A'), + ('a', '@a'), + ]) + # String removal ops spec.AddKindPairs('VOp1', [ ('Percent', '%' ), @@ -377,6 +386,11 @@ def AddKinds(spec): 'Eof', ]) + # For parsing prompt strings like PS1. + spec.AddKind('PS', [ + 'Subst', 'Octal3', 'LBrace', 'RBrace', 'Literals', 'BadBackslash', + ]) + # Shared between [[ and test/[. _UNARY_STR_CHARS = 'zn' # -z -n diff --git a/core/lexer_gen.py b/core/lexer_gen.py index 80c81a9ee2..88d1d345f3 100755 --- a/core/lexer_gen.py +++ b/core/lexer_gen.py @@ -356,6 +356,7 @@ def main(argv): TranslateOshLexer(lex.LEXER_DEF) TranslateSimpleLexer('MatchEchoToken', lex.ECHO_E_DEF) TranslateSimpleLexer('MatchGlobToken', lex.GLOB_DEF) + TranslateSimpleLexer('MatchPS1Token', lex.PS1_DEF) TranslateRegexToPredicate(lex.VAR_NAME_RE, 'IsValidVarName') TranslateRegexToPredicate(pretty.PLAIN_WORD_RE, 'IsPlainWord') diff --git a/core/reader.py b/core/reader.py index dedf5627a5..8c19430984 100644 --- a/core/reader.py +++ b/core/reader.py @@ -10,6 +10,7 @@ """ import cStringIO +import sys from core import util log = util.log @@ -39,27 +40,27 @@ def Reset(self): _PS2 = '> ' - class InteractiveLineReader(_Reader): - def __init__(self, ps1, arena): + def __init__(self, arena, prompt): _Reader.__init__(self, arena) - self.ps1 = ps1 - self.prompt_str = ps1 + self.prompt = prompt + self.prompt_str = '' + self.Reset() # initialize self.prompt_str def _GetLine(self): + sys.stderr.write(self.prompt_str) try: - ret = raw_input(self.prompt_str) + '\n' # newline required + ret = raw_input('') + '\n' # newline required except EOFError: ret = None - self.prompt_str = _PS2 + self.prompt_str = _PS2 # TODO: Do we need $PS2? Would be easy. return ret def Reset(self): """Call this after command execution, to free memory taken up by the lines, and reset prompt string back to PS1. """ - self.prompt_str = self.ps1 - # free vector... + self.prompt_str = self.prompt.PS1() class FileLineReader(_Reader): @@ -97,7 +98,7 @@ def StringLineReader(s, arena): class VirtualLineReader(_Reader): """Read from lines we already read from the OS. - + Used for here docs and aliases. """ diff --git a/core/ui.py b/core/ui.py index b4c49d8d77..485545eba5 100644 --- a/core/ui.py +++ b/core/ui.py @@ -10,6 +10,9 @@ """ from __future__ import print_function +import os +import pwd +import socket # socket.gethostname() import sys from asdl import const @@ -17,7 +20,14 @@ from asdl import format as fmt from core import dev from osh import ast_lib -from osh.meta import ast +from osh import match +from osh.meta import ast, runtime, Id + +value_e = runtime.value_e + + +# bash --noprofile --norc uses 'bash-4.3$ ' +DEFAULT_PS1 = 'osh$ ' def Clear(): @@ -70,7 +80,6 @@ def Write(self, msg, *args): class TestStatusLine(object): - def __init__(self): pass @@ -81,6 +90,116 @@ def Write(self, msg, *args): print('\t' + msg) +# +# Prompt handling +# + +# Global instance set by main(). TODO: Use dependency injection. +PROMPT = None + +# NOTE: word_compile._ONE_CHAR has some of the same stuff. +_ONE_CHAR = { + 'a' : '\a', + 'e' : '\x1b', + '\\' : '\\', +} + + +def _GetCurrentUserName(): + uid = os.getuid() # Does it make sense to cache this somewhere? + try: + e = pwd.getpwuid(uid) + except KeyError: + return "" % uid + else: + return e.pw_name + + +class Prompt(object): + def __init__(self, arena, parse_ctx, ex): + self.arena = arena + self.parse_ctx = parse_ctx + self.ex = ex + + self.parse_cache = {} # PS1 value -> CompoundWord. + + def _ReplaceBackslashCodes(self, s): + ret = [] + non_printing = 0 + for id_, value in match.PS1_LEXER.Tokens(s): + # BadBacklash means they should have escaped with \\, but we can't + # make this an error. + if id_ in (Id.PS_Literals, Id.PS_BadBackslash): + ret.append(value) + + elif id_ == Id.PS_Octal3: + i = int(value[1:], 8) + ret.append(chr(i % 256)) + + elif id_ == Id.PS_LBrace: + non_printing += 1 + + elif id_ == Id.PS_RBrace: + non_printing -= 1 + + elif id_ == Id.PS_Subst: # \u \h \w etc. + char = value[1:] + if char == 'u': + r = _GetCurrentUserName() + + elif char == 'h': + r = socket.gethostname() + + elif char == 'w': + val = self.ex.mem.GetVar('PWD') + if val.tag == value_e.Str: + r = val.s + else: + r = '' + + elif char in _ONE_CHAR: + r = _ONE_CHAR[char] + + else: + raise NotImplementedError(char) + + ret.append(r) + + else: + raise AssertionError('Invalid token %r' % id_) + + return ''.join(ret) + + def PS1(self): + val = self.ex.mem.GetVar('PS1') + return self.EvalPS1(val) + + def EvalPS1(self, val): + if val.tag != value_e.Str: + return DEFAULT_PS1 + + ps1_str = val.s + + # NOTE: This is copied from the PS4 logic in Tracer. + try: + ps1_word = self.parse_cache[ps1_str] + except KeyError: + w_parser = self.parse_ctx.MakeWordParserForPlugin(ps1_str, self.arena) + + try: + ps1_word = w_parser.ReadPS() + except Exception as e: + error_str = '' + t = ast.token(Id.Lit_Chars, error_str, const.NO_INTEGER) + ps1_word = ast.CompoundWord([ast.LiteralPart(t)]) + + self.parse_cache[ps1_str] = ps1_word + + # e.g. "${debian_chroot}\u" -> '\u' + val2 = self.ex.word_ev.EvalWordToString(ps1_word) + return self._ReplaceBackslashCodes(val2.s) + + def PrintFilenameAndLine(span_id, arena, f=sys.stderr): line_span = arena.GetLineSpan(span_id) line_id = line_span.line_id diff --git a/core/word_eval.py b/core/word_eval.py index 3d4471b911..c385fc8170 100644 --- a/core/word_eval.py +++ b/core/word_eval.py @@ -7,13 +7,14 @@ from core import braces from core import expr_eval -from core import libstr from core import glob_ +from core import libstr from core import state from core import word_compile +from core import ui from core import util -from osh.meta import Id, Kind, LookupKind, ast, runtime +from osh.meta import ast, runtime, Id, Kind, LookupKind from osh import match word_e = ast.word_e @@ -137,6 +138,7 @@ def __init__(self, mem, exec_opts, splitter, arena): self.mem = mem # for $HOME, $1, etc. self.exec_opts = exec_opts # for nounset self.splitter = splitter + self.globber = glob_.Globber(exec_opts) # NOTE: Executor also instantiates one. self.arith_ev = expr_eval.ArithEvaluator(mem, exec_opts, self, arena) @@ -557,7 +559,16 @@ def _EvalBracedVarSub(self, part, part_vals, quoted): elif part.suffix_op: op = part.suffix_op - if op.tag == suffix_op_e.StringUnary: + if op.tag == suffix_op_e.StringNullary: + if op.op_id == Id.VOp0_P: + # TODO: Use dependency injection + #val = self.prompt._EvalPS1(val) + prompt = ui.PROMPT.EvalPS1(val) + val = runtime.Str(prompt) + else: + raise NotImplementedError(op.op_id) + + elif op.tag == suffix_op_e.StringUnary: if LookupKind(part.suffix_op.op_id) == Kind.VTest: # TODO: Change style to: # if self._ApplyTestOp(...) diff --git a/native/fastlex.c b/native/fastlex.c index bb91f68f81..0216bfef93 100644 --- a/native/fastlex.c +++ b/native/fastlex.c @@ -100,6 +100,31 @@ fastlex_MatchGlobToken(PyObject *self, PyObject *args) { return Py_BuildValue("(ii)", id, end_pos); } +static PyObject * +fastlex_MatchPS1Token(PyObject *self, PyObject *args) { + unsigned char* line; + int line_len; + + int start_pos; + if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) { + return NULL; + } + + // Bounds checking. + if (start_pos > line_len) { + PyErr_Format(PyExc_ValueError, + "Invalid MatchPS1Token call (start_pos = %d, line_len = %d)", + start_pos, line_len); + return NULL; + } + + int id; + int end_pos; + MatchPS1Token(line, line_len, start_pos, &id, &end_pos); + return Py_BuildValue("(ii)", id, end_pos); +} + + static PyObject * fastlex_IsValidVarName(PyObject *self, PyObject *args) { const char *name; @@ -130,6 +155,8 @@ static PyMethodDef methods[] = { "(line, start_pos) -> (id, end_pos)."}, {"MatchGlobToken", fastlex_MatchGlobToken, METH_VARARGS, "(line, start_pos) -> (id, end_pos)."}, + {"MatchPS1Token", fastlex_MatchPS1Token, METH_VARARGS, + "(line, start_pos) -> (id, end_pos)."}, {"IsValidVarName", fastlex_IsValidVarName, METH_VARARGS, "Is it a valid var name?"}, {"IsPlainWord", fastlex_IsPlainWord, METH_VARARGS, diff --git a/osh/lex.py b/osh/lex.py index 2031b73f7e..27c079f020 100644 --- a/osh/lex.py +++ b/osh/lex.py @@ -445,13 +445,26 @@ def IsKeyword(name): R(r'[^\\\0]+', Id.Char_Literals), ] +OCTAL3_RE = r'\\[0-7]{1,3}' + +# https://www.gnu.org/software/bash/manual/html_node/Controlling-the-Prompt.html#Controlling-the-Prompt +PS1_DEF = [ + R(OCTAL3_RE, Id.PS_Octal3), + R(r'\\[adehHjlnrstT@AuvVwW!#\\]', Id.PS_Subst), + C(r'\[', Id.PS_LBrace), # non-printing + C(r'\]', Id.PS_RBrace), + R(r'[^\\\0]+', Id.PS_Literals), + # e.g. \x is not a valid escape. + C('\\', Id.PS_BadBackslash), +] + # NOTE: Id.Ignored_LineCont is also not supported here, even though the whole # point of it is that supports other backslash escapes like \n! It just # becomes a regular backslash. LEXER_DEF[lex_mode_e.DOLLAR_SQ] = _C_STRING_COMMON + [ # Silly difference! In echo -e, the syntax is \0377, but here it's $'\377', # with no leading 0. - R(r'\\[0-7]{1,3}', Id.Char_Octal3), + R(OCTAL3_RE, Id.Char_Octal3), # ' is escaped in $'' mode, but not echo -e. Ditto fr ", not sure why. C(r"\'", Id.Char_OneChar), @@ -490,6 +503,7 @@ def IsKeyword(name): LEXER_DEF[lex_mode_e.VS_2] = \ ID_SPEC.LexerPairs(Kind.VTest) + \ + ID_SPEC.LexerPairs(Kind.VOp0) + \ ID_SPEC.LexerPairs(Kind.VOp1) + \ ID_SPEC.LexerPairs(Kind.VOp2) + [ C('}', Id.Right_VarSub), diff --git a/osh/match.py b/osh/match.py index 38090eb741..8a3d3b1e0e 100644 --- a/osh/match.py +++ b/osh/match.py @@ -109,16 +109,22 @@ def _MatchGlobToken_Fast(line, start_pos): tok_type, end_pos = fastlex.MatchGlobToken(line, start_pos) return IdInstance(tok_type), end_pos +def _MatchPS1Token_Fast(line, start_pos): + """Returns (id, end_pos).""" + tok_type, end_pos = fastlex.MatchPS1Token(line, start_pos) + return IdInstance(tok_type), end_pos if fastlex: MATCHER = _MatchOshToken_Fast ECHO_MATCHER = _MatchEchoToken_Fast GLOB_MATCHER = _MatchGlobToken_Fast + PS1_MATCHER = _MatchPS1Token_Fast IsValidVarName = fastlex.IsValidVarName else: MATCHER = _MatchOshToken_Slow(lex.LEXER_DEF) ECHO_MATCHER = _MatchTokenSlow(lex.ECHO_E_DEF) GLOB_MATCHER = _MatchTokenSlow(lex.GLOB_DEF) + PS1_MATCHER = _MatchTokenSlow(lex.PS1_DEF) # Used by osh/cmd_parse.py to validate for loop name. Note it must be # anchored on the right. @@ -129,3 +135,4 @@ def IsValidVarName(s): ECHO_LEXER = SimpleLexer(ECHO_MATCHER) GLOB_LEXER = SimpleLexer(GLOB_MATCHER) +PS1_LEXER = SimpleLexer(PS1_MATCHER) diff --git a/osh/osh.asdl b/osh/osh.asdl index 2266bc18c7..ded5dafef6 100644 --- a/osh/osh.asdl +++ b/osh/osh.asdl @@ -52,7 +52,8 @@ module osh | ArrayIndex(arith_expr expr) suffix_op = - StringUnary(id op_id, word arg_word) -- e.g. ${v:-default} + StringNullary(id op_id) -- ${x@Q} + | StringUnary(id op_id, word arg_word) -- e.g. ${v:-default} -- TODO: token for / to attribute errors | PatSub(word pat, word? replace, id replace_mode) -- begin is optional with ${array::1} diff --git a/osh/word_parse.py b/osh/word_parse.py index cc043e4f3b..273ac8fd4a 100644 --- a/osh/word_parse.py +++ b/osh/word_parse.py @@ -269,8 +269,6 @@ def _ParseVarExpr(self, arg_lex_mode): if self.token_type == Id.Right_VarSub: return part # no ops - # Or maybe this is a VarOpKind - op_kind = self.token_kind if op_kind == Kind.VTest: @@ -280,6 +278,12 @@ def _ParseVarExpr(self, arg_lex_mode): part.suffix_op = ast.StringUnary(op_id, arg_word) + elif op_kind == Kind.VOp0: + op_id = self.token_type + part.suffix_op = ast.StringNullary(op_id) + self._Next(lex_mode_e.VS_2) # Expecting } + self._Peek() + elif op_kind == Kind.VOp1: op_id = self.token_type arg_word = self._ReadVarOpArg(arg_lex_mode) diff --git a/spec/ps1-replacements.test.sh b/spec/ps1-replacements.test.sh new file mode 100644 index 0000000000..b7b00459ba --- /dev/null +++ b/spec/ps1-replacements.test.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +# +# For testing the Python sketch + +#### sh -i +echo 'echo foo' | PS1='$ ' $SH --norc -i +## STDOUT: +foo +## END +## STDERR: +$ echo foo +$ exit +## END + +#### [] are non-printing +PS1='\[foo\]$' +echo "${PS1@P}" +## STDOUT: +foo$ +## END + +#### \1004 +PS1='\1004$' +echo "${PS1@P}" +## STDOUT: +@4$ +## END + +#### \001 octal literals are supported +PS1='[\045]' +echo "${PS1@P}" +## STDOUT: +[%] +## END + +#### \555 is beyond max octal byte of \377 and wrapped to m +PS1='\555$' +echo "${PS1@P}" +## STDOUT: +m$ +## END + +#### \x55 hex literals not supported +PS1='[\x55]' +echo "${PS1@P}" +## STDOUT: +[\x55] +## END + +#### Single backslash +PS1='\' +echo "${PS1@P}" +## BUG bash stdout-json: "\\\u0002\n" +## STDOUT: +\ +## END + +#### Escaped backslash +PS1='\\' +echo "${PS1@P}" +## BUG bash stdout-json: "\\\u0002\n" +## STDOUT: +\ +## END + +#### \0001 octal literals are not supported +PS1='[\0455]' +echo "${PS1@P}" +## STDOUT: +[%5] +## END + +#### \u0001 unicode literals not supported +PS1='[\u0001]' +USER=$(whoami) +test "${PS1@P}" = "[${USER}0001]" +echo status=$? +## STDOUT: +status=0 +## END + +#### constant string +PS1='$ ' +echo "${PS1@P}" +## STDOUT: +$ +## END + +#### hostname +PS1='\h ' +test "${PS1@P}" = "$(hostname) " +echo status=$? +## STDOUT: +status=0 +## END + +#### username +PS1='\u ' +USER=$(whoami) +test "${PS1@P}" = "${USER} " +echo status=$? +## STDOUT: +status=0 +## END + +#### current working dir +PS1='\w ' +test "${PS1@P}" = "${PWD} " +echo status=$? +## STDOUT: +status=0 +## END diff --git a/test/spec.sh b/test/spec.sh index 764474d1f4..1a47f9c075 100755 --- a/test/spec.sh +++ b/test/spec.sh @@ -207,6 +207,11 @@ smoke() { sh-spec spec/smoke.test.sh ${REF_SHELLS[@]} $OSH_LIST "$@" } +ps1-replacements() { + sh-spec spec/ps1-replacements.test.sh --osh-failures-allowed 1 \ + $BASH $OSH_LIST "$@" +} + osh-only() { # 2 failures until we build in a JSON encoder. sh-spec spec/osh-only.test.sh --osh-failures-allowed 2 \