Skip to content
Permalink
Browse files

[types] osh/string_ops.py now passes.

  • Loading branch information
Andy Chu
Andy Chu committed Oct 31, 2019
1 parent 1be972f commit 6155a0f0e3bb286c52a67ddb2c0dce77f2eaa8cc
Showing with 27 additions and 4 deletions.
  1. +3 −1 native/libc.pyi
  2. +23 −1 osh/string_ops.py
  3. +1 −2 types/run.sh
@@ -1,4 +1,6 @@
from typing import List
from typing import List, Optional, Tuple

# Type stubs for the native `libc` extension module: signatures only, the
# bodies are intentionally `...`.
def gethostname() -> str: ...
# Takes a pattern string and returns a list of strings.
def glob(pat: str) -> List[str]: ...
# Takes a pattern and a string; returns a bool.
def fnmatch(pat: str, s: str) -> bool: ...
# The result is Optional, so callers must handle None as well as an
# (int, int) tuple.
# NOTE(review): presumably the tuple is the (start, end) position of the first
# group's match and None means "no match" -- confirm against libc.c.
def regex_first_group_match(regex: str, s: str, pos: int) -> Optional[Tuple[int, int]]: ...
@@ -15,8 +15,13 @@

import libc

from typing import List, Tuple, TYPE_CHECKING
if TYPE_CHECKING:
from _devbuild.gen.syntax_asdl import suffix_op__Unary, suffix_op__PatSub


def Utf8Encode(code):
# type: (int) -> str
"""Return utf-8 encoded bytes from a unicode code point.
Based on https://stackoverflow.com/a/23502707
@@ -57,11 +62,13 @@ def Utf8Encode(code):


def _CheckContinuationByte(byte):
# type: (str) -> None
if (ord(byte) >> 6) != 0b10:
raise util.InvalidUtf8(INVALID_CONT)


def _Utf8CharLen(starting_byte):
# type: (int) -> int
if (starting_byte >> 7) == 0b0:
return 1
elif (starting_byte >> 5) == 0b110:
@@ -75,6 +82,7 @@ def _Utf8CharLen(starting_byte):


def _NextUtf8Char(s, i):
# type: (str, int) -> int
"""
Given a string and a byte offset, returns the byte position after
the character at this position. Usually this is the position of the
@@ -97,6 +105,7 @@ def _NextUtf8Char(s, i):


def _PreviousUtf8Char(s, i):
# type: (str, int) -> int
"""
Given a string and a byte offset, returns the position of the
character before that offset. To start (find the first byte of the
@@ -145,6 +154,7 @@ def _PreviousUtf8Char(s, i):


def CountUtf8Chars(s):
# type: (str) -> int
"""Returns the number of utf-8 characters in the byte string 's'.
TODO: Raise exception rather than returning a string, so we can set the exit
@@ -165,6 +175,7 @@ def CountUtf8Chars(s):


def AdvanceUtf8Chars(s, num_chars, byte_offset):
# type: (str, int, int) -> int
"""
Advance a certain number of UTF-8 chars, beginning with the given byte
offset. Returns a byte offset.
@@ -215,6 +226,7 @@ def AdvanceUtf8Chars(s, num_chars, byte_offset):
# - Compile time errors for [[:space:]] ?

def DoUnarySuffixOp(s, op, arg):
# type: (str, suffix_op__Unary, str) -> str
"""Helper for ${x#prefix} and family."""

# Fast path for constant strings.
@@ -334,6 +346,7 @@ def DoUnarySuffixOp(s, op, arg):


def _AllMatchPositions(s, regex):
# type: (str, str) -> List[Tuple[int, int]]
"""Returns a list of all (start, end) match positions of the regex against s.
(If there are no matches, it returns the empty list.)
@@ -352,6 +365,7 @@ def _AllMatchPositions(s, regex):


def _PatSubAll(s, regex, replace_str):
# type: (str, str, str) -> str
parts = []
prev_end = 0
for start, end in _AllMatchPositions(s, regex):
@@ -365,6 +379,8 @@ def _PatSubAll(s, regex, replace_str):
class GlobReplacer(object):

def __init__(self, regex, replace_str, slash_spid):
# type: (str, str, int) -> None

# TODO: It would be nice to cache the compilation of the regex here,
# instead of just the string. That would require more sophisticated use of
# the Python/C API in libc.c, which we might want to avoid.
@@ -373,9 +389,12 @@ def __init__(self, regex, replace_str, slash_spid):
self.slash_spid = slash_spid

def __repr__(self):
# type: () -> str
return '<_GlobReplacer regex %r r %r>' % (self.regex, self.replace_str)

def Replace(self, s, op):
# type: (str, suffix_op__PatSub) -> str

regex = '(%s)' % self.regex # make it a group

if op.replace_mode == Id.Lit_Slash:
@@ -400,6 +419,7 @@ def Replace(self, s, op):
# TODO: Replace with ShellQuoteOneLine? It may need more testing and
# optimization.
def ShellQuote(s):
# type: (str) -> str
"""Quote 's' in a way that can be reused as shell input.
It doesn't necessarily match bash byte-for-byte. IIRC bash isn't consistent
@@ -418,6 +438,8 @@ def ShellQuote(s):


def ShellQuoteOneLine(s):
# type: (str) -> str

# TODO: Could use a regex to speed this up
needs_dollar = False
for c in s:
@@ -438,6 +460,7 @@ def ShellQuoteOneLine(s):


def ShellQuoteB(s):
# type: (str) -> str
"""Quote by adding backslashes.
Used for autocompletion, so it's friendlier for display on the command line.
@@ -457,4 +480,3 @@ def ShellQuoteB(s):
# space because it separates words
return util.BackslashEscape(s, ' `~!$&*()[]{}\\|;\'"<>?')


@@ -69,12 +69,11 @@ iter-arith-asdl() {

# Run `typecheck` over osh/glob_.py (and osh/string_ops.py), capture its
# stdout in a log file under _tmp/, then hand that log to assert-one-error.
typecheck-more-oil() {
#typecheck $flags osh/word_compile.py
#typecheck $flags osh/string_ops.py

local log=_tmp/typecheck-more-oil.txt

# Disable errexit so that a non-zero exit status from the commands below does
# not abort the function before the log is inspected.
set +o errexit
# $MYPY_FLAGS is left unquoted, so it is word-split into separate arguments.
# NOTE(review): both invocations redirect with '>' to the same $log, so the
# second truncates whatever the first wrote. These look like the before/after
# lines of one change shown together -- confirm only the second is intended.
typecheck $MYPY_FLAGS osh/glob_.py > $log
typecheck $MYPY_FLAGS osh/glob_.py osh/string_ops.py > $log

# NOTE(review): presumably checks that the log contains exactly one error --
# confirm against the definition of assert-one-error.
assert-one-error $log
}

0 comments on commit 6155a0f

Please sign in to comment.
You can’t perform that action at this time.