Skip to content

Commit

Permalink
Make isUpper (and variants) work for strings with non-alpha chars
Browse files Browse the repository at this point in the history
The other variants are isLower, isUpperAscii and isLowerAscii

Fixes nim-lang#7963.

This commit changes the behavior and signatures of:

- isUpper, isLower in the unicode module
- isUpperAscii, isLowerAscii in the strutils module

A second mandatory parameter skipNonAlpha is added to these 4 procs.

(This change affects only the case where the input is a *string*.)

---

With skipNonAlpha set to true, the behavior mimics Python's isupper and
islower, i.e. non-alphabetic chars/runes are ignored when checking if
the string is upper-case or lower-case.

    Before this commit:

      doAssert(not isUpper("A B"))

    After this commit:

      doAssert(not isUpper("A B", false))    <-- old behavior
      doAssert isUpper("A B", true)

      Below two are equivalent:

                           isUpper("A B", false)

        isAlpha("A B") and isUpper("A B", true)

... and similarly for the other 3 procs.
  • Loading branch information
kaushalmodi committed Jun 8, 2018
1 parent 3e799d7 commit 155ab58
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 50 deletions.
6 changes: 6 additions & 0 deletions changelog.md
Expand Up @@ -42,6 +42,12 @@
- ``math.`mod` `` for floats now behaves the same as ``mod`` for integers
(previously it used floor division like Python). Use ``math.floorMod`` for the old behavior.

- For string inputs, ``unicode.isUpper`` and ``unicode.isLower`` now require a
second mandatory parameter ``skipNonAlpha``.

- For string inputs, ``strutils.isUpperAscii`` and ``strutils.isLowerAscii`` now
require a second mandatory parameter ``skipNonAlpha``.

#### Breaking changes in the compiler

- The undocumented ``#? braces`` parsing mode was removed.
Expand Down
83 changes: 63 additions & 20 deletions lib/pure/strutils.nim
Expand Up @@ -150,23 +150,52 @@ proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar,
## characters and there is at least one character in `s`.
isImpl isSpaceAscii

proc isLowerAscii*(s: string): bool {.noSideEffect, procvar,
rtl, extern: "nsuIsLowerAsciiStr".} =
## Checks whether or not `s` contains all lower case characters.
template isCaseImpl(s, charProc, skipNonAlpha) =
  ## Shared body of the string overloads of ``isLowerAscii`` and
  ## ``isUpperAscii``.
  ##
  ## Must be expanded inside a proc that returns ``bool``, because every
  ## exit path uses ``return``.
  ##
  ## * ``s``: the string to inspect.
  ## * ``charProc``: per-character case predicate, called as ``charProc(c)``.
  ## * ``skipNonAlpha``: when true, characters for which ``isAlphaAscii`` is
  ##   false are ignored and the result is true only if at least one
  ##   alphabetical character was seen; when false, every character must
  ##   satisfy ``charProc``.
  ##
  ## An empty ``s`` always yields false.
  if s.len == 0: return false
  var hasAtleastOneAlphaChar = false
  for c in s:
    if skipNonAlpha:
      # Computed once per character and never reassigned, hence `let`.
      let charIsAlpha = c.isAlphaAscii()
      if charIsAlpha:
        if not charProc(c):
          return false
        hasAtleastOneAlphaChar = true
    elif not charProc(c):
      return false
  return if skipNonAlpha: hasAtleastOneAlphaChar else: true

proc isLowerAscii*(s: string, skipNonAlpha: bool): bool =
## Checks whether ``s`` is lower case.
##
## This checks ASCII characters only.
## Returns true if all characters in `s` are lower case
## and there is at least one character in `s`.
isImpl isLowerAscii
##
## If ``skipNonAlpha`` is true, returns true if all alphabetical
## characters in ``s`` are lower case. Returns false if none of the
## characters in ``s`` are alphabetical.
##
## If ``skipNonAlpha`` is false, returns true only if all characters
## in ``s`` are alphabetical and lower case.
##
## For either value of ``skipNonAlpha``, returns false if ``s`` is
## an empty string.
isCaseImpl(s, isLowerAscii, skipNonAlpha)

proc isUpperAscii*(s: string): bool {.noSideEffect, procvar,
rtl, extern: "nsuIsUpperAsciiStr".} =
## Checks whether or not `s` contains all upper case characters.
proc isUpperAscii*(s: string, skipNonAlpha: bool): bool =
## Checks whether ``s`` is upper case.
##
## This checks ASCII characters only.
## Returns true if all characters in `s` are upper case
## and there is at least one character in `s`.
isImpl isUpperAscii
##
## If ``skipNonAlpha`` is true, returns true if all alphabetical
## characters in ``s`` are upper case. Returns false if none of the
## characters in ``s`` are alphabetical.
##
## If ``skipNonAlpha`` is false, returns true only if all characters
## in ``s`` are alphabetical and upper case.
##
## For either value of ``skipNonAlpha``, returns false if ``s`` is
## an empty string.
isCaseImpl(s, isUpperAscii, skipNonAlpha)

proc toLowerAscii*(c: char): char {.noSideEffect, procvar,
rtl, extern: "nsuToLowerAsciiChar".} =
Expand Down Expand Up @@ -1863,7 +1892,7 @@ proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault,
of ffDefault:
{.emit: "`res` = `f`.toString();".}
of ffDecimal:
{.emit: "`res` = `f`.toFixed(`precision`);".}
# ffDecimal maps to JavaScript's Number.prototype.toFixed; `to` is not a Number method.
{.emit: "`res` = `f`.toFixed(`precision`);".}
of ffScientific:
{.emit: "`res` = `f`.toExponential(`precision`);".}
result = $res
Expand Down Expand Up @@ -2516,19 +2545,34 @@ when isMainModule:
doAssert(not isLowerAscii('A'))
doAssert(not isLowerAscii('5'))
doAssert(not isLowerAscii('&'))
doAssert(not isLowerAscii(' '))

doAssert isLowerAscii("abcd")
doAssert(not isLowerAscii("abCD"))
doAssert(not isLowerAscii("33aa"))
doAssert isLowerAscii("abcd", false)
doAssert(not isLowerAscii("33aa", false))
doAssert(not isLowerAscii("a b", false))

doAssert(not isLowerAscii("abCD", true))
doAssert isLowerAscii("33aa", true)
doAssert isLowerAscii("a b", true)
doAssert isLowerAscii("1, 2, 3 go!", true)
doAssert(not isLowerAscii(" ", true))
doAssert(not isLowerAscii("(*&#@(^#$ ", true)) # None of the string chars are alphabets

doAssert isUpperAscii('A')
doAssert(not isUpperAscii('b'))
doAssert(not isUpperAscii('5'))
doAssert(not isUpperAscii('%'))

doAssert isUpperAscii("ABC")
doAssert(not isUpperAscii("AAcc"))
doAssert(not isUpperAscii("A#$"))
doAssert isUpperAscii("ABC", false)
doAssert(not isUpperAscii("A#$", false))
doAssert(not isUpperAscii("A B", false))

doAssert(not isUpperAscii("AAcc", true))
doAssert isUpperAscii("A#$", true)
doAssert isUpperAscii("A B", true)
doAssert isUpperAscii("1, 2, 3 GO!", true)
doAssert(not isUpperAscii(" ", true))
doAssert(not isUpperAscii("(*&#@(^#$ ", true)) # None of the string chars are alphabets

doAssert rsplit("foo bar", seps=Whitespace) == @["foo", "bar"]
doAssert rsplit(" foo bar", seps=Whitespace, maxsplit=1) == @[" foo", "bar"]
Expand Down Expand Up @@ -2601,4 +2645,3 @@ bar
nonStaticTests()
staticTests()
static: staticTests()

114 changes: 84 additions & 30 deletions lib/pure/unicode.nim
Expand Up @@ -1392,7 +1392,7 @@ proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} =
(c >= 0xfe20 and c <= 0xfe2f))

template runeCheck(s, runeProc) =
## Common code for rune.isLower, rune.isUpper, etc
## Common code for isAlpha and isSpace.
result = if len(s) == 0: false else: true

var
Expand All @@ -1403,16 +1403,6 @@ template runeCheck(s, runeProc) =
fastRuneAt(s, i, rune, doInc=true)
result = runeProc(rune) and result

proc isUpper*(s: string): bool {.noSideEffect, procvar,
rtl, extern: "nuc$1Str".} =
## Returns true iff `s` contains all upper case unicode characters.
runeCheck(s, isUpper)

proc isLower*(s: string): bool {.noSideEffect, procvar,
rtl, extern: "nuc$1Str".} =
## Returns true iff `s` contains all lower case unicode characters.
runeCheck(s, isLower)

proc isAlpha*(s: string): bool {.noSideEffect, procvar,
rtl, extern: "nuc$1Str".} =
## Returns true iff `s` contains all alphabetic unicode characters.
Expand All @@ -1423,6 +1413,56 @@ proc isSpace*(s: string): bool {.noSideEffect, procvar,
## Returns true iff `s` contains all whitespace unicode characters.
runeCheck(s, isWhiteSpace)

template runeCaseCheck(s, runeProc, skipNonAlpha) =
  ## Common code for the string overloads of ``isLower`` and ``isUpper``.
  ##
  ## Must be expanded inside a proc that returns ``bool``, because every
  ## exit path uses ``return``.
  ##
  ## * ``s``: the string whose runes are decoded with ``fastRuneAt``.
  ## * ``runeProc``: per-rune case predicate, called as ``runeProc(rune)``.
  ## * ``skipNonAlpha``: when true, runes for which ``isAlpha`` is false are
  ##   ignored and the result is true only if at least one alphabetical rune
  ##   was seen; when false, every rune must satisfy ``runeProc``.
  ##
  ## An empty ``s`` always yields false.
  if len(s) == 0: return false

  var
    i = 0
    rune: Rune
    hasAtleastOneAlphaRune = false

  while i < len(s):
    # Decodes the rune starting at byte `i` and advances `i` past it.
    fastRuneAt(s, i, rune, doInc=true)
    if skipNonAlpha:
      # Computed once per rune and never reassigned, hence `let`.
      let runeIsAlpha = isAlpha(rune)
      if runeIsAlpha:
        if not runeProc(rune):
          return false
        hasAtleastOneAlphaRune = true
    elif not runeProc(rune):
      return false
  return if skipNonAlpha: hasAtleastOneAlphaRune else: true

proc isLower*(s: string, skipNonAlpha: bool): bool =
  ## Reports whether the string ``s`` is lower case.
  ##
  ## * ``skipNonAlpha = true``: non-alphabetical runes are ignored; the
  ##   result is true when every alphabetical rune in ``s`` is lower case,
  ##   and false when ``s`` holds no alphabetical rune at all.
  ## * ``skipNonAlpha = false``: the result is true only when every rune
  ##   in ``s`` is both alphabetical and lower case.
  ##
  ## An empty ``s`` gives false regardless of ``skipNonAlpha``.
  runeCaseCheck(s, isLower, skipNonAlpha)

proc isUpper*(s: string, skipNonAlpha: bool): bool =
  ## Reports whether the string ``s`` is upper case.
  ##
  ## * ``skipNonAlpha = true``: non-alphabetical runes are ignored; the
  ##   result is true when every alphabetical rune in ``s`` is upper case,
  ##   and false when ``s`` holds no alphabetical rune at all.
  ## * ``skipNonAlpha = false``: the result is true only when every rune
  ##   in ``s`` is both alphabetical and upper case.
  ##
  ## An empty ``s`` gives false regardless of ``skipNonAlpha``.
  runeCaseCheck(s, isUpper, skipNonAlpha)

template convertRune(s, runeProc) =
## Convert runes in `s` using `runeProc` as the converter.
result = newString(len(s))
Expand Down Expand Up @@ -1755,25 +1795,39 @@ when isMainModule:
doAssert(not isSpace(""))
doAssert(not isSpace("ΑΓc \td"))

doAssert isLower("a")
doAssert isLower("γ")
doAssert(not isLower("Γ"))
doAssert(not isLower("4"))
doAssert(not isLower(""))

doAssert isLower("abcdγ")
doAssert(not isLower("abCDΓ"))
doAssert(not isLower("33aaΓ"))

doAssert isUpper("Γ")
doAssert(not isUpper("b"))
doAssert(not isUpper("α"))
doAssert(not isUpper(""))
doAssert(not isUpper(""))

doAssert isUpper("ΑΒΓ")
doAssert(not isUpper("AAccβ"))
doAssert(not isUpper("A#$β"))
doAssert(not isLower(' '.Rune))

doAssert isLower("a", false)
doAssert isLower("γ", true)
doAssert(not isLower("Γ", false))
doAssert(not isLower("4", true))
doAssert(not isLower("", false))
doAssert isLower("abcdγ", false)
doAssert(not isLower("33aaΓ", false))
doAssert(not isLower("a b", false))

doAssert(not isLower("abCDΓ", true))
doAssert isLower("a b", true)
doAssert isLower("1, 2, 3 go!", true)
doAssert(not isLower(" ", true))
doAssert(not isLower("(*&#@(^#$✓ ", true)) # None of the string runes are alphabets

doAssert(not isUpper(' '.Rune))

doAssert isUpper("Γ", false)
doAssert(not isUpper("α", false))
doAssert(not isUpper("", false))
doAssert isUpper("ΑΒΓ", false)
doAssert(not isUpper("A#$β", false))
doAssert(not isUpper("A B", false))

doAssert(not isUpper("b", true))
doAssert(not isUpper("", true))
doAssert(not isUpper("AAccβ", true))
doAssert isUpper("A B", true)
doAssert isUpper("1, 2, 3 GO!", true)
doAssert(not isUpper(" ", true))
doAssert(not isUpper("(*&#@(^#$✓ ", true)) # None of the string runes are alphabets

doAssert toUpper("Γ") == "Γ"
doAssert toUpper("b") == "B"
Expand Down

0 comments on commit 155ab58

Please sign in to comment.