Skip to content

Commit

Permalink
Fix isUpper, isLower for strings with non-alpha unicode chars
Browse files Browse the repository at this point in the history
Fixes nim-lang#7963.

This commit changes the behavior of isUpper and isLower in the
unicode module -- only for the case where the input is a *string*
with a mix of alphabetic and non-alphabetic unicode characters.

The new behavior mimics that of Python's isupper and islower,
i.e. non-alphabetic chars are ignored when checking whether the whole
string is upper-case or lower-case.

    Before: doAssert(not "A B".isUpper) passed
    Now:    doAssert "A B".isUpper passes

... and similarly for isLower.

To get the old behavior (the old behavior checked if the whole string
contained only alphabetic unicode characters AND if all characters
were upper/lower case), simply do:

    str.isAlpha and str.isUpper
    str.isAlpha and str.isLower

(where str is a variable of type string)
  • Loading branch information
kaushalmodi committed Jun 7, 2018
1 parent bf394ed commit a0a105b
Showing 1 changed file with 52 additions and 11 deletions.
63 changes: 52 additions & 11 deletions lib/pure/unicode.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1392,7 +1392,7 @@ proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} =
(c >= 0xfe20 and c <= 0xfe2f))

template runeCheck(s, runeProc) =
## Common code for rune.isLower, rune.isUpper, etc
## Common code for isAlpha and isSpace.
result = if len(s) == 0: false else: true

var
Expand All @@ -1403,16 +1403,6 @@ template runeCheck(s, runeProc) =
fastRuneAt(s, i, rune, doInc=true)
result = runeProc(rune) and result

proc isUpper*(s: string): bool {.rtl, extern: "nuc$1Str",
    noSideEffect, procvar.} =
  ## Reports whether `s` consists entirely of upper case unicode characters.
  ## An empty string yields false.
  runeCheck(s, isUpper)

proc isLower*(s: string): bool {.rtl, extern: "nuc$1Str",
    noSideEffect, procvar.} =
  ## Reports whether `s` consists entirely of lower case unicode characters.
  ## An empty string yields false.
  runeCheck(s, isLower)

proc isAlpha*(s: string): bool {.noSideEffect, procvar,
rtl, extern: "nuc$1Str".} =
## Returns true iff `s` contains all alphabetic unicode characters.
Expand All @@ -1423,6 +1413,43 @@ proc isSpace*(s: string): bool {.noSideEffect, procvar,
## Returns true iff `s` contains all whitespace unicode characters.
runeCheck(s, isWhiteSpace)

template runeCaseCheck(s, runeProc) =
  ## Shared body of the string versions of isLower and isUpper.
  ##
  ## Decodes `s` rune by rune and applies `runeProc` to the alphabetic runes
  ## only; non-alphabetic runes are skipped. Makes the enclosing proc return
  ## false as soon as an alphabetic rune fails `runeProc`, and otherwise
  ## return whether at least one alphabetic rune was seen.
  var
    pos = 0
    current: Rune
    sawAlpha = false

  # An empty `s` never enters the loop and falls through to `false`.
  while pos < len(s):
    fastRuneAt(s, pos, current, doInc=true)
    if isAlpha(current):
      if not runeProc(current):
        return false
      sawAlpha = true
  return sawAlpha

proc isUpper*(s: string): bool {.rtl, extern: "nuc$1Str",
    noSideEffect, procvar.} =
  ## Reports whether every alphabetic unicode character in `s` is upper case.
  ##
  ## Non-alphabetic characters are skipped rather than treated as failures,
  ## so `"A B"` and `"1, 2, 3 GO!"` both yield true.
  ## The result is false when `s` is empty or holds no alphabetic
  ## character at all (for example `" "`).
  runeCaseCheck(s, isUpper)

proc isLower*(s: string): bool {.rtl, extern: "nuc$1Str",
    noSideEffect, procvar.} =
  ## Reports whether every alphabetic unicode character in `s` is lower case.
  ##
  ## Non-alphabetic characters are skipped rather than treated as failures,
  ## so `"a b"` and `"1, 2, 3 go!"` both yield true.
  ## The result is false when `s` is empty or holds no alphabetic
  ## character at all (for example `" "`).
  runeCaseCheck(s, isLower)

template convertRune(s, runeProc) =
## Convert runes in `s` using `runeProc` as the converter.
result = newString(len(s))
Expand Down Expand Up @@ -1760,21 +1787,35 @@ when isMainModule:
doAssert(not isLower("Γ"))
doAssert(not isLower("4"))
doAssert(not isLower(""))
doAssert(not isLower(' '.Rune))

doAssert isLower("abcdγ")
doAssert(not isLower("abCDΓ"))
doAssert(not isLower("33aaΓ"))

doAssert isLower("a b")
doAssert isLower("ab?!")
doAssert isLower("1, 2, 3 go!")
doAssert(not isLower(" "))
doAssert(not isLower("(*&#@(^#$✓ ")) # None of the string chars are alphabets

doAssert isUpper("Γ")
doAssert(not isUpper("b"))
doAssert(not isUpper("α"))
doAssert(not isUpper(""))
doAssert(not isUpper(""))
doAssert(not isUpper(' '.Rune))

doAssert isUpper("ΑΒΓ")
doAssert(not isUpper("AAccβ"))
doAssert(not isUpper("A#$β"))

doAssert isUpper("A B")
doAssert isUpper("AB?!")
doAssert isUpper("1, 2, 3 GO!")
doAssert(not isUpper(" "))
doAssert(not isUpper("(*&#@(^#$✓ ")) # None of the string chars are alphabets

doAssert toUpper("Γ") == "Γ"
doAssert toUpper("b") == "B"
doAssert toUpper("α") == "Α"
Expand Down

0 comments on commit a0a105b

Please sign in to comment.