Skip to content

Commit

Permalink
Fix nim-lang#4305: Make split proc for set[char] consistent
Browse files Browse the repository at this point in the history
  • Loading branch information
jyapayne committed Jun 17, 2016
1 parent 9837b12 commit feb45fb
Showing 1 changed file with 63 additions and 70 deletions.
133 changes: 63 additions & 70 deletions lib/pure/strutils.nim
Expand Up @@ -326,17 +326,49 @@ proc toOctal*(c: char): string {.noSideEffect, rtl, extern: "nsuToOctal".} =
result[i] = chr(val mod 8 + ord('0'))
val = val div 8

proc substrEq(s: string, pos: int, substr: string): bool =
var i = 0
var length = substr.len
while i < length and s[pos+i] == substr[i]:
inc i

return i == length

# --------- Private templates for different split separators -----------

template stringHasSep(s: string, index: int, seps: set[char]): bool =
s[index] in seps

template stringHasSep(s: string, index: int, sep: char): bool =
s[index] == sep

template stringHasSep(s: string, index: int, sep: string): bool =
s.substrEq(index, sep)

template splitCommon(s, sep, maxsplit, sepLen) =
## Common code for split procedures
var last = 0
var splits = maxsplit

if len(s) > 0:
while last <= len(s):
var first = last
while last < len(s) and not stringHasSep(s, last, sep):
inc(last)
if splits == 0: last = len(s)
yield substr(s, first, last-1)
if splits == 0: break
dec(splits)
inc(last, sepLen)

iterator split*(s: string, seps: set[char] = Whitespace,
maxsplit: int = -1): string =
## Splits the string `s` into substrings using a group of separators.
##
## Substrings are separated by a substring containing only `seps`. Note
## that whole sequences of characters found in ``seps`` will be counted as
## a single split point and leading/trailing separators will be ignored.
## The following example:
## Substrings are separated by a substring containing only `seps`.
##
## .. code-block:: nim
## for word in split(" this is an example "):
## for word in split("this\lis an\texample"):
## writeLine(stdout, word)
##
## ...generates this output:
Expand All @@ -350,7 +382,7 @@ iterator split*(s: string, seps: set[char] = Whitespace,
## And the following code:
##
## .. code-block:: nim
## for word in split(";;this;is;an;;example;;;", {';'}):
## for word in split("this:is;an$example", {';', ':', '$'}):
## writeLine(stdout, word)
##
## ...produces the same output as the first example. The code:
Expand All @@ -371,26 +403,13 @@ iterator split*(s: string, seps: set[char] = Whitespace,
## "08"
## "08.398990"
##
var last = 0
var splits = maxsplit
assert(not ('\0' in seps))
while last < len(s):
while s[last] in seps: inc(last)
var first = last
while last < len(s) and s[last] notin seps: inc(last) # BUGFIX!
if first <= last-1:
if splits == 0: last = len(s)
yield substr(s, first, last-1)
if splits == 0: break
dec(splits)
splitCommon(s, seps, maxsplit, 1)

iterator split*(s: string, sep: char, maxsplit: int = -1): string =
## Splits the string `s` into substrings using a single separator.
##
## Substrings are separated by the character `sep`.
## Unlike the version of the iterator which accepts a set of separator
## characters, this proc will not coalesce groups of the
## separator, returning a string for each found character. The code:
## The code:
##
## .. code-block:: nim
## for word in split(";;this;is;an;;example;;;", ';'):
Expand All @@ -410,56 +429,27 @@ iterator split*(s: string, sep: char, maxsplit: int = -1): string =
## ""
## ""
##
var last = 0
var splits = maxsplit
assert('\0' != sep)
if len(s) > 0:
# `<=` is correct here for the edge cases!
while last <= len(s):
var first = last
while last < len(s) and s[last] != sep: inc(last)
if splits == 0: last = len(s)
yield substr(s, first, last-1)
if splits == 0: break
dec(splits)
inc(last)

proc substrEq(s: string, pos: int, substr: string): bool =
var i = 0
var length = substr.len
while i < length and s[pos+i] == substr[i]:
inc i

return i == length
splitCommon(s, sep, maxsplit, 1)

iterator split*(s: string, sep: string, maxsplit: int = -1): string =
## Splits the string `s` into substrings using a string separator.
##
## Substrings are separated by the string `sep`.
var last = 0
var splits = maxsplit

if len(s) > 0:
while last <= len(s):
var first = last
while last < len(s) and not s.substrEq(last, sep):
inc(last)
if splits == 0: last = len(s)
yield substr(s, first, last-1)
if splits == 0: break
dec(splits)
inc(last, sep.len)

# --------- Private templates for different rsplit separators -----------

template stringHasSep(s: string, index: int, seps: set[char]): bool =
s[index] in seps

template stringHasSep(s: string, index: int, sep: char): bool =
s[index] == sep
## The code:
##
## .. code-block:: nim
## for word in split("thisDATAisDATAcorrupted", "DATA"):
## writeLine(stdout, word)
##
## Results in:
##
## .. code-block::
## "this"
## "is"
## "corrupted"
##

template stringHasSep(s: string, index: int, sep: string): bool =
s.substrEq(index, sep)
splitCommon(s, sep, maxsplit, sep.len)

template rsplitCommon(s, sep, maxsplit, sepLen) =
## Common code for rsplit functions
Expand Down Expand Up @@ -2218,11 +2208,14 @@ bar
bar
""".unindent() == "foo\nfoo\nbar\n"

let s = " this is an example "
doAssert s.split() == @["this", "is", "an", "example"]
doAssert s.split(maxsplit=4) == @["this", "is", "an", "example"]
doAssert s.split(' ', maxsplit=4) == @["", "this", "", "", "is an example "]
doAssert s.split(" ", maxsplit=4) == @["", "this", "", "", "is an example "]
let s = " this is an example "
let s2 = ":this;is;an:example;;"

doAssert s.split() == @["", "this", "is", "an", "example", "", ""]
doAssert s2.split(seps={':', ';'}) == @["", "this", "is", "an", "example", "", ""]
doAssert s.split(maxsplit=4) == @["", "this", "is", "an", "example "]
doAssert s.split(' ', maxsplit=1) == @["", "this is an example "]
doAssert s.split(" ", maxsplit=4) == @["", "this", "is", "an", "example "]

block: # formatEng tests
doAssert formatEng(0, 2, trim=false) == "0.00"
Expand Down

0 comments on commit feb45fb

Please sign in to comment.