Skip to content

Commit

Permalink
Merge pull request #655 from topazproject/tim/String#scan
Browse files Browse the repository at this point in the history
Missing String#scan
  • Loading branch information
alex committed May 18, 2013
2 parents aabccc5 + b6ef824 commit 6eb84b9
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 16 deletions.
16 changes: 0 additions & 16 deletions spec/tags/core/string/scan_tags.txt
@@ -1,22 +1,6 @@
fails:String#scan returns an array containing all matches
fails:String#scan respects $KCODE when the pattern collapses to nothing
fails:String#scan stores groups as arrays in the returned arrays
fails:String#scan scans for occurrences of the string if pattern is a string
fails:String#scan sets $~ to MatchData of last match and nil when there's none
fails:String#scan supports \G which matches the end of the previous match / string start for first match
fails:String#scan tries to convert pattern to a string via to_str
fails:String#scan raises a TypeError if pattern isn't a Regexp and can't be converted to a String
fails:String#scan taints the results if the String argument is tainted
fails:String#scan taints the results when passed a String argument if self is tainted
fails:String#scan taints the results if the Regexp argument is tainted
fails:String#scan taints the results when passed a Regexp argument if self is tainted
fails:String#scan with pattern and block returns self
fails:String#scan with pattern and block passes each match to the block as one argument: an array
fails:String#scan with pattern and block passes groups to the block as one argument: an array
fails:String#scan with pattern and block sets $~ for access from the block
fails:String#scan with pattern and block restores $~ after leaving the block
fails:String#scan with pattern and block sets $~ to MatchData of last match and nil when there's none for access from outside
fails:String#scan with pattern and block taints the results if the String argument is tainted
fails:String#scan with pattern and block taints the results when passed a String argument if self is tainted
fails:String#scan with pattern and block taints the results if the Regexp argument is tainted
fails:String#scan with pattern and block taints the results when passed a Regexp argument if self is tainted
74 changes: 74 additions & 0 deletions topaz/objects/stringobject.py
Expand Up @@ -877,6 +877,80 @@ def method_getbyte(self, space, pos):
def method_includep(self, space, substr):
    """Return a wrapped boolean: does ``substr`` occur in this string?

    The receiver is unwrapped with ``space.str_w`` and the containment
    test is a plain substring check; the result is wrapped again with
    ``space.newbool``.
    """
    haystack = space.str_w(self)
    found = substr in haystack
    return space.newbool(found)

def scan_string(self, space, w_pattern):
    """Yield a new wrapped string for every occurrence of a literal pattern.

    ``w_pattern`` is unwrapped with ``space.str_w`` and searched for with
    ``str.find``, left to right, without overlapping. Each yielded string
    is passed through ``space.infect`` with both the receiver and the
    pattern as sources.
    """
    haystack = space.str_w(self)
    needle = space.str_w(w_pattern)
    # An empty needle is found at every offset; step by one character so
    # the search still moves forward and eventually runs off the end.
    step = 1 if not needle else len(needle)

    pos = haystack.find(needle, 0)
    while pos >= 0:
        w_found = space.newstr_fromstr(haystack[pos:pos + len(needle)])
        space.infect(w_found, self)
        space.infect(w_found, w_pattern)
        yield w_found
        pos = haystack.find(needle, pos + step)

def scan_regexp(self, space, w_pattern):
    """Yield one result per successive match of ``w_pattern`` in this string.

    For a pattern with capture groups (match data size > 1) each result is
    a wrapped array holding one wrapped string per group; otherwise it is
    a wrapped string holding the whole match. Every produced string is
    passed through ``space.infect`` with both the receiver and the pattern
    as sources.

    The search cursor always advances relative to the end of the match
    just found. After an empty match it moves one position past it, so the
    same empty match is never reported twice; after a non-empty match it
    resumes exactly at the match end. One final search is attempted at
    end-of-string so a trailing empty match is reported, e.g.
    ``"abc".scan(/\\w*/)`` gives ``["abc", ""]``.
    """
    string = space.str_w(self)
    ctx = w_pattern.make_ctx(string)
    last = 0

    # ``<=`` rather than ``<``: a search starting at len(string) can still
    # produce an empty match, which Ruby's String#scan reports.
    while last <= len(string) and self.search_context(space, ctx):
        w_matchdata = w_pattern.get_match_result(space, ctx, found=True)
        if w_matchdata.size() > 1:
            # Group 0 is the whole match; only the capture groups are
            # returned when the pattern has any.
            matches_w = []
            for num in xrange(1, w_matchdata.size(), 1):
                begin, end = w_matchdata.get_span(num)
                assert begin >= 0
                assert end >= 0
                w_str = space.newstr_fromstr(string[begin:end])
                space.infect(w_str, self)
                space.infect(w_str, w_pattern)
                matches_w.append(w_str)
            w_match = space.newarray(matches_w)
        else:
            w_match = space.newstr_fromstr(string[ctx.match_start:ctx.match_end])
            space.infect(w_match, self)
            space.infect(w_match, w_pattern)

        yield w_match

        if ctx.match_start == ctx.match_end:
            # Empty match: step past it, measured from where it was found
            # (not from the previous cursor), to avoid re-matching it.
            last = ctx.match_end + 1
        else:
            last = ctx.match_end
        assert last >= 0
        if last > len(string):
            # Nothing left to search; do not reset past the end.
            break
        ctx.reset(last)

def scan_process_result(self, space, w_match, results_w, block):
    """Deliver a single scan result to its consumer.

    Without a block the result is appended to ``results_w``; with a block
    the block is invoked with the result as its only argument and
    ``results_w`` is left alone.
    """
    if not block:
        results_w.append(w_match)
        return
    space.invoke_block(block, [w_match])

@classdef.method("scan")
def method_scan(self, space, w_pattern, block):
    """Implement ``String#scan``.

    The pattern is first offered a ``to_str`` conversion (without raising
    on failure). If that succeeds it is scanned for literally; otherwise
    it must be a Regexp. Any other argument raises ``TypeError``.

    Returns the receiver when a block was given (each match is passed to
    the block), or a new array of all matches when not.
    """
    w_str_pattern = space.convert_type(
        w_pattern, space.w_string, "to_str", raise_error=False
    )
    as_string = w_str_pattern is not space.w_nil

    # Reject unsupported pattern types up front.
    if not as_string and not space.is_kind_of(w_pattern, space.w_regexp):
        raise space.error(
            space.w_TypeError,
            "wrong argument type %s (expected Regexp)" %
            space.obj_to_s(space.getclass(w_pattern))
        )

    collected_w = []
    if as_string:
        for w_match in self.scan_string(space, w_str_pattern):
            self.scan_process_result(space, w_match, collected_w, block)
    else:
        for w_match in self.scan_regexp(space, w_pattern):
            self.scan_process_result(space, w_match, collected_w, block)

    if block:
        return self
    return space.newarray(collected_w)

@classdef.method("gsub")
def method_gsub(self, space, w_pattern, w_replacement=None, block=None):
    """Implement ``String#gsub`` by delegating to ``gsub_main``.

    All arguments are forwarded unchanged, with ``first_only=False``.
    """
    return self.gsub_main(
        space,
        w_pattern,
        w_replacement,
        block,
        first_only=False
    )
Expand Down
1 change: 1 addition & 0 deletions topaz/utils/regexp.py
Expand Up @@ -810,6 +810,7 @@ def compile(self, ctx):
}
# Maps a single escape letter to a Property node built from the matching
# category constant.
# NOTE(review): presumably these are the escapes recognised inside a
# character set ([...]) -- confirm against the parser that reads this table.
CHARSET_ESCAPES = {
"d": Property(CATEGORY_DIGIT),
"w": Property(CATEGORY_WORD),
}
PROPERTIES = {
"digit": CATEGORY_DIGIT,
Expand Down

0 comments on commit 6eb84b9

Please sign in to comment.