Skip to content

Commit 75bebb3

Browse files
committed
Add strict_integer option to parse numbers with commas as strings
Authored-by: Seth Boyles <sethboyles@gmail.com>
1 parent ba203f1 commit 75bebb3

File tree

6 files changed

+62
-17
lines changed

6 files changed

+62
-17
lines changed

lib/psych.rb

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -268,10 +268,10 @@ module Psych
268268
# YAML documents that are supplied via user input. Instead, please use the
269269
# load method or the safe_load method.
270270
#
271-
def self.unsafe_load yaml, filename: nil, fallback: false, symbolize_names: false, freeze: false
271+
def self.unsafe_load yaml, filename: nil, fallback: false, symbolize_names: false, freeze: false, strict_integer: false
272272
result = parse(yaml, filename: filename)
273273
return fallback unless result
274-
result.to_ruby(symbolize_names: symbolize_names, freeze: freeze)
274+
result.to_ruby(symbolize_names: symbolize_names, freeze: freeze, strict_integer: strict_integer)
275275
end
276276
class << self; alias :load :unsafe_load; end
277277

@@ -320,13 +320,13 @@ class << self; alias :load :unsafe_load; end
320320
# Psych.safe_load("---\n foo: bar") # => {"foo"=>"bar"}
321321
# Psych.safe_load("---\n foo: bar", symbolize_names: true) # => {:foo=>"bar"}
322322
#
323-
def self.safe_load yaml, permitted_classes: [], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false
323+
def self.safe_load yaml, permitted_classes: [], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false, strict_integer: false
324324
result = parse(yaml, filename: filename)
325325
return fallback unless result
326326

327327
class_loader = ClassLoader::Restricted.new(permitted_classes.map(&:to_s),
328328
permitted_symbols.map(&:to_s))
329-
scanner = ScalarScanner.new class_loader
329+
scanner = ScalarScanner.new class_loader, strict_integer: strict_integer
330330
visitor = if aliases
331331
Visitors::ToRuby.new scanner, class_loader, symbolize_names: symbolize_names, freeze: freeze
332332
else
@@ -366,14 +366,15 @@ def self.safe_load yaml, permitted_classes: [], permitted_symbols: [], aliases:
366366
# Raises a TypeError when `yaml` parameter is NilClass. This method is
367367
# similar to `safe_load` except that `Symbol` objects are allowed by default.
368368
#
369-
def self.load yaml, permitted_classes: [Symbol], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false
369+
def self.load yaml, permitted_classes: [Symbol], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false, strict_integer: false
370370
safe_load yaml, permitted_classes: permitted_classes,
371371
permitted_symbols: permitted_symbols,
372372
aliases: aliases,
373373
filename: filename,
374374
fallback: fallback,
375375
symbolize_names: symbolize_names,
376-
freeze: freeze
376+
freeze: freeze,
377+
strict_integer: strict_integer
377378
end
378379

379380
###

lib/psych/nodes/node.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ def each &block
4646
# Convert this node to Ruby.
4747
#
4848
# See also Psych::Visitors::ToRuby
49-
def to_ruby(symbolize_names: false, freeze: false)
50-
Visitors::ToRuby.create(symbolize_names: symbolize_names, freeze: freeze).accept(self)
49+
def to_ruby(symbolize_names: false, freeze: false, strict_integer: false)
50+
Visitors::ToRuby.create(symbolize_names: symbolize_names, freeze: freeze, strict_integer: strict_integer).accept(self)
5151
end
5252
alias :transform :to_ruby
5353

lib/psych/scalar_scanner.rb

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,32 @@ class ScalarScanner
1212
FLOAT = /^(?:[-+]?([0-9][0-9_,]*)?\.[0-9]*([eE][-+][0-9]+)?(?# base 10))$/x
1313

1414
# Taken from http://yaml.org/type/int.html
15-
INTEGER = /^(?:[-+]?0b[0-1_,]+ (?# base 2)
16-
|[-+]?0[0-7_,]+ (?# base 8)
17-
|[-+]?(?:0|[1-9](?:[0-9]|,[0-9]|_[0-9])*) (?# base 10)
18-
|[-+]?0x[0-9a-fA-F_,]+ (?# base 16))$/x
15+
INTEGER_STRICT = /^(?:[-+]?0b[0-1_]+ (?# base 2)
16+
|[-+]?0[0-7_]+ (?# base 8)
17+
|[-+]?(0|[1-9][0-9_]*) (?# base 10)
18+
|[-+]?0x[0-9a-fA-F_]+ (?# base 16))$/x
19+
20+
# Same as above, but allows commas.
21+
# This does not follow the YAML spec, but is kept for backwards compatibility
22+
INTEGER_LEGACY = /^(?:[-+]?0b[0-1_,]+ (?# base 2)
23+
|[-+]?0[0-7_,]+ (?# base 8)
24+
|[-+]?(?:0|[1-9](?:[0-9]|,[0-9]|_[0-9])*) (?# base 10)
25+
|[-+]?0x[0-9a-fA-F_,]+ (?# base 16))$/x
1926

2027
attr_reader :class_loader
2128

2229
# Create a new scanner
23-
def initialize class_loader
30+
def initialize class_loader, strict_integer: false
2431
@symbol_cache = {}
2532
@class_loader = class_loader
33+
@strict_integer = strict_integer
2634
end
2735

2836
# Tokenize +string+ returning the Ruby object
2937
def tokenize string
3038
return nil if string.empty?
3139
return @symbol_cache[string] if @symbol_cache.key?(string)
32-
40+
integer_regex = @strict_integer ? INTEGER_STRICT : INTEGER_LEGACY
3341
# Check for a String type, being careful not to get caught by hash keys, hex values, and
3442
# special floats (e.g., -.inf).
3543
if string.match?(%r{^[^\d.:-]?[[:alpha:]_\s!@#$%\^&*(){}<>|/\\~;=]+}) || string.match?(/\n/)
@@ -89,7 +97,7 @@ def tokenize string
8997
else
9098
Float(string.gsub(/[,_]|\.([Ee]|$)/, '\1'))
9199
end
92-
elsif string.match?(INTEGER)
100+
elsif string.match?(integer_regex)
93101
parse_int string
94102
else
95103
string

lib/psych/visitors/to_ruby.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ module Visitors
1212
###
1313
# This class walks a YAML AST, converting each node to Ruby
1414
class ToRuby < Psych::Visitors::Visitor
15-
def self.create(symbolize_names: false, freeze: false)
15+
def self.create(symbolize_names: false, freeze: false, strict_integer: false)
1616
class_loader = ClassLoader.new
17-
scanner = ScalarScanner.new class_loader
17+
scanner = ScalarScanner.new class_loader, strict_integer: strict_integer
1818
new(scanner, class_loader, symbolize_names: symbolize_names, freeze: freeze)
1919
end
2020

test/psych/test_numeric.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,5 +43,16 @@ def test_does_not_attempt_numeric
4343
str = Psych.load('--- 1.1.1')
4444
assert_equal '1.1.1', str
4545
end
46+
47+
# This behavior does not follow the YAML spec, but is kept for backwards compatibility
48+
def test_string_with_commas
49+
number = Psych.load('--- 12,34,56')
50+
assert_equal 123456, number
51+
end
52+
53+
def test_string_with_commas_with_strict_integer
54+
str = Psych.load('--- 12,34,56', strict_integer: true)
55+
assert_equal '12,34,56', str
56+
end
4657
end
4758
end

test/psych/test_scalar_scanner.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,31 @@ def test_scan_int_commas_and_underscores
149149
assert_equal 0x123456789abcdef, ss.tokenize('0x12_,34,_56,_789abcdef__')
150150
end
151151

152+
def test_scan_strict_int_commas_and_underscores
153+
# this test is to ensure adherence to the YAML spec when using the 'strict_integer' option
154+
scanner = Psych::ScalarScanner.new ClassLoader.new, strict_integer: true
155+
assert_equal 123_456_789, scanner.tokenize('123_456_789')
156+
assert_equal '123,456,789', scanner.tokenize('123,456,789')
157+
assert_equal '1_2,3,4_5,6_789', scanner.tokenize('1_2,3,4_5,6_789')
158+
159+
assert_equal 1, scanner.tokenize('1')
160+
assert_equal 1, scanner.tokenize('+1')
161+
assert_equal(-1, scanner.tokenize('-1'))
162+
163+
assert_equal 0b010101010, scanner.tokenize('0b010101010')
164+
assert_equal 0b010101010, scanner.tokenize('0b01_01_01_010')
165+
assert_equal '0b0,1_0,1_,0,1_01,0', scanner.tokenize('0b0,1_0,1_,0,1_01,0')
166+
167+
assert_equal 01234567, scanner.tokenize('01234567')
168+
assert_equal '0_,,,1_2,_34567', scanner.tokenize('0_,,,1_2,_34567')
169+
170+
assert_equal 0x123456789abcdef, scanner.tokenize('0x123456789abcdef')
171+
assert_equal 0x123456789abcdef, scanner.tokenize('0x12_34_56_789abcdef')
172+
assert_equal '0x12_,34,_56,_789abcdef', scanner.tokenize('0x12_,34,_56,_789abcdef')
173+
assert_equal '0x_12_,34,_56,_789abcdef', scanner.tokenize('0x_12_,34,_56,_789abcdef')
174+
assert_equal '0x12_,34,_56,_789abcdef__', scanner.tokenize('0x12_,34,_56,_789abcdef__')
175+
end
176+
152177
def test_scan_dot
153178
assert_equal '.', ss.tokenize('.')
154179
end

0 commit comments

Comments
 (0)