From 2c0e72b2ebb4bbf3982ba32c0f53596c1c32ce7e Mon Sep 17 00:00:00 2001 From: DmitryLukyanov Date: Thu, 18 Feb 2021 18:20:38 +0300 Subject: [PATCH] CSHARP-3355: Add more spec tests for invalid relaxed UUIDs. --- src/MongoDB.Bson/IO/JsonReader.cs | 11 +- .../bson-corpus/tests/binary.json | 14 +- .../Specifications/bson-corpus/tests/bsonview | 434 ++++++++++++++++++ 3 files changed, 449 insertions(+), 10 deletions(-) create mode 100644 tests/MongoDB.Bson.Tests/Specifications/bson-corpus/tests/bsonview diff --git a/src/MongoDB.Bson/IO/JsonReader.cs b/src/MongoDB.Bson/IO/JsonReader.cs index e640585a256..6e1551e403a 100644 --- a/src/MongoDB.Bson/IO/JsonReader.cs +++ b/src/MongoDB.Bson/IO/JsonReader.cs @@ -2089,15 +2089,8 @@ private BsonValue ParseUuidExtendedJson() } VerifyToken("}"); - var hexString = uuidToken.StringValue.Replace("-", ""); - var bytes = BsonUtils.ParseHexString(hexString); - if (bytes.Length != 16) - { - var message = string.Format("Invalid $uuid string: '{0}'.", hexString); - throw new FormatException(message); - } - - return new BsonBinaryData(bytes, BsonBinarySubType.UuidStandard); + var guid = Guid.Parse(uuidToken.StringValue); + return new BsonBinaryData(guid, GuidRepresentation.Standard); } private BsonValue ParseUUIDConstructor(string uuidConstructorName) diff --git a/tests/MongoDB.Bson.Tests/Specifications/bson-corpus/tests/binary.json b/tests/MongoDB.Bson.Tests/Specifications/bson-corpus/tests/binary.json index 324c56abdef..dd1ebfa1516 100644 --- a/tests/MongoDB.Bson.Tests/Specifications/bson-corpus/tests/binary.json +++ b/tests/MongoDB.Bson.Tests/Specifications/bson-corpus/tests/binary.json @@ -94,8 +94,20 @@ "string": "{\"x\" : { \"$uuid\" : { \"data\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035d4\"}}}" }, { - "description": "$uuid invalid value", + "description": "$uuid invalid value--too short", "string": "{\"x\" : { \"$uuid\" : \"73ffd264-44b3-90e8-e7d1dfc035d4\"}}" + }, + { + "description": "$uuid invalid value--too long", + "string": "{\"x\" : { \"$uuid\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035d4-789e4\"}}" + }, + { + "description": "$uuid invalid value--misplaced hyphens", + "string": "{\"x\" : { \"$uuid\" : \"73ff-d26444b-34c6-990e8e-7d1dfc035d4\"}}" + }, + { + "description": "$uuid invalid value--too many hyphens", + "string": "{\"x\" : { \"$uuid\" : \"----d264-44b3-4--9-90e8-e7d1dfc0----\"}}" } ] } diff --git a/tests/MongoDB.Bson.Tests/Specifications/bson-corpus/tests/bsonview b/tests/MongoDB.Bson.Tests/Specifications/bson-corpus/tests/bsonview new file mode 100644 index 00000000000..b803fc87e66 --- /dev/null +++ b/tests/MongoDB.Bson.Tests/Specifications/bson-corpus/tests/bsonview @@ -0,0 +1,434 @@ +#!/usr/bin/env perl +use v5.10; +use strict; +use warnings; +use utf8; +use open qw/:std :utf8/; + +use Getopt::Long; +use Pod::Usage; + +use if $^O eq 'MSWin32', 'Win32::Console::ANSI'; +use Term::ANSIColor; + +use constant { + NULL => "\x00", + BSON_TYPE => "C", + BSON_ENAME => "Z*", + BSON_TYPE_NAME => "CZ*", + BSON_DOUBLE => "d", + BSON_STRING => "l/A", + BSON_BOOLEAN => "C", + BSON_REGEX => "Z*Z*", + BSON_JSCODE => "", + BSON_INT32 => "l", + BSON_INT64 => "q", + BSON_TIMESTAMP => "q", + BSON_CODE_W_SCOPE => "l", + BSON_REMAINING => 'a*', + BSON_SKIP_4_BYTES => 'x4', + BSON_OBJECTID => 'a12', + BSON_BINARY_TYPE => 'C', + BSON_CSTRING => 'Z*', + BSON_BYTES => 'a*' +}; + +my $BOLD = $^O eq 'MSWin32' ? "bold " : ""; + +# minimum field size +my %FIELD_SIZES = ( + 0x01 => 8, + 0x02 => 5, + 0x03 => 5, + 0x04 => 5, + 0x05 => 5, + 0x06 => 0, + 0x07 => 12, + 0x08 => 1, + 0x09 => 8, + 0x0A => 0, + 0x0B => 2, + 0x0C => 17, + 0x0D => 5, + 0x0E => 5, + 0x0F => 14, + 0x10 => 4, + 0x11 => 8, + 0x12 => 8, + 0x7F => 0, + 0xFF => 0, +); + +sub main { + my ( $hex, $file, $help ); + GetOptions( + "file=s" => \$file, + "x" => \$hex, + "help|h" => \$help, + ) or die("Error in command line args"); + pod2usage( { -exitval => 2, -verbose => 2, } ) if $help; + + if ( $file ) { + dump_file($file); + } + else { + dump_stdin($hex); + } +} + +sub dump_stdin { + my $hex = shift; + while ( defined( my $bson = ) ) { + chomp $bson; + if ( !length($bson) ) { + print_error("[ no document ]\n"); + next; + } + # in -x mode, treat leading # as a comment + if ( $hex && index( $bson, "#" ) == 0 ) { + say $bson; + next; + } + $bson =~ s[ ][]g if $hex; + $bson = pack( "H*", $bson ) if $hex; + dump_document( \$bson ); + print "\n"; + } +} + +sub dump_file { + my $file = shift; + open my $fh, "<", $file; + binmode($fh); + my $data = do { local $/; <$fh> }; + while ( length $data ) { + my $len = unpack( BSON_INT32, $data ); + my $bson = substr($data,0,$len,''); + dump_document(\$bson); + print "\n"; + } +} + +sub dump_document { + my ( $ref, $is_array ) = @_; + print $is_array ? " [" : " {" if defined $is_array; + dump_header($ref); + 1 while dump_field($ref); + print_error( " " . unpack( "H*", $$ref ) ) if length($$ref); + print $is_array ? " ]" : " }" if defined $is_array; + return; +} + +sub dump_header { + my ($ref) = @_; + + my $len = get_length( $ref, 4 ); + return unless defined $len; + + if ( $len < 5 || $len < length($$ref) + 4 ) { + print_length( $len, 'red' ); + } + else { + print_length( $len, 'blue' ); + } +} + +sub dump_field { + my ($ref) = @_; + + # detect end of document + if ( length($$ref) < 2 ) { + if ( length($$ref) == 0 ) { + print_error(" [missing terminator]"); + } + else { + my $end = substr( $$ref, 0, 1, '' ); + print_hex( $end, $end eq NULL ? 'blue' : 'red' ); + } + return; + } + + # unpack type + my $type = unpack( BSON_TYPE, substr( $$ref, 0, 1, '' ) ); + + if ( !exists $FIELD_SIZES{$type} ) { + print_type( $type, 'red' ); + return; + } + + print_type($type); + + # check for key termination + my $key_end = index( $$ref, NULL ); + return if $key_end == -1; + + # unpack key + my $key = unpack( BSON_CSTRING, substr( $$ref, 0, $key_end + 1, '' ) ); + print_key($key); + + # Check if there is enough data to complete field for this type + # This is greedy, so it checks length, not length -1 + my $min_size = $FIELD_SIZES{$type}; + return if length($$ref) < $min_size; + + # fields without payload: 0x06, 0x0A, 0x7F, 0xFF + return 1 if $min_size == 0; + + # document or array + if ( $type == 0x03 || $type == 0x04 ) { + my ($len) = unpack( BSON_INT32, $$ref ); + my $doc = substr( $$ref, 0, $len, '' ); + dump_document( \$doc, $type == 0x04 ); + return 1; + } + + # fixed width fields + if ( $type == 0x01 + || $type == 0x07 + || $type == 0x09 + || $type == 0x10 + || $type == 0x11 + || $type == 0x12 ) + { + my $len = ( $type == 0x10 ? 4 : $type == 0x07 ? 12 : 8 ); + print_hex( substr( $$ref, 0, $len, '' ) ); + return 1; + } + + # boolean + if ( $type == 0x08 ) { + my $bool = substr( $$ref, 0, 1, '' ); + print_hex( $bool, ( $bool eq "\x00" || $bool eq "\x01" ) ? 'green' : 'red' ); + return 1; + } + + # binary field + if ( $type == 0x05 ) { + my $len = get_length( $ref, -1 ); + my $subtype = substr( $$ref, 0, 1, '' ); + + if ( !defined($len) ) { + print_hex($subtype); + return; + } + + my $binary = substr( $$ref, 0, $len, '' ); + + print_length($len); + print_hex($subtype); + + if ( $subtype eq "\x02" ) { + my $bin_len = get_length( \$binary ); + if ( !defined($bin_len) ) { + print_hex( $binary, 'red' ); + return; + } + if ( $bin_len != length($binary) ) { + print_length( $bin_len, 'red' ); + print_hex( $binary, 'red' ); + return; + } + } + + print_hex($binary) if length($binary); + return 1; + } + + # string or symbol or code + if ( $type == 0x02 || $type == 0x0e || $type == 0x0d ) { + my ( $len, $string ) = get_string($ref); + return unless defined $len; + + print_length( $len, 'cyan' ); + print_string($string); + return 1; + + } + + # regex 0x0B + if ( $type == 0x0B ) { + my ( $pattern, $flag ) = unpack( BSON_CSTRING . BSON_CSTRING, $$ref ); + substr( $$ref, 0, length($pattern) + length($flag) + 2, '' ); + print_string($pattern); + print_string($flag); + return 1; + } + + # code with scope 0x0F + if ( $type == 0x0F ) { + my $len = get_length( $ref, 4 ); + return unless defined $len; + + # len + string + doc minimum size is 4 + 5 + 5 + if ( $len < 14 ) { + print_length( $len, 'red' ); + return; + } + + print_length($len); + + my $cws = substr( $$ref, 0, $len - 4, '' ); + + my ( $strlen, $string ) = get_string( \$cws ); + + if ( !defined $strlen ) { + print_hex( $cws, 'red' ); + return; + } + + print_length($strlen); + print_string($string); + + dump_document( \$cws, 0 ); + + return 1; + } + + # dbpointer 0x0C + if ( $type == 0x0C ) { + my ( $len, $string ) = get_string($ref); + return unless defined $len; + + print_length($len); + print_string($string); + + # Check if there are 12 bytes (plus terminator) or more + return if length($$ref) < 13; + + my $oid = substr( $$ref, 0, 12, '' ); + print_hex($oid); + + return 1; + } + + die "Shouldn't reach here"; +} + +sub get_length { + my ( $ref, $adj ) = @_; + $adj ||= 0; + my $len = unpack( BSON_INT32, substr( $$ref, 0, 4, '' ) ); + return unless defined $len; + + # check if requested length is too long + if ( $len < 0 || $len > length($$ref) + $adj ) { + print_length( $len, 'red' ); + return; + } + + return $len; +} + +sub get_string { + my ($ref) = @_; + + my $len = get_length($ref); + return unless defined $len; + + # len must be at least 1 for trailing 0x00 + if ( $len == 0 ) { + print_length( $len, 'red' ); + return; + } + + my $string = substr( $$ref, 0, $len, '' ); + + # check if null terminated + if ( substr( $string, -1, 1 ) ne NULL ) { + print_length($len); + print_hex( $string, 'red' ); + return; + } + + # remove trailing null + chop($string); + + # try to decode to UTF-8 + if ( !utf8::decode($string) ) { + print_length($len); + print_hex( $string . "\x00", 'red' ); + return; + } + + return ( $len, $string ); +} + +sub print_error { + my ($text) = @_; + print colored( ["${BOLD}red"], $text ); +} + +sub print_type { + my ( $type, $color ) = @_; + $color ||= 'magenta'; + print colored( ["$BOLD$color"], sprintf( " %02x", $type ) ); +} + +sub print_key { + my ($string) = @_; + print_string( $string, 'yellow' ); +} + +sub print_string { + my ( $string, $color ) = @_; + $color ||= 'green'; + $string =~ s{([^[:graph:]])}{sprintf("\\x%02x",ord($1))}ge; + print colored( ["$BOLD$color"], qq[ "$string"] . " 00" ); +} + +sub print_length { + my ( $len, $color ) = @_; + $color ||= 'cyan'; + print colored( ["$BOLD$color"], " " . unpack( "H*", pack( BSON_INT32, $len ) ) ); +} + +sub print_hex { + my ( $value, $color ) = @_; + $color ||= 'green'; + print colored( ["$BOLD$color"], " " . uc( unpack( "H*", $value ) ) ); +} + +main(); + +__END__ + +=head1 NAME + +bsonview - dump a BSON string with color output showing structure + +=head1 SYNOPSIS + + cat file.bson | bsondump + + echo "0500000000" | bsondump -x + +=head1 OPTIONS + + -x input is in hex format (default is 0) + --help, -h show help + +=head1 USAGE + +Reads from C and dumps colored structures to C. + +=head1 AUTHOR + +=over 4 + +=item * + +David Golden + +=back + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2016 by MongoDB, Inc.. + +This is free software, licensed under: + + The Apache License, Version 2.0, January 2004 + +=cut + +=cut