Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Merge remote branch 'upstream/develop' into develop

  • Loading branch information...
commit 7ece760b43eee02a69ea9a971bc6a2528c2898aa 2 parents 18926aa + 8a44e75
Philip Prindeville authored
256 JSON/src/Parser.cpp
@@ -38,6 +38,7 @@
38 38 #include "Poco/JSON/JSONException.h"
39 39 #include "Poco/Ascii.h"
40 40 #include "Poco/Token.h"
  41 +#include "Poco/UTF8Encoding.h"
41 42 #undef min
42 43 #undef max
43 44 #include <limits>
@@ -65,12 +66,12 @@ class SeparatorToken: public Token
65 66
66 67 bool start(char c, std::istream& istr)
67 68 {
68   - if ( c == '{'
69   - || c == '}'
70   - || c == ']'
71   - || c == '['
72   - || c == ','
73   - || c == ':' )
  69 + if (c == '{'
  70 + || c == '}'
  71 + || c == ']'
  72 + || c == '['
  73 + || c == ','
  74 + || c == ':')
74 75 {
75 76 _value = c;
76 77 return true;
@@ -108,7 +109,7 @@ class StringToken: public Token
108 109
109 110 bool start(char c, std::istream& istr)
110 111 {
111   - if ( c == '"')
  112 + if (c == '"')
112 113 {
113 114 _value = ""; // We don't need the quote!
114 115 return true;
@@ -118,10 +119,10 @@ class StringToken: public Token
118 119
119 120 void finish(std::istream& istr)
120 121 {
121   - int c = istr.get();
122   - while (c != -1)
  122 + int c = 0;
  123 + while ((c = istr.get()) != -1)
123 124 {
124   - if ( c == 0 )
  125 + if (c == 0)
125 126 {
126 127 throw JSONException("Null byte not allowed");
127 128 }
@@ -131,38 +132,47 @@ class StringToken: public Token
131 132 throw JSONException(format("Control character 0x%x not allowed", (unsigned int) c));
132 133 }
133 134
134   - if ( c == '"' )
  135 + if (c == '"')
135 136 break;
  137 +
  138 + if(0x80 <= c && c <= 0xFF)
  139 + {
  140 + int count = utf8_check_first(c);
  141 + if (!count)
  142 + {
  143 + throw JSONException(format("Unable to decode byte 0x%x", (unsigned int) c));
  144 + }
  145 +
  146 + char buffer[5];
  147 + buffer[0] = c;
  148 + for(int i = 1; i < count; ++i)
  149 + {
  150 + buffer[i] = istr.get();
  151 + }
  152 +
  153 + if ( !UTF8Encoding::isLegal((unsigned char*) buffer, count) )
  154 + {
  155 + throw JSONException("No legal UTF8 found");
  156 + }
  157 + buffer[count] = '\0';
  158 + _value += buffer;
136 159
137   - if ( c == '\\' ) // Escaped String
  160 + continue;
  161 + }
  162 +
  163 + if (c == '\\') // Escaped String
138 164 {
139 165 c = istr.get();
140 166 switch(c)
141 167 {
142   - case '"' :
143   - c = '"';
144   - break;
145   - case '\\' :
146   - c = '\\';
147   - break;
148   - case '/' :
149   - c = '/';
150   - break;
151   - case 'b' :
152   - c = '\b';
153   - break;
154   - case 'f' :
155   - c = '\f';
156   - break;
157   - case 'n' :
158   - c = '\n';
159   - break;
160   - case 'r' :
161   - c = '\r';
162   - break;
163   - case 't' :
164   - c = '\t';
165   - break;
  168 + case '"' : c = '"'; break;
  169 + case '\\' : c = '\\'; break;
  170 + case '/' : c = '/'; break;
  171 + case 'b' : c = '\b'; break;
  172 + case 'f' : c = '\f'; break;
  173 + case 'n' : c = '\n'; break;
  174 + case 'r' : c = '\r'; break;
  175 + case 't' : c = '\t'; break;
166 176 case 'u' : // Unicode
167 177 {
168 178 Poco::Int32 unicode = decodeUnicode(istr);
@@ -196,8 +206,16 @@ class StringToken: public Token
196 206 {
197 207 throw JSONException("Invalid unicode");
198 208 }
199   - c = unicode;
200   - break;
  209 +
  210 + Poco::UTF8Encoding utf8encoding;
  211 + int length = utf8encoding.convert(unicode, NULL, 0);
  212 + std::vector<unsigned char> convert(length);
  213 + utf8encoding.convert(unicode, &convert[0], length);
  214 + for(int i = 0; i < length; ++i)
  215 + {
  216 + _value += (char) convert[i];
  217 + }
  218 + continue;
201 219 }
202 220 default:
203 221 {
@@ -206,7 +224,6 @@ class StringToken: public Token
206 224 }
207 225 }
208 226 _value += c;
209   - c = istr.get();
210 227 }
211 228
212 229 if ( c == -1 )
@@ -241,6 +258,49 @@ class StringToken: public Token
241 258
242 259 return value;
243 260 }
  261 +
  262 +private:
  263 + int utf8_check_first(char byte)
  264 + {
  265 + unsigned char u = (unsigned char) byte;
  266 +
  267 + if(u < 0x80)
  268 + return 1;
  269 +
  270 + if (0x80 <= u && u <= 0xBF)
  271 + {
  272 + /* second, third or fourth byte of a multi-byte
  273 + sequence, i.e. a "continuation byte" */
  274 + return 0;
  275 + }
  276 + else if(u == 0xC0 || u == 0xC1)
  277 + {
  278 + /* overlong encoding of an ASCII byte */
  279 + return 0;
  280 + }
  281 + else if(0xC2 <= u && u <= 0xDF)
  282 + {
  283 + /* 2-byte sequence */
  284 + return 2;
  285 + }
  286 + else if(0xE0 <= u && u <= 0xEF)
  287 + {
  288 + /* 3-byte sequence */
  289 + return 3;
  290 + }
  291 + else if(0xF0 <= u && u <= 0xF4)
  292 + {
  293 + /* 4-byte sequence */
  294 + return 4;
  295 + }
  296 + else
  297 + {
  298 + /* u >= 0xF5 */
  299 + /* Restricted (start of 4-, 5- or 6-byte sequence) or invalid
  300 + UTF-8 */
  301 + return 0;
  302 + }
  303 + }
244 304 };
245 305
246 306
@@ -307,7 +367,7 @@ class NumberToken: public Token
307 367 if ( c == -1 )
308 368 return false;
309 369
310   - if ( Ascii::isDigit(c) )
  370 + if (Ascii::isDigit(c))
311 371 {
312 372 if ( c == '0' )
313 373 {
@@ -321,14 +381,14 @@ class NumberToken: public Token
321 381 return true;
322 382 }
323 383
324   - if ( c == '-' )
  384 + if (c == '-')
325 385 {
326 386 _value = c;
327 387
328 388 int nc = istr.peek();
329   - if ( Ascii::isDigit(nc) )
  389 + if (Ascii::isDigit(nc))
330 390 {
331   - if ( nc == '0' )
  391 + if (nc == '0')
332 392 {
333 393 _value += '0';
334 394 istr.get();
@@ -351,7 +411,7 @@ class NumberToken: public Token
351 411 int c;
352 412 while( (c = istr.peek()) != -1)
353 413 {
354   - if ( Ascii::isDigit(c) )
  414 + if (Ascii::isDigit(c))
355 415 {
356 416 _value += c;
357 417 istr.get();
@@ -362,7 +422,7 @@ class NumberToken: public Token
362 422 {
363 423 case '.': // Float
364 424 {
365   - if ( _activeClass == Token::FLOAT_LITERAL_TOKEN )
  425 + if (_activeClass == Token::FLOAT_LITERAL_TOKEN)
366 426 {
367 427 throw JSONException("Invalid float value");
368 428 }
@@ -383,7 +443,7 @@ class NumberToken: public Token
383 443 case 'E':
384 444 case 'e':
385 445 {
386   - if ( _activeClass == Token::DOUBLE_LITERAL_TOKEN )
  446 + if (_activeClass == Token::DOUBLE_LITERAL_TOKEN)
387 447 {
388 448 throw JSONException("Invalid double value");
389 449 }
@@ -395,14 +455,14 @@ class NumberToken: public Token
395 455
396 456 // When the next char is - or + then read the next char
397 457 c = istr.peek();
398   - if ( c == '-' || c == '+' )
  458 + if (c == '-' || c == '+')
399 459 {
400 460 _value += c;
401 461 istr.get();
402 462 c = istr.peek();
403 463 }
404 464
405   - if ( ! Ascii::isDigit(c) )
  465 + if (! Ascii::isDigit(c))
406 466 {
407 467 throw JSONException("Invalid double value");
408 468 }
@@ -444,7 +504,7 @@ Parser::~Parser()
444 504 const Token* Parser::nextToken()
445 505 {
446 506 const Token* token = _tokenizer.next();
447   - if ( token->is(Token::EOF_TOKEN) )
  507 + if (token->is(Token::EOF_TOKEN))
448 508 {
449 509 throw JSONException("Unexpected EOF found");
450 510 }
@@ -457,14 +517,14 @@ void Parser::parse(std::istream& in)
457 517 _tokenizer.attachToStream(in);
458 518 const Token* token = nextToken();
459 519
460   - if ( token->is(Token::SEPARATOR_TOKEN) )
  520 + if (token->is(Token::SEPARATOR_TOKEN))
461 521 {
462 522 // This must be a { or a [
463   - if ( token->asChar() == '{' )
  523 + if (token->asChar() == '{')
464 524 {
465 525 readObject();
466 526 }
467   - else if ( token->asChar() == '[' )
  527 + else if (token->asChar() == '[')
468 528 {
469 529 readArray();
470 530 }
@@ -473,7 +533,7 @@ void Parser::parse(std::istream& in)
473 533 throw JSONException(format("Invalid separator '%c' found. Expecting { or [", token->asChar()));
474 534 }
475 535 token = _tokenizer.next();
476   - if ( ! token->is(Token::EOF_TOKEN) )
  536 + if (! token->is(Token::EOF_TOKEN))
477 537 {
478 538 throw JSONException(format("EOF expected but found '%s'", token->asString()));
479 539 }
@@ -487,7 +547,7 @@ void Parser::parse(std::istream& in)
487 547
488 548 void Parser::readObject()
489 549 {
490   - if ( _handler != NULL )
  550 + if (_handler != NULL)
491 551 {
492 552 _handler->startObject();
493 553 }
@@ -497,7 +557,7 @@ void Parser::readObject()
497 557 while(readRow());
498 558 }
499 559
500   - if ( _handler != NULL )
  560 + if (_handler != NULL)
501 561 {
502 562 _handler->endObject();
503 563 }
@@ -508,12 +568,12 @@ bool Parser::readRow(bool firstCall)
508 568 {
509 569 const Token* token = nextToken();
510 570
511   - if ( firstCall && token->tokenClass() == Token::SEPARATOR_TOKEN && token->asChar() == '}' )
  571 + if (firstCall && token->tokenClass() == Token::SEPARATOR_TOKEN && token->asChar() == '}')
512 572 {
513 573 return false; // End of object is possible for an empty object
514 574 }
515 575
516   - if ( token->tokenClass() == Token::STRING_LITERAL_TOKEN )
  576 + if (token->tokenClass() == Token::STRING_LITERAL_TOKEN)
517 577 {
518 578 std::string propertyName = token->tokenString();
519 579 if ( _handler != NULL )
@@ -523,20 +583,20 @@ bool Parser::readRow(bool firstCall)
523 583
524 584 token = nextToken();
525 585
526   - if ( token->is(Token::SEPARATOR_TOKEN)
527   - && token->asChar() == ':' )
  586 + if ( token->is(Token::SEPARATOR_TOKEN)
  587 + && token->asChar() == ':')
528 588 {
529 589 readValue(nextToken());
530 590
531 591 token = nextToken();
532 592
533   - if ( token->is(Token::SEPARATOR_TOKEN) )
  593 + if (token->is(Token::SEPARATOR_TOKEN))
534 594 {
535   - if ( token->asChar() == ',' )
  595 + if (token->asChar() == ',')
536 596 {
537 597 return true; // Read next row
538 598 }
539   - else if ( token->asChar() == '}' )
  599 + else if (token->asChar() == '}')
540 600 {
541 601 return false; // End of object
542 602 }
@@ -573,20 +633,20 @@ void Parser::readValue(const Token* token)
573 633 break;
574 634
575 635 case Token::INTEGER_LITERAL_TOKEN:
576   - if ( _handler != NULL )
  636 + if (_handler != NULL)
577 637 {
578 638 #if defined(POCO_HAVE_INT64)
579   - Int64 value = token->asInteger64();
580   - // if number is 32-bit, then handle as such
581   - if ( value > std::numeric_limits<int>::max()
582   - || value < std::numeric_limits<int>::min() )
583   - {
584   - _handler->value(value);
585   - }
586   - else
587   - {
588   - _handler->value(static_cast<int>(value));
589   - }
  639 + Int64 value = token->asInteger64();
  640 + // if number is 32-bit, then handle as such
  641 + if ( value > std::numeric_limits<int>::max()
  642 + || value < std::numeric_limits<int>::min())
  643 + {
  644 + _handler->value(value);
  645 + }
  646 + else
  647 + {
  648 + _handler->value(static_cast<int>(value));
  649 + }
590 650 #else
591 651 int value = token->asInteger();
592 652 _handle->value(value);
@@ -595,23 +655,23 @@ void Parser::readValue(const Token* token)
595 655 break;
596 656 case Token::KEYWORD_TOKEN:
597 657 {
598   - if ( token->tokenString().compare("null") == 0 )
  658 + if (token->tokenString().compare("null") == 0)
599 659 {
600   - if ( _handler != NULL )
  660 + if (_handler != NULL)
601 661 {
602 662 _handler->null();
603 663 }
604 664 }
605   - else if ( token->tokenString().compare("true") == 0 )
  665 + else if (token->tokenString().compare("true") == 0)
606 666 {
607   - if ( _handler != NULL )
  667 + if (_handler != NULL)
608 668 {
609 669 _handler->value(true);
610 670 }
611 671 }
612   - else if ( token->tokenString().compare("false") == 0 )
  672 + else if (token->tokenString().compare("false") == 0)
613 673 {
614   - if ( _handler != NULL )
  674 + if (_handler != NULL)
615 675 {
616 676 _handler->value(false);
617 677 }
@@ -625,46 +685,48 @@ void Parser::readValue(const Token* token)
625 685 case Token::FLOAT_LITERAL_TOKEN:
626 686 // Fall through
627 687 case Token::DOUBLE_LITERAL_TOKEN:
628   - if ( _handler != NULL )
  688 + if (_handler != NULL)
629 689 {
630 690 _handler->value(token->asFloat());
631 691 }
632 692 break;
633 693 case Token::STRING_LITERAL_TOKEN:
634   - if ( _handler != NULL )
  694 + if (_handler != NULL)
635 695 {
636 696 _handler->value(token->tokenString());
637 697 }
638 698 break;
639 699 case Token::SEPARATOR_TOKEN:
640   - {
641   - if ( token->asChar() == '{' )
642   - {
643   - readObject();
644   - }
645   - else if ( token->asChar() == '[' )
646 700 {
647   - readArray();
  701 + if (token->asChar() == '{')
  702 + {
  703 + readObject();
  704 + }
  705 + else if (token->asChar() == '[')
  706 + {
  707 + readArray();
  708 + }
  709 + break;
648 710 }
649   - break;
650   - }
  711 + case Token::INVALID_TOKEN:
  712 + throw JSONException(format("Invalid token '%s' found", token->asString()));
651 713 }
652 714 }
653 715
654 716
655 717 void Parser::readArray()
656 718 {
657   - if ( _handler != NULL )
  719 + if (_handler != NULL)
658 720 {
659 721 _handler->startArray();
660 722 }
661 723
662   - if ( readElements(true) ) // First call is special: check for empty array
  724 + if (readElements(true)) // First call is special: check for empty array
663 725 {
664 726 while(readElements());
665 727 }
666 728
667   - if ( _handler != NULL )
  729 + if (_handler != NULL)
668 730 {
669 731 _handler->endArray();
670 732 }
@@ -675,7 +737,7 @@ bool Parser::readElements(bool firstCall)
675 737 {
676 738 const Token* token = nextToken();
677 739
678   - if ( firstCall && token->is(Token::SEPARATOR_TOKEN) && token->asChar() == ']' )
  740 + if (firstCall && token->is(Token::SEPARATOR_TOKEN) && token->asChar() == ']')
679 741 {
680 742 // End of array is possible for an empty array
681 743 return false;
@@ -687,10 +749,10 @@ bool Parser::readElements(bool firstCall)
687 749
688 750 if ( token->is(Token::SEPARATOR_TOKEN) )
689 751 {
690   - if ( token->asChar() == ']' )
  752 + if (token->asChar() == ']')
691 753 return false; // End of array
692 754
693   - if ( token->asChar() == ',' )
  755 + if (token->asChar() == ',')
694 756 return true;
695 757
696 758 throw JSONException(format("Invalid separator '%c' found. Expecting , or ]", token->asChar()));
108 JSON/testsuite/src/JSONTest.cpp
@@ -33,6 +33,7 @@
33 33 #include "JSONTest.h"
34 34 #include "CppUnit/TestCaller.h"
35 35 #include "CppUnit/TestSuite.h"
  36 +
36 37 #include "Poco/JSON/Object.h"
37 38 #include "Poco/JSON/Parser.h"
38 39 #include "Poco/JSON/Query.h"
@@ -40,13 +41,17 @@
40 41 #include "Poco/JSON/Stringifier.h"
41 42 #include "Poco/JSON/DefaultHandler.h"
42 43 #include "Poco/JSON/Template.h"
  44 +
43 45 #include "Poco/Path.h"
44 46 #include "Poco/Environment.h"
45 47 #include "Poco/File.h"
46 48 #include "Poco/FileStream.h"
47 49 #include "Poco/Glob.h"
48   -#include <set>
  50 +#include "Poco/UTF8Encoding.h"
  51 +#include "Poco/Latin1Encoding.h"
  52 +#include "Poco/TextConverter.h"
49 53
  54 +#include <set>
50 55
51 56 using namespace Poco::JSON;
52 57 using namespace Poco::Dynamic;
@@ -74,22 +79,6 @@ void JSONTest::tearDown()
74 79 }
75 80
76 81
77   -void JSONTest::testStringifier()
78   -{
79   - Object obj;
80   -
81   - Array arr;
82   - Object obj2;
83   -
84   - obj.set("array", arr);
85   - obj.set("obj2", obj2);
86   -
87   - std::ostringstream ostr;
88   - obj.stringify(ostr);
89   - assert (ostr.str() == "{\"array\":[],\"obj2\":{}}");
90   -}
91   -
92   -
93 82 void JSONTest::testNullProperty()
94 83 {
95 84 std::string json = "{ \"test\" : null }";
@@ -845,6 +834,50 @@ void JSONTest::testInvalidJanssonFiles()
845 834 }
846 835
847 836
  837 +void JSONTest::testInvalidUnicodeJanssonFiles()
  838 +{
  839 + Poco::Path pathPattern(getTestFilesPath("invalid-unicode"));
  840 +
  841 + std::set<std::string> paths;
  842 + Poco::Glob::glob(pathPattern, paths);
  843 +
  844 + for(std::set<std::string>::iterator it = paths.begin(); it != paths.end(); ++it)
  845 + {
  846 + Poco::Path filePath(*it, "input");
  847 +
  848 + if ( filePath.isFile() )
  849 + {
  850 + Poco::File inputFile(filePath);
  851 + if ( inputFile.exists() )
  852 + {
  853 + Poco::FileInputStream fis(filePath.toString());
  854 + std::cout << filePath.toString() << std::endl;
  855 +
  856 + Parser parser;
  857 + Var result;
  858 +
  859 + try
  860 + {
  861 + DefaultHandler handler;
  862 + parser.setHandler(&handler);
  863 + parser.parse(fis);
  864 + result = handler.result();
  865 + // We shouldn't get here.
  866 + std::cout << "We didn't get an exception. This is the result: " << result.convert<std::string>() << std::endl;
  867 + fail(result.convert<std::string>());
  868 + }
  869 + catch(JSONException&)
  870 + {
  871 + continue;
  872 + }
  873 + catch(Poco::SyntaxException&)
  874 + { }
  875 + }
  876 + }
  877 + }
  878 +}
  879 +
  880 +
848 881 void JSONTest::testTemplate()
849 882 {
850 883 Template tpl;
@@ -858,6 +891,40 @@ void JSONTest::testTemplate()
858 891 tpl.render(data, std::cout);
859 892 }
860 893
  894 +void JSONTest::testUnicode()
  895 +{
  896 + const unsigned char supp[] = {0x61, 0xE1, 0xE9, 0x78, 0xED, 0xF3, 0xFA, 0x0};
  897 + std::string text((const char*) supp);
  898 +
  899 + std::string json = "{ \"test\" : \"a\\u00E1\\u00E9x\\u00ED\\u00F3\\u00FA\" }";
  900 + Parser parser;
  901 +
  902 + Var result;
  903 + try
  904 + {
  905 + DefaultHandler handler;
  906 + parser.setHandler(&handler);
  907 + parser.parse(json);
  908 + result = handler.result();
  909 + }
  910 + catch(JSONException& jsone)
  911 + {
  912 + std::cout << jsone.message() << std::endl;
  913 + assert(false);
  914 + }
  915 + assert(result.type() == typeid(Object::Ptr));
  916 +
  917 + Object::Ptr object = result.extract<Object::Ptr>();
  918 + Var test = object->get("test");
  919 +
  920 + Poco::Latin1Encoding latin1;
  921 + Poco::UTF8Encoding utf8;
  922 + Poco::TextConverter converter(latin1, utf8);
  923 + std::string original;
  924 + converter.convert(text, original);
  925 +
  926 + assert(test.convert<std::string>() == original);
  927 +}
861 928
862 929 std::string JSONTest::getTestFilesPath(const std::string& type)
863 930 {
@@ -879,8 +946,10 @@ std::string JSONTest::getTestFilesPath(const std::string& type)
879 946 if (Poco::File(pathPattern).exists())
880 947 validDir += '*';
881 948 else
  949 + {
  950 + std::cout << "Can't find " << validDir << std::endl;
882 951 throw Poco::NotFoundException("cannot locate directory containing valid JSON test files");
883   -
  952 + }
884 953 return validDir;
885 954 }
886 955
@@ -889,7 +958,6 @@ CppUnit::Test* JSONTest::suite()
889 958 {
890 959 CppUnit::TestSuite* pSuite = new CppUnit::TestSuite("JSONTest");
891 960
892   - CppUnit_addTest(pSuite, JSONTest, testStringifier);
893 961 CppUnit_addTest(pSuite, JSONTest, testNullProperty);
894 962 CppUnit_addTest(pSuite, JSONTest, testTrueProperty);
895 963 CppUnit_addTest(pSuite, JSONTest, testFalseProperty);
@@ -917,7 +985,9 @@ CppUnit::Test* JSONTest::suite()
917 985 CppUnit_addTest(pSuite, JSONTest, testQuery);
918 986 CppUnit_addTest(pSuite, JSONTest, testValidJanssonFiles);
919 987 CppUnit_addTest(pSuite, JSONTest, testInvalidJanssonFiles);
  988 + CppUnit_addTest(pSuite, JSONTest, testInvalidUnicodeJanssonFiles);
920 989 CppUnit_addTest(pSuite, JSONTest, testTemplate);
  990 + CppUnit_addTest(pSuite, JSONTest, testUnicode);
921 991
922 992 return pSuite;
923 993 }
3  JSON/testsuite/src/JSONTest.h
@@ -46,7 +46,6 @@ class JSONTest: public CppUnit::TestCase
46 46 JSONTest(const std::string& name);
47 47 ~JSONTest();
48 48
49   - void testStringifier();
50 49 void testNullProperty();
51 50 void testTrueProperty();
52 51 void testFalseProperty();
@@ -76,6 +75,8 @@ class JSONTest: public CppUnit::TestCase
76 75 void testInvalidJanssonFiles();
77 76 void testTemplate();
78 77 void testItunes();
  78 + void testUnicode();
  79 + void testInvalidUnicodeJanssonFiles();
79 80
80 81 void setUp();
81 82 void tearDown();

0 comments on commit 7ece760

Please sign in to comment.
Something went wrong with that request. Please try again.