Browse files

Merge pull request #53 from fbraem/develop

JSON unicode fixes and running tests on invalid unicode JSON
  • Loading branch information...
2 parents 7094df5 + 5964ae0 commit 80cf789dcd80c7c42ba8546ee5baf597fcf4209f @aleks-f aleks-f committed Jan 4, 2013
Showing with 199 additions and 50 deletions.
  1. +108 −30 JSON/src/Parser.cpp
  2. +89 −19 JSON/testsuite/src/JSONTest.cpp
  3. +2 −1 JSON/testsuite/src/JSONTest.h
View
138 JSON/src/Parser.cpp
@@ -38,6 +38,7 @@
#include "Poco/JSON/JSONException.h"
#include "Poco/Ascii.h"
#include "Poco/Token.h"
+#include "Poco/UTF8Encoding.h"
#undef min
#undef max
#include <limits>
@@ -66,11 +67,11 @@ class SeparatorToken: public Token
bool start(char c, std::istream& istr)
{
if ( c == '{'
- || c == '}'
- || c == ']'
- || c == '['
- || c == ','
- || c == ':' )
+ || c == '}'
+ || c == ']'
+ || c == '['
+ || c == ','
+ || c == ':' )
{
_value = c;
return true;
@@ -118,8 +119,8 @@ class StringToken: public Token
void finish(std::istream& istr)
{
- int c = istr.get();
- while (c != -1)
+ int c = 0;
+ while ((c = istr.get()) != -1)
{
if ( c == 0 )
{
@@ -133,6 +134,31 @@ class StringToken: public Token
if ( c == '"' )
break;
+
+ if(0x80 <= c && c <= 0xFF)
+ {
+ int count = utf8_check_first(c);
+ if (!count)
+ {
+ throw JSONException(format("Unable to decode byte 0x%x", (unsigned int) c));
+ }
+
+ char buffer[5];
+ buffer[0] = c;
+ for(int i = 1; i < count; ++i)
+ {
+ buffer[i] = istr.get();
+ }
+
+ if ( !UTF8Encoding::isLegal((unsigned char*) buffer, count) )
+ {
+ throw JSONException("No legal UTF8 found");
+ }
+ buffer[count] = '\0';
+ _value += buffer;
+
+ continue;
+ }
if ( c == '\\' ) // Escaped String
{
@@ -196,8 +222,16 @@ class StringToken: public Token
{
throw JSONException("Invalid unicode");
}
- c = unicode;
- break;
+
+ Poco::UTF8Encoding utf8encoding;
+ int length = utf8encoding.convert(unicode, NULL, 0);
+ std::vector<unsigned char> convert(length);
+ utf8encoding.convert(unicode, &convert[0], length);
+ for(int i = 0; i < length; ++i)
+ {
+ _value += (char) convert[i];
+ }
+ continue;
}
default:
{
@@ -206,7 +240,6 @@ class StringToken: public Token
}
}
_value += c;
- c = istr.get();
}
if ( c == -1 )
@@ -241,6 +274,49 @@ class StringToken: public Token
return value;
}
+
+private:
+ int utf8_check_first(char byte) /* Classifies `byte` as the first byte of a UTF-8 sequence.
+    Returns the total number of bytes (1 to 4) in the sequence that `byte`
+    introduces, or 0 when `byte` cannot legally start a sequence.
+    StringToken::finish() treats a 0 result as a decoding error and
+    otherwise reads (result - 1) further bytes from the stream. */
+ {
+ unsigned char u = (unsigned char) byte; // compare as an unsigned value so bytes >= 0x80 are not seen as negative
+
+ if(u < 0x80) // plain ASCII: a complete one-byte sequence
+ return 1;
+
+ if (0x80 <= u && u <= 0xBF) // the lower bound always holds here because u < 0x80 returned above
+ {
+ /* second, third or fourth byte of a multi-byte
+ sequence, i.e. a "continuation byte" */
+ return 0;
+ }
+ else if(u == 0xC0 || u == 0xC1)
+ {
+ /* overlong encoding of an ASCII byte */
+ return 0;
+ }
+ else if(0xC2 <= u && u <= 0xDF)
+ {
+ /* 2-byte sequence */
+ return 2;
+ }
+ else if(0xE0 <= u && u <= 0xEF)
+ {
+ /* 3-byte sequence */
+ return 3;
+ }
+ else if(0xF0 <= u && u <= 0xF4)
+ {
+ /* 4-byte sequence */
+ return 4;
+ }
+ else
+ {
+ /* u >= 0xF5 */
+ /* Restricted (start of 4-, 5- or 6-byte sequence) or invalid
+ UTF-8 */
+ return 0;
+ }
+ }
};
@@ -524,7 +600,7 @@ bool Parser::readRow(bool firstCall)
token = nextToken();
if ( token->is(Token::SEPARATOR_TOKEN)
- && token->asChar() == ':' )
+ && token->asChar() == ':' )
{
readValue(nextToken());
@@ -576,17 +652,17 @@ void Parser::readValue(const Token* token)
if ( _handler != NULL )
{
#if defined(POCO_HAVE_INT64)
- Int64 value = token->asInteger64();
- // if number is 32-bit, then handle as such
+ Int64 value = token->asInteger64();
+ // if number is 32-bit, then handle as such
if ( value > std::numeric_limits<int>::max()
- || value < std::numeric_limits<int>::min() )
- {
- _handler->value(value);
- }
- else
- {
- _handler->value(static_cast<int>(value));
- }
+ || value < std::numeric_limits<int>::min() )
+ {
+ _handler->value(value);
+ }
+ else
+ {
+ _handler->value(static_cast<int>(value));
+ }
#else
int value = token->asInteger();
_handle->value(value);
@@ -637,17 +713,19 @@ void Parser::readValue(const Token* token)
}
break;
case Token::SEPARATOR_TOKEN:
- {
- if ( token->asChar() == '{' )
{
- readObject();
- }
- else if ( token->asChar() == '[' )
- {
- readArray();
+ if ( token->asChar() == '{' )
+ {
+ readObject();
+ }
+ else if ( token->asChar() == '[' )
+ {
+ readArray();
+ }
+ break;
}
- break;
- }
+ case Token::INVALID_TOKEN:
+ throw JSONException(format("Invalid token '%s' found", token->asString()));
}
}
View
108 JSON/testsuite/src/JSONTest.cpp
@@ -33,20 +33,25 @@
#include "JSONTest.h"
#include "CppUnit/TestCaller.h"
#include "CppUnit/TestSuite.h"
+
#include "Poco/JSON/Object.h"
#include "Poco/JSON/Parser.h"
#include "Poco/JSON/Query.h"
#include "Poco/JSON/JSONException.h"
#include "Poco/JSON/Stringifier.h"
#include "Poco/JSON/DefaultHandler.h"
#include "Poco/JSON/Template.h"
+
#include "Poco/Path.h"
#include "Poco/Environment.h"
#include "Poco/File.h"
#include "Poco/FileStream.h"
#include "Poco/Glob.h"
-#include <set>
+#include "Poco/UTF8Encoding.h"
+#include "Poco/Latin1Encoding.h"
+#include "Poco/TextConverter.h"
+#include <set>
using namespace Poco::JSON;
using namespace Poco::Dynamic;
@@ -74,22 +79,6 @@ void JSONTest::tearDown()
}
-void JSONTest::testStringifier()
-{
- Object obj;
-
- Array arr;
- Object obj2;
-
- obj.set("array", arr);
- obj.set("obj2", obj2);
-
- std::ostringstream ostr;
- obj.stringify(ostr);
- assert (ostr.str() == "{\"array\":[],\"obj2\":{}}");
-}
-
-
void JSONTest::testNullProperty()
{
std::string json = "{ \"test\" : null }";
@@ -845,6 +834,50 @@ void JSONTest::testInvalidJanssonFiles()
}
+void JSONTest::testInvalidUnicodeJanssonFiles() /* Parses every "input" file found under the
+   "invalid-unicode" test-data location and expects the parser to reject
+   each one with a JSONException (a Poco::SyntaxException is tolerated as
+   well). A file that parses without throwing is reported through fail(). */
+{
+ Poco::Path pathPattern(getTestFilesPath("invalid-unicode")); // presumably a glob pattern ending in '*' - confirm against getTestFilesPath()
+
+ std::set<std::string> paths;
+ Poco::Glob::glob(pathPattern, paths); // fills `paths` with every entry matching the pattern
+
+ for(std::set<std::string>::iterator it = paths.begin(); it != paths.end(); ++it)
+ {
+ Poco::Path filePath(*it, "input"); // the file named "input" below the matched entry
+
+ if ( filePath.isFile() )
+ {
+ Poco::File inputFile(filePath);
+ if ( inputFile.exists() ) // entries without an "input" file are silently skipped
+ {
+ Poco::FileInputStream fis(filePath.toString());
+ std::cout << filePath.toString() << std::endl; // log which file is being parsed
+
+ Parser parser;
+ Var result;
+
+ try
+ {
+ DefaultHandler handler;
+ parser.setHandler(&handler);
+ parser.parse(fis); // expected to throw for every file in this directory
+ result = handler.result();
+ // We shouldn't get here.
+ std::cout << "We didn't get an exception. This is the result: " << result.convert<std::string>() << std::endl;
+ fail(result.convert<std::string>()); // NOTE(review): presumably the CppUnit failure helper, whose exception is not caught below - confirm
+ }
+ catch(JSONException&) // the expected outcome: the parser rejected the input
+ {
+ continue;
+ }
+ catch(Poco::SyntaxException&) // also accepted as a rejection; falls through to the next file just like `continue` above
+ { }
+ }
+ }
+ }
+}
+
+
void JSONTest::testTemplate()
{
Template tpl;
@@ -858,6 +891,40 @@ void JSONTest::testTemplate()
tpl.render(data, std::cout);
}
+void JSONTest::testUnicode() /* Checks that \uXXXX escapes inside a JSON string are decoded to UTF-8.
+   The parsed value of "test" is compared with the same characters given as
+   Latin-1 bytes and converted to UTF-8 through Poco::TextConverter. */
+{
+ const unsigned char supp[] = {0x61, 0xE1, 0xE9, 0x78, 0xED, 0xF3, 0xFA, 0x0}; // NUL-terminated bytes 'a', 0xE1, 0xE9, 'x', 0xED, 0xF3, 0xFA - read as Latin-1 by the converter below
+ std::string text((const char*) supp);
+
+ std::string json = "{ \"test\" : \"a\\u00E1\\u00E9x\\u00ED\\u00F3\\u00FA\" }"; // the same code points, written as JSON \u escapes
+ Parser parser;
+
+ Var result;
+ try
+ {
+ DefaultHandler handler;
+ parser.setHandler(&handler);
+ parser.parse(json);
+ result = handler.result();
+ }
+ catch(JSONException& jsone) // any parse error fails the test after printing the parser's message
+ {
+ std::cout << jsone.message() << std::endl;
+ assert(false);
+ }
+ assert(result.type() == typeid(Object::Ptr)); // the top-level value must be a JSON object
+
+ Object::Ptr object = result.extract<Object::Ptr>();
+ Var test = object->get("test");
+
+ Poco::Latin1Encoding latin1;
+ Poco::UTF8Encoding utf8;
+ Poco::TextConverter converter(latin1, utf8); // converts from Latin-1 to UTF-8
+ std::string original;
+ converter.convert(text, original); // `original` now holds the expected UTF-8 byte sequence
+
+ assert(test.convert<std::string>() == original); // the parser must have produced exactly those UTF-8 bytes
+}
std::string JSONTest::getTestFilesPath(const std::string& type)
{
@@ -879,8 +946,10 @@ std::string JSONTest::getTestFilesPath(const std::string& type)
if (Poco::File(pathPattern).exists())
validDir += '*';
else
+ {
+ std::cout << "Can't find " << validDir << std::endl;
throw Poco::NotFoundException("cannot locate directory containing valid JSON test files");
-
+ }
return validDir;
}
@@ -889,7 +958,6 @@ CppUnit::Test* JSONTest::suite()
{
CppUnit::TestSuite* pSuite = new CppUnit::TestSuite("JSONTest");
- CppUnit_addTest(pSuite, JSONTest, testStringifier);
CppUnit_addTest(pSuite, JSONTest, testNullProperty);
CppUnit_addTest(pSuite, JSONTest, testTrueProperty);
CppUnit_addTest(pSuite, JSONTest, testFalseProperty);
@@ -917,7 +985,9 @@ CppUnit::Test* JSONTest::suite()
CppUnit_addTest(pSuite, JSONTest, testQuery);
CppUnit_addTest(pSuite, JSONTest, testValidJanssonFiles);
CppUnit_addTest(pSuite, JSONTest, testInvalidJanssonFiles);
+ CppUnit_addTest(pSuite, JSONTest, testInvalidUnicodeJanssonFiles);
CppUnit_addTest(pSuite, JSONTest, testTemplate);
+ CppUnit_addTest(pSuite, JSONTest, testUnicode);
return pSuite;
}
View
3 JSON/testsuite/src/JSONTest.h
@@ -46,7 +46,6 @@ class JSONTest: public CppUnit::TestCase
JSONTest(const std::string& name);
~JSONTest();
- void testStringifier();
void testNullProperty();
void testTrueProperty();
void testFalseProperty();
@@ -76,6 +75,8 @@ class JSONTest: public CppUnit::TestCase
void testInvalidJanssonFiles();
void testTemplate();
void testItunes();
+ void testUnicode();
+ void testInvalidUnicodeJanssonFiles();
void setUp();
void tearDown();

0 comments on commit 80cf789

Please sign in to comment.