@@ -883,8 +883,7 @@ QPDFObjectHandle::parseContentStream_internal(PointerHolder<Buffer> stream_data,
883883 while (static_cast <size_t >(input->tell ()) < length)
884884 {
885885 QPDFObjectHandle obj =
886- parseInternal (input, " content" , tokenizer, empty,
887- 0 , 0 , false , false , true );
886+ parseInternal (input, " content" , tokenizer, empty, 0 , 0 , true );
888887 if (! obj.isInitialized ())
889888 {
890889 // EOF
@@ -945,15 +944,14 @@ QPDFObjectHandle::parse(PointerHolder<InputSource> input,
945944 StringDecrypter* decrypter, QPDF* context)
946945{
947946 return parseInternal (input, object_description, tokenizer, empty,
948- decrypter, context, false , false , false );
947+ decrypter, context, false );
949948}
950949
951950QPDFObjectHandle
952951QPDFObjectHandle::parseInternal (PointerHolder<InputSource> input,
953952 std::string const & object_description,
954953 QPDFTokenizer& tokenizer, bool & empty,
955954 StringDecrypter* decrypter, QPDF* context,
956- bool in_array, bool in_dictionary,
957955 bool content_stream)
958956{
959957 // This method must take care not to resolve any objects. Don't
@@ -962,22 +960,23 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
962960 // of reading the object and changing the file pointer.
963961
964962 empty = false ;
965- if (in_dictionary && in_array)
966- {
967- // Although dictionaries and arrays arbitrarily nest, these
968- // variables indicate what is at the top of the stack right
969- // now, so they can, by definition, never both be true.
970- throw std::logic_error (
971- " INTERNAL ERROR: parseInternal: in_dict && in_array" );
972- }
973963
974964 QPDFObjectHandle object;
975965
976- qpdf_offset_t offset = input->tell ();
977- std::vector<QPDFObjectHandle> olist;
966+ std::vector<std::vector<QPDFObjectHandle> > olist_stack;
967+ olist_stack.push_back (std::vector<QPDFObjectHandle>());
968+ enum state_e { st_top, st_start, st_stop, st_eof, st_dictionary, st_array };
969+ std::vector<state_e> state_stack;
970+ state_stack.push_back (st_top);
971+ std::vector<qpdf_offset_t > offset_stack;
972+ offset_stack.push_back (input->tell ());
978973 bool done = false ;
979974 while (! done)
980975 {
976+ std::vector<QPDFObjectHandle>& olist = olist_stack.back ();
977+ state_e state = state_stack.back ();
978+ qpdf_offset_t offset = offset_stack.back ();
979+
981980 object = QPDFObjectHandle ();
982981
983982 QPDFTokenizer::Token token =
@@ -988,8 +987,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
988987 case QPDFTokenizer::tt_eof:
989988 if (content_stream)
990989 {
991- // Return uninitialized object to indicate EOF
992- return object;
990+ state = st_eof;
993991 }
994992 else
995993 {
@@ -1012,9 +1010,9 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
10121010 break ;
10131011
10141012 case QPDFTokenizer::tt_array_close:
1015- if (in_array )
1013+ if (state == st_array )
10161014 {
1017- done = true ;
1015+ state = st_stop ;
10181016 }
10191017 else
10201018 {
@@ -1029,9 +1027,9 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
10291027 break ;
10301028
10311029 case QPDFTokenizer::tt_dict_close:
1032- if (in_dictionary )
1030+ if (state == st_dictionary )
10331031 {
1034- done = true ;
1032+ state = st_stop ;
10351033 }
10361034 else
10371035 {
@@ -1046,15 +1044,13 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
10461044 break ;
10471045
10481046 case QPDFTokenizer::tt_array_open:
1049- object = parseInternal (
1050- input, object_description, tokenizer, empty,
1051- decrypter, context, true , false , content_stream);
1052- break ;
1053-
10541047 case QPDFTokenizer::tt_dict_open:
1055- object = parseInternal (
1056- input, object_description, tokenizer, empty,
1057- decrypter, context, false , true , content_stream);
1048+ olist_stack.push_back (std::vector<QPDFObjectHandle>());
1049+ state = st_start;
1050+ offset_stack.push_back (input->tell ());
1051+ state_stack.push_back (
1052+ (token.getType () == QPDFTokenizer::tt_array_open) ?
1053+ st_array : st_dictionary);
10581054 break ;
10591055
10601056 case QPDFTokenizer::tt_bool:
@@ -1084,12 +1080,12 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
10841080 {
10851081 object = QPDFObjectHandle::newOperator (value);
10861082 }
1087- else if ((value == " R" ) && (in_array || in_dictionary ) &&
1088- (olist.size () >= 2 ) &&
1089- (! olist.at (olist.size () - 1 ).isIndirect ()) &&
1090- (olist.at (olist.size () - 1 ).isInteger ()) &&
1091- (! olist.at (olist.size () - 2 ).isIndirect ()) &&
1092- (olist.at (olist.size () - 2 ).isInteger ()))
1083+ else if ((value == " R" ) && (state != st_top ) &&
1084+ (olist.size () >= 2 ) &&
1085+ (! olist.at (olist.size () - 1 ).isIndirect ()) &&
1086+ (olist.at (olist.size () - 1 ).isInteger ()) &&
1087+ (! olist.at (olist.size () - 2 ).isIndirect ()) &&
1088+ (olist.at (olist.size () - 2 ).isInteger ()))
10931089 {
10941090 if (context == 0 )
10951091 {
@@ -1106,8 +1102,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
11061102 olist.pop_back ();
11071103 olist.pop_back ();
11081104 }
1109- else if ((value == " endobj" ) &&
1110- (! (in_array || in_dictionary)))
1105+ else if ((value == " endobj" ) && (state == st_top))
11111106 {
11121107 // We just saw endobj without having read
11131108 // anything. Treat this as a null and do not move
@@ -1153,93 +1148,132 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
11531148 break ;
11541149 }
11551150
1156- if (in_dictionary || in_array)
1157- {
1158- if (! done)
1159- {
1160- olist.push_back (object);
1161- }
1162- }
1163- else if (! object.isInitialized ())
1164- {
1165- warn (context,
1166- QPDFExc (qpdf_e_damaged_pdf, input->getName (),
1167- object_description,
1168- input->getLastOffset (),
1169- " parse error while reading object" ));
1151+ if ((! object.isInitialized ()) &&
1152+ (! ((state == st_start) ||
1153+ (state == st_stop) ||
1154+ (state == st_eof))))
1155+ {
1156+ throw std::logic_error (
1157+ " QPDFObjectHandle::parseInternal: "
1158+ " unexpected uninitialized object" );
11701159 object = newNull ();
1171- }
1172- else
1173- {
1174- done = true ;
1175- }
1176- }
1160+ }
11771161
1178- if (in_array)
1179- {
1180- object = newArray (olist);
1181- }
1182- else if (in_dictionary)
1183- {
1184- // Convert list to map. Alternating elements are keys. Attempt
1185- // to recover more or less gracefully from invalid
1186- // dictionaries.
1187- std::set<std::string> names;
1188- for (std::vector<QPDFObjectHandle>::iterator iter = olist.begin ();
1189- iter != olist.end (); ++iter)
1162+ switch (state)
11901163 {
1191- if ((! (*iter).isIndirect ()) && (*iter).isName ())
1164+ case st_eof:
1165+ if (state_stack.size () > 1 )
11921166 {
1193- names.insert ((*iter).getName ());
1167+ warn (context,
1168+ QPDFExc (qpdf_e_damaged_pdf, input->getName (),
1169+ object_description,
1170+ input->getLastOffset (),
1171+ " parse error while reading object" ));
11941172 }
1195- }
1173+ done = true ;
1174+ // Leave object uninitialized to indicate EOF
1175+ break ;
11961176
1197- std::map<std::string, QPDFObjectHandle> dict;
1198- int next_fake_key = 1 ;
1199- for (unsigned int i = 0 ; i < olist.size (); ++i)
1200- {
1201- QPDFObjectHandle key_obj = olist.at (i);
1202- QPDFObjectHandle val;
1203- if (key_obj.isIndirect () || (! key_obj.isName ()))
1177+ case st_dictionary:
1178+ case st_array:
1179+ olist.push_back (object);
1180+ break ;
1181+
1182+ case st_top:
1183+ done = true ;
1184+ break ;
1185+
1186+ case st_start:
1187+ break ;
1188+
1189+ case st_stop:
1190+ if ((state_stack.size () < 2 ) || (olist_stack.size () < 2 ))
1191+ {
1192+ throw std::logic_error (
1193+ " QPDFObjectHandle::parseInternal: st_stop encountered"
1194+ " with insufficient elements in stack" );
1195+ }
1196+ state_e old_state = state_stack.back ();
1197+ state_stack.pop_back ();
1198+ if (old_state == st_array)
12041199 {
1205- bool found_fake = false ;
1206- std::string candidate;
1207- while (! found_fake)
1200+ object = newArray (olist);
1201+ }
1202+ else if (old_state == st_dictionary)
1203+ {
1204+ // Convert list to map. Alternating elements are keys.
1205+ // Attempt to recover more or less gracefully from
1206+ // invalid dictionaries.
1207+ std::set<std::string> names;
1208+ for (std::vector<QPDFObjectHandle>::iterator iter =
1209+ olist.begin ();
1210+ iter != olist.end (); ++iter)
1211+ {
1212+ if ((! (*iter).isIndirect ()) && (*iter).isName ())
1213+ {
1214+ names.insert ((*iter).getName ());
1215+ }
1216+ }
1217+
1218+ std::map<std::string, QPDFObjectHandle> dict;
1219+ int next_fake_key = 1 ;
1220+ for (unsigned int i = 0 ; i < olist.size (); ++i)
12081221 {
1209- candidate =
1210- " /QPDFFake" + QUtil::int_to_string (next_fake_key++);
1211- found_fake = (names.count (candidate) == 0 );
1212- QTC::TC (" qpdf" , " QPDFObjectHandle found fake" ,
1213- (found_fake ? 0 : 1 ));
1222+ QPDFObjectHandle key_obj = olist.at (i);
1223+ QPDFObjectHandle val;
1224+ if (key_obj.isIndirect () || (! key_obj.isName ()))
1225+ {
1226+ bool found_fake = false ;
1227+ std::string candidate;
1228+ while (! found_fake)
1229+ {
1230+ candidate =
1231+ " /QPDFFake" +
1232+ QUtil::int_to_string (next_fake_key++);
1233+ found_fake = (names.count (candidate) == 0 );
1234+ QTC::TC (" qpdf" , " QPDFObjectHandle found fake" ,
1235+ (found_fake ? 0 : 1 ));
1236+ }
1237+ warn (context,
1238+ QPDFExc (
1239+ qpdf_e_damaged_pdf,
1240+ input->getName (), object_description, offset,
1241+ " expected dictionary key but found"
1242+ " non-name object; inserting key " +
1243+ candidate));
1244+ val = key_obj;
1245+ key_obj = newName (candidate);
1246+ }
1247+ else if (i + 1 >= olist.size ())
1248+ {
1249+ QTC::TC (" qpdf" , " QPDFObjectHandle no val for last key" );
1250+ warn (context,
1251+ QPDFExc (
1252+ qpdf_e_damaged_pdf,
1253+ input->getName (), object_description, offset,
1254+ " dictionary ended prematurely; "
1255+ " using null as value for last key" ));
1256+ val = newNull ();
1257+ }
1258+ else
1259+ {
1260+ val = olist.at (++i);
1261+ }
1262+ dict[key_obj.getName ()] = val;
12141263 }
1215- warn (context,
1216- QPDFExc (
1217- qpdf_e_damaged_pdf,
1218- input->getName (), object_description, offset,
1219- " expected dictionary key but found"
1220- " non-name object; inserting key " +
1221- candidate));
1222- val = key_obj;
1223- key_obj = newName (candidate);
1264+ object = newDictionary (dict);
12241265 }
1225- else if (i + 1 >= olist.size ())
1266+ olist_stack.pop_back ();
1267+ offset_stack.pop_back ();
1268+ if (state_stack.back () == st_top)
12261269 {
1227- QTC::TC (" qpdf" , " QPDFObjectHandle no val for last key" );
1228- warn (context,
1229- QPDFExc (
1230- qpdf_e_damaged_pdf,
1231- input->getName (), object_description, offset,
1232- " dictionary ended prematurely; using null as value"
1233- " for last key" ));
1234- val = newNull ();
1270+ done = true ;
12351271 }
12361272 else
12371273 {
1238- val = olist. at (++i );
1274+ olist_stack. back (). push_back (object );
12391275 }
1240- dict[key_obj.getName ()] = val;
12411276 }
1242- object = newDictionary (dict);
12431277 }
12441278
12451279 return object;
0 commit comments