@@ -510,7 +510,7 @@ struct StateTable
510510 const Entry<Extra> &get_entry (int state, unsigned int klass) const
511511 {
512512 if (unlikely (klass >= nClasses))
513- klass = StateTable<Types, Entry<Extra>> ::CLASS_OUT_OF_BOUNDS;
513+ klass = StateTable::CLASS_OUT_OF_BOUNDS;
514514
515515 const HBUSHORT *states = (this +stateArrayTable).arrayZ ;
516516 const Entry<Extra> *entries = (this +entryTable).arrayZ ;
@@ -576,7 +576,7 @@ struct StateTable
576576 if (unlikely (stop > states))
577577 return_trace (false );
578578 for (const HBUSHORT *p = states; stop < p; p--)
579- num_entries = hb_max (num_entries, *(p - 1 ) + 1 );
579+ num_entries = hb_max (num_entries, *(p - 1 ) + 1u );
580580 state_neg = min_state;
581581 }
582582 }
@@ -597,7 +597,7 @@ struct StateTable
597597 if (unlikely (stop < states))
598598 return_trace (false );
599599 for (const HBUSHORT *p = &states[state_pos * num_classes]; p < stop; p++)
600- num_entries = hb_max (num_entries, *p + 1 );
600+ num_entries = hb_max (num_entries, *p + 1u );
601601 state_pos = max_state + 1 ;
602602 }
603603 }
@@ -729,7 +729,10 @@ struct ExtendedTypes
729729template <typename Types, typename EntryData>
730730struct StateTableDriver
731731{
732- StateTableDriver (const StateTable<Types, EntryData> &machine_,
732+ using StateTableT = StateTable<Types, EntryData>;
733+ using EntryT = Entry<EntryData>;
734+
735+ StateTableDriver (const StateTableT &machine_,
733736 hb_buffer_t *buffer_,
734737 hb_face_t *face_) :
735738 machine (machine_),
@@ -742,59 +745,101 @@ struct StateTableDriver
742745 if (!c->in_place )
743746 buffer->clear_output ();
744747
745- int state = StateTable<Types, EntryData> ::STATE_START_OF_TEXT;
748+ int state = StateTableT ::STATE_START_OF_TEXT;
746749 for (buffer->idx = 0 ; buffer->successful ;)
747750 {
748751 unsigned int klass = buffer->idx < buffer->len ?
749752 machine.get_class (buffer->info [buffer->idx ].codepoint , num_glyphs) :
750- (unsigned ) StateTable<Types, EntryData> ::CLASS_END_OF_TEXT;
753+ (unsigned ) StateTableT ::CLASS_END_OF_TEXT;
751754 DEBUG_MSG (APPLY, nullptr , " c%u at %u" , klass, buffer->idx );
752- const Entry<EntryData> &entry = machine.get_entry (state, klass);
755+ const EntryT &entry = machine.get_entry (state, klass);
756+ const int next_state = machine.new_state (entry.newState );
753757
754- /* Unsafe-to-break before this if not in state 0, as things might
755- * go differently if we start from state 0 here.
758+ /* Conditions under which it's guaranteed safe-to-break before current glyph:
756759 *
757- * Ugh. The indexing here is ugly... */
758- if (state && buffer->backtrack_len () && buffer->idx < buffer->len )
759- {
760- /* If there's no action and we're just epsilon-transitioning to state 0,
761- * safe to break. */
762- if (c->is_actionable (this , entry) ||
763- !(entry.newState == StateTable<Types, EntryData>::STATE_START_OF_TEXT &&
764- entry.flags == context_t ::DontAdvance))
765- buffer->unsafe_to_break_from_outbuffer (buffer->backtrack_len () - 1 , buffer->idx + 1 );
766- }
767-
768- /* Unsafe-to-break if end-of-text would kick in here. */
769- if (buffer->idx + 2 <= buffer->len )
770- {
771- const Entry<EntryData> &end_entry = machine.get_entry (state, StateTable<Types, EntryData>::CLASS_END_OF_TEXT);
772- if (c->is_actionable (this , end_entry))
773- buffer->unsafe_to_break (buffer->idx , buffer->idx + 2 );
774- }
760+ * 1. There was no action in this transition; and
761+ *
762+ * 2. If we break before current glyph, the results will be the same. That
763+ * is guaranteed if:
764+ *
765+ * 2a. We were already in start-of-text state; or
766+ *
767+ * 2b. We are epsilon-transitioning to start-of-text state; or
768+ *
769+ * 2c. Starting from start-of-text state seeing current glyph:
770+ *
771+ * 2c'. There won't be any actions; and
772+ *
773+ * 2c". We would end up in the same state that we were going to end up
774+ * in now, including whether we would be epsilon-transitioning.
775+ *
776+ * and
777+ *
778+ * 3. If we break before current glyph, there won't be any end-of-text action
779+ * after previous glyph.
780+ *
781+ * This triples the transitions we need to look up, but is worth returning
782+ * granular unsafe-to-break results. See e.g.:
783+ *
784+ * https://github.com/harfbuzz/harfbuzz/issues/2860
785+ */
786+ const EntryT *wouldbe_entry;
787+ bool safe_to_break =
788+ /* 1. */
789+ !c->is_actionable (this , entry)
790+ &&
791+ /* 2. */
792+ (
793+ /* 2a. */
794+ state == StateTableT::STATE_START_OF_TEXT
795+ ||
796+ /* 2b. */
797+ (
798+ (entry.flags & context_t ::DontAdvance) &&
799+ next_state == StateTableT::STATE_START_OF_TEXT
800+ )
801+ ||
802+ /* 2c. */
803+ (
804+ wouldbe_entry = &machine.get_entry (StateTableT::STATE_START_OF_TEXT, klass)
805+ ,
806+ /* 2c'. */
807+ !c->is_actionable (this , *wouldbe_entry)
808+ &&
809+ /* 2c". */
810+ (
811+ next_state == machine.new_state (wouldbe_entry->newState )
812+ &&
813+ (entry.flags & context_t ::DontAdvance) == (wouldbe_entry->flags & context_t ::DontAdvance)
814+ )
815+ )
816+ )
817+ &&
818+ /* 3. */
819+ !c->is_actionable (this , machine.get_entry (state, StateTableT::CLASS_END_OF_TEXT))
820+ ;
821+
822+ if (!safe_to_break && buffer->backtrack_len () && buffer->idx < buffer->len )
823+ buffer->unsafe_to_break_from_outbuffer (buffer->backtrack_len () - 1 , buffer->idx + 1 );
775824
776825 c->transition (this , entry);
777826
778- state = machine. new_state (entry. newState ) ;
827+ state = next_state ;
779828 DEBUG_MSG (APPLY, nullptr , " s%d" , state);
780829
781- if (buffer->idx == buffer->len )
830+ if (buffer->idx == buffer->len || unlikely (!buffer-> successful ) )
782831 break ;
783832
784833 if (!(entry.flags & context_t ::DontAdvance) || buffer->max_ops -- <= 0 )
785- buffer->next_glyph ();
834+ ( void ) buffer->next_glyph ();
786835 }
787836
788837 if (!c->in_place )
789- {
790- for (; buffer->successful && buffer->idx < buffer->len ;)
791- buffer->next_glyph ();
792838 buffer->swap_buffers ();
793- }
794839 }
795840
796841 public:
797- const StateTable<Types, EntryData> &machine;
842+ const StateTableT &machine;
798843 hb_buffer_t *buffer;
799844 unsigned int num_glyphs;
800845};
0 commit comments