Refactor: remove micro-optimizations from ACS95 token reading function

positively-charged · Apr 23, 2017 · 4388a16
1 parent 8082d76
commit 4388a16
Showing 1 changed file with 67 additions and 147 deletions.
diff --git a/src/parse/token/source.c b/src/parse/token/source.c
@@ -1125,8 +1125,7 @@ void read_token_acs95( struct parse* parse, struct token* token ) {
    int line = 0;
    int column = 0;
    enum tk tk = TK_END;
-   char* text = NULL;
-   int length = 0;
+   struct str* text = NULL;
 
    whitespace:
    // -----------------------------------------------------------------------
@@ -1427,62 +1426,28 @@ void read_token_acs95( struct parse* parse, struct token* token ) {
          { "while", TK_WHILE },
          { "world", TK_WORLD },
       };
+      text = temp_text( parse );
+      while ( isalnum( ch ) || ch == '_' ) {
+         append_ch( text, tolower( ch ) );
+         ch = read_ch( parse );
+      }
       enum { MAX_IDENTIFIER_LENGTH = 31 };
-      struct text_buffer* text_buffer = t_get_text_buffer( parse->task,
-         MAX_IDENTIFIER_LENGTH + 1 );
-      text = text_buffer->left;
-      char* copied_text = text;
-      char* end = copied_text + MAX_IDENTIFIER_LENGTH;
-      char* source_text = parse->source->buffer +
-         parse->source->buffer_pos - 1;
-      while ( true ) {
-         if ( isalnum( *source_text ) || *source_text == '_' ) {
-            if ( copied_text == end ) {
-               struct pos pos;
-               t_init_pos( &pos,
-                  parse->source->file_entry_id,
-                  line, column );
-               p_diag( parse, DIAG_POS_ERR, &pos,
-                  "identifier too long (maximum length is %d)",
-                  MAX_IDENTIFIER_LENGTH );
-               p_bail( parse );
-            }
-            *copied_text = tolower( *source_text );
-            ++copied_text;
-            ++source_text;
-         }
-         // Read new data from the source file.
-         else if ( *source_text == '\n' && source_text[ 1 ] == '\0' ) {
-            size_t count = fread( parse->source->buffer,
-               sizeof( parse->source->buffer[ 0 ] ), SOURCE_BUFFER_SIZE,
-               parse->source->fh );
-            parse->source->buffer[ count ] = '\n';
-            parse->source->buffer[ count + 1 ] = '\0';
-            parse->source->buffer_pos = 0;
-            source_text = parse->source->buffer;
-            if ( count == 0 ) {
-               break;
-            }
-         }
-         else {
-            break;
-         }
+      if ( text->length > MAX_IDENTIFIER_LENGTH ) {
+         struct pos pos;
+         t_init_pos( &pos,
+            parse->source->file_entry_id,
+            line, column );
+         p_diag( parse, DIAG_POS_ERR, &pos,
+            "identifier too long (maximum length is %d)",
+            MAX_IDENTIFIER_LENGTH );
+         p_bail( parse );
       }
-      *copied_text = '\0';
-      length = copied_text - text;
-      // Update source buffer. The 1 added to `buffer_pos` is for the character
-      // after the identifier, since we assume it is now read.
-      parse->source->buffer_pos = source_text - parse->source->buffer + 1;
-      parse->source->ch = *source_text;
-      parse->source->column += length;
-      // Update text buffer.
-      text_buffer->left = copied_text + 1;
       // Reserved identifier. Uses binary search.
       int left = 0;
       int right = ARRAY_SIZE( table ) - 1;
       while ( left <= right ) {
          int middle = ( left + right ) / 2;
-         int result = strcmp( text, table[ middle ].name );
+         int result = strcmp( text->value, table[ middle ].name );
          if ( result > 0 ) {
             left = middle + 1;
          }
@@ -1491,6 +1456,7 @@ void read_token_acs95( struct parse* parse, struct token* token ) {
          }
          else {
             tk = table[ middle ].tk;
+            text = NULL;
             goto finish;
          }
       }
@@ -1508,9 +1474,9 @@ void read_token_acs95( struct parse* parse, struct token* token ) {
          goto hexadecimal;
       }
       else if ( ch == '.' ) {
-         str_clear( &parse->temp_text );
-         append_ch( &parse->temp_text, '0' );
-         append_ch( &parse->temp_text, '.' );
+         text = temp_text( parse );
+         append_ch( text, '0' );
+         append_ch( text, '.' );
          ch = read_ch( parse );
          goto fixedpoint;
       }
@@ -1524,26 +1490,24 @@ void read_token_acs95( struct parse* parse, struct token* token ) {
 
    hexadecimal:
    // -----------------------------------------------------------------------
-   str_clear( &parse->temp_text );
+   text = temp_text( parse );
    while ( true ) {
       if ( isxdigit( ch ) ) {
-         append_ch( &parse->temp_text, ch );
+         append_ch( text, ch );
          ch = read_ch( parse );
       }
       else {
-         if ( parse->temp_text.length == 0 ) {
+         if ( text->length == 0 ) {
             struct pos pos;
             t_init_pos( &pos,
                parse->source->file_entry_id,
                parse->source->line,
                column );
             p_diag( parse, DIAG_POS | DIAG_WARN, &pos,
                "hexadecimal literal has no digits, will interpret it as 0x0" );
-            append_ch( &parse->temp_text, '0' );
+            append_ch( text, '0' );
          }
          tk = TK_LIT_HEX;
-         text = parse->temp_text.value;
-         length = parse->temp_text.length;
          goto finish;
       }
    }
@@ -1557,48 +1521,46 @@ void read_token_acs95( struct parse* parse, struct token* token ) {
       goto decimal;
    }
    else if ( ch == '.' ) {
-      str_clear( &parse->temp_text );
-      append_ch( &parse->temp_text, '0' );
-      append_ch( &parse->temp_text, '.' );
+      text = temp_text( parse );
+      append_ch( text, '0' );
+      append_ch( text, '.' );
       ch = read_ch( parse );
       goto fixedpoint;
    }
    else if ( ch == '_' ) {
-      str_clear( &parse->temp_text );
-      append_ch( &parse->temp_text, '0' );
-      append_ch( &parse->temp_text, ch );
+      text = temp_text( parse );
+      append_ch( text, '0' );
+      append_ch( text, ch );
       ch = read_ch( parse );
       goto radix;
    }
    else {
-      text = "0";
-      length = 1;
+      text = temp_text( parse );
+      append_ch( text, '0' );
       tk = TK_LIT_DECIMAL;
       goto finish;
    }
 
    decimal:
    // -----------------------------------------------------------------------
-   str_clear( &parse->temp_text );
+   text = temp_text( parse );
    while ( true ) {
       if ( isdigit( ch ) ) {
-         append_ch( &parse->temp_text, ch );
+         append_ch( text, ch );
          ch = read_ch( parse );
       }
       else if ( ch == '.' ) {
-         append_ch( &parse->temp_text, ch );
+         append_ch( text, ch );
          ch = read_ch( parse );
          goto fixedpoint;
       }
       else if ( ch == '_' ) {
-         append_ch( &parse->temp_text, ch );
+         append_ch( text, ch );
          ch = read_ch( parse );
          goto radix;
       }
       else {
          tk = TK_LIT_DECIMAL;
-         text = parse->temp_text.value;
-         length = parse->temp_text.length;
          goto finish;
       }
    }
@@ -1607,24 +1569,22 @@ void read_token_acs95( struct parse* parse, struct token* token ) {
    // -----------------------------------------------------------------------
    while ( true ) {
       if ( isdigit( ch ) ) {
-         append_ch( &parse->temp_text, ch );
+         append_ch( text, ch );
          ch = read_ch( parse );
       }
       else {
-         if ( parse->temp_text.value[ parse->temp_text.length - 1 ] == '.' ) {
+         if ( text->value[ text->length - 1 ] == '.' ) {
             struct pos pos;
             t_init_pos( &pos,
                parse->source->file_entry_id,
                parse->source->line,
                column );
             p_diag( parse, DIAG_POS | DIAG_WARN, &pos,
                "fixed-point literal has no digits after point, will interpret "
-               "it as %s0", parse->temp_text.value );
-            append_ch( &parse->temp_text, '0' );
+               "it as %s0", text->value );
+            append_ch( text, '0' );
          }
          tk = TK_LIT_FIXED;
-         text = parse->temp_text.value;
-         length = parse->temp_text.length;
          goto finish;
       }
    }
@@ -1633,87 +1593,47 @@ void read_token_acs95( struct parse* parse, struct token* token ) {
    // -----------------------------------------------------------------------
    while ( true ) {
       if ( isalnum( ch ) ) {
-         append_ch( &parse->temp_text, tolower( ch ) );
+         append_ch( text, tolower( ch ) );
          ch = read_ch( parse );
       }
       else {
-         if ( parse->temp_text.value[ parse->temp_text.length - 1 ] == '_' ) {
+         if ( text->value[ text->length - 1 ] == '_' ) {
             struct pos pos;
             t_init_pos( &pos,
                parse->source->file_entry_id,
                parse->source->line,
                column );
             p_diag( parse, DIAG_POS | DIAG_WARN, &pos,
                "radix literal has no digits after underscore, "
-               "will interpret it as %s0", parse->temp_text.value );
-            append_ch( &parse->temp_text, '0' );
+               "will interpret it as %s0", text->value );
+            append_ch( text, '0' );
          }
-         text = parse->temp_text.value;
-         length = parse->temp_text.length;
          tk = TK_LIT_RADIX;
          goto finish;
       }
    }
 
    string:
    // -----------------------------------------------------------------------
-   {
-      // Most strings will be small, so copy the characters directly into the
-      // text buffer. For long strings, use an intermediate buffer.
-      enum { SEGMENTLENGTH = 255 };
-      enum { CUSHIONLENGTH = 1 };
-      enum { SAFELENGTH = SEGMENTLENGTH - CUSHIONLENGTH };
-      struct text_buffer* text_buffer = t_get_text_buffer( parse->task,
-         SEGMENTLENGTH + 1 );
-      text = text_buffer->left;
-      char* copied_text = text;
-      char* end = copied_text + SAFELENGTH;
-      struct str* temp_text = NULL;
-      while ( true ) {
-         if ( copied_text >= end ) {
-            if ( ! temp_text ) {
-               temp_text = &parse->temp_text;
-               str_clear( temp_text );
-               str_append_sub( temp_text, text, copied_text - text );
-            }
-            else {
-               temp_text->length = copied_text - temp_text->value;
-               str_grow( temp_text, temp_text->buffer_length * 2 );
-            }
-            copied_text = temp_text->value + temp_text->length;
-            end = temp_text->value + temp_text->buffer_length -
-               ( CUSHIONLENGTH + 1 );
-         }
-         else if ( ! ch ) {
-            struct pos pos;
-            t_init_pos( &pos,
-               parse->source->file_entry_id,
-               line, column );
-            p_diag( parse, DIAG_POS_ERR, &pos,
-               "unterminated string" );
-            p_bail( parse );
-         }
-         else if ( ch == '"' ) {
-            read_ch( parse );
-            tk = TK_LIT_STRING;
-            *copied_text = '\0';
-            if ( temp_text ) {
-               temp_text->length = copied_text - temp_text->value;
-               text = t_intern_text( parse->task, temp_text->value,
-                  temp_text->length );
-               length = temp_text->length;
-            }
-            else {
-               length = copied_text - text;
-               text_buffer->left = copied_text + 1;
-            }
-            goto finish;
-         }
-         else {
-            *copied_text = ch;
-            ++copied_text;
-            ch = read_ch( parse );
-         }
+   text = temp_text( parse );
+   while ( true ) {
+      if ( ! ch ) {
+         struct pos pos;
+         t_init_pos( &pos,
+            parse->source->file_entry_id,
+            line, column );
+         p_diag( parse, DIAG_POS_ERR, &pos,
+            "unterminated string" );
+         p_bail( parse );
+      }
+      else if ( ch == '"' ) {
+         ch = read_ch( parse );
+         tk = TK_LIT_STRING;
+         goto finish;
+      }
+      else {
+         append_ch( text, ch );
+         ch = read_ch( parse );
       }
    }
 
@@ -1751,15 +1671,15 @@ void read_token_acs95( struct parse* parse, struct token* token ) {
    finish:
    // -----------------------------------------------------------------------
    token->type = tk;
+   token->modifiable_text = NULL;
    if ( text ) {
-      token->text = text;
-      token->length = length;
+      token->text = t_intern_text( parse->task, text->value, text->length );
+      token->length = text->length;
    }
    else {
       const struct token_info* info = p_get_token_info( tk );
       token->text = info->shared_text;
-      token->length = ( length > 0 ) ?
-         length : info->length;
+      token->length = info->length;
    }
    token->pos.line = line;
    token->pos.column = column;