From 5bf64d73c50ccb37b0f429685beb599bccd2a77a Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Mon, 18 Aug 2025 04:45:55 +0800 Subject: [PATCH] Fix crash on pointer dereference after declaration The parser would crash with "Unexpected token" when a pointer dereference assignment (e.g., '*p = 0') appeared immediately after a pointer declaration in the same block. This is a common C pattern that was blocking normal code compilation. The issue occurred because the parser tried to interpret '*p' as a type declaration when it appeared after a pointer declaration like "int *p = &x;". The ambiguity between * as a type modifier versus a dereference operator caused the crash. This fix adds lookahead logic to check if the identifier after '*' is a known type. Only if it is a type will the statement be treated as a declaration; otherwise it's handled as a pointer dereference. --- src/parser.c | 46 +++++++++++++++++++++++++++++++++++++--------- tests/driver.sh | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 9 deletions(-) diff --git a/src/parser.c b/src/parser.c index a789b469..9f95100c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -3267,11 +3267,42 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb) } /* is it a variable declaration? */ - int find_type_flag = lex_accept(T_struct) ? 2 : 1; - if (find_type_flag == 1 && lex_accept(T_union)) { - find_type_flag = 2; + /* Special handling when statement starts with asterisk */ + if (has_asterisk) { + /* For "*identifier", check if identifier is a type. + * If not, it's a dereference, not a declaration. 
*/ + int saved_size = SOURCE->size; + char saved_char = next_char; + int saved_token = next_token; + + /* Skip the asterisk to peek at the identifier */ + lex_accept(T_asterisk); + char next_ident[MAX_TOKEN_LEN]; + bool could_be_type = false; + + if (lex_peek(T_identifier, next_ident)) { + /* Check if it's a type name */ + type = find_type(next_ident, 0); + if (type) + could_be_type = true; + } + + /* Restore position */ + SOURCE->size = saved_size; + next_char = saved_char; + next_token = saved_token; + + /* If it's not a type, skip the declaration block */ + if (!could_be_type) + type = NULL; + } else { + /* Normal type checking without asterisk */ + int find_type_flag = lex_accept(T_struct) ? 2 : 1; + if (find_type_flag == 1 && lex_accept(T_union)) + find_type_flag = 2; + type = find_type(token, find_type_flag); } - type = find_type(token, find_type_flag); + if (type) { var = require_typed_var(parent, type); read_full_var_decl(var, 0, 0); @@ -3280,9 +3311,7 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb) if (lex_accept(T_assign)) { if (lex_peek(T_open_curly, NULL) && (var->array_size > 0 || var->is_ptr > 0)) { - parse_array_init( - var, parent, &bb, - 1); /* FIXED: Emit code for locals in functions */ + parse_array_init(var, parent, &bb, 1); } else { read_expr(parent, &bb); read_ternary_operation(parent, &bb); @@ -3305,8 +3334,7 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb) if (lex_accept(T_assign)) { if (lex_peek(T_open_curly, NULL) && (nv->array_size > 0 || nv->is_ptr > 0)) { - parse_array_init(nv, parent, &bb, - 1); /* FIXED: Emit code for locals */ + parse_array_init(nv, parent, &bb, 1); } else { read_expr(parent, &bb); diff --git a/tests/driver.sh b/tests/driver.sh index d947eab6..fbc15012 100755 --- a/tests/driver.sh +++ b/tests/driver.sh @@ -461,6 +461,53 @@ items 3 "int x; int *y; x = 3; y = &x; return y[0];" items 5 "int b; int *a; b = 10; a = &b; a[0] = 5; return b;" items 2 "int x[2]; int y; x[1] = 
2; y = *(x + 1); return y;" items 2 "int x; int *y; int z; z = 2; y = &z; x = *y; return x;" + +# pointer dereference immediately after declaration +items 42 "int x; x = 10; int *p; p = &x; p[0] = 42; exit(x);" +items 10 "int val; val = 5; int *ptr; ptr = &val; ptr[0] = 10; exit(val);" +items 7 "int a; a = 3; int *b; b = &a; b[0] = 7; exit(a);" + +# asterisk dereference for reading after declaration +items 42 "int x; x = 42; int *p; p = &x; int y; y = *p; exit(y);" +items 15 "int val; val = 15; int *ptr; ptr = &val; exit(*ptr);" +items 100 "int a; a = 100; int *b; b = &a; int c; c = *b; exit(c);" + +# complex pointer dereference patterns after declaration +try_ 25 << EOF +int main() { + int x; + int *p; + x = 10; + p = &x; /* point p at x (p is declared above) */ + p[0] = 25; /* array-style assignment immediately after */ + return x; +} +EOF + +try_ 50 << EOF +int main() { + int arr[3]; + int *ptr; + arr[0] = 10; arr[1] = 20; arr[2] = 30; + ptr = arr; + ptr[0] = 50; /* should modify arr[0] */ + return arr[0]; +} +EOF + +try_ 50 << EOF +int main() { + int a, b; + int *p1, *p2; + a = 5; b = 15; + p1 = &a; + p2 = &b; + p1[0] = 100; /* multiple pointer assignments in same block */ + p2[0] = 200; + return p1[0] / 2; /* 100 / 2 = 50 */ +} +EOF + try_ 10 << EOF void change_it(int *p) { if (p[0] == 0) {