35 changes: 33 additions & 2 deletions src/defs.h
@@ -19,8 +19,8 @@
#define MAX_PARAMS 8
#define MAX_LOCALS 1600
#define MAX_FIELDS 64
#define MAX_TYPES 128
#define MAX_IR_INSTR 60000
#define MAX_TYPES 256
#define MAX_IR_INSTR 80000
#define MAX_BB_PRED 128
#define MAX_BB_DOM_SUCC 64
#define MAX_BB_RDOM_SUCC 256
@@ -180,6 +180,37 @@ typedef enum {
T_cppd_pragma
} token_t;

/* Source location tracking for better error reporting */
typedef struct {
int line;
int column;
char *filename;
} source_location_t;

/* Token structure with metadata for enhanced lexing */
typedef struct token_info {
token_t type;
char value[MAX_TOKEN_LEN];
source_location_t location;
struct token_info *next; /* For freelist management */
} token_info_t;

/* Token freelist for memory reuse */
typedef struct {
token_info_t *freelist;
int allocated_count;
int reused_count; /* Statistics for debugging */
} token_pool_t;

/* Token buffer for improved lookahead */
#define TOKEN_BUFFER_SIZE 8
typedef struct {
token_info_t *tokens[TOKEN_BUFFER_SIZE];
int head;
int tail;
int count;
} token_buffer_t;

/* builtin types */
typedef enum {
TYPE_void = 0,
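The token_buffer_t above is a fixed-capacity ring buffer: head indexes the oldest buffered token, tail the next free slot, and count the number of live entries, with both indices wrapping modulo TOKEN_BUFFER_SIZE. As a standalone illustration of that indexing (a sketch only, not part of this diff; plain ints stand in for the token_info_t pointers):

#include <stdio.h>

#define TOKEN_BUFFER_SIZE 8

int main(void)
{
    int slots[TOKEN_BUFFER_SIZE];
    int head = 0, tail = 0, count = 0;

    /* Push 10 values into an 8-slot ring: once full, the oldest entry is
     * evicted first, mirroring what token_buffer_push() does with tokens.
     */
    for (int v = 0; v < 10; v++) {
        if (count == TOKEN_BUFFER_SIZE) {
            head = (head + 1) % TOKEN_BUFFER_SIZE;
            count--;
        }
        slots[tail] = v;
        tail = (tail + 1) % TOKEN_BUFFER_SIZE;
        count++;
    }

    /* Peek by logical offset, exactly like token_buffer_peek(offset):
     * prints 2..9, because 0 and 1 were evicted.
     */
    for (int i = 0; i < count; i++)
        printf("%d ", slots[(head + i) % TOKEN_BUFFER_SIZE]);
    printf("\n");
    return 0;
}

The same modular arithmetic is what keeps an 8-token lookahead window cheap: no shifting of entries, only index updates.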
16 changes: 14 additions & 2 deletions src/globals.c
@@ -19,6 +19,11 @@ token_t next_token;
char next_char;
bool skip_newline = true;

/* Token memory management */
token_pool_t *TOKEN_POOL;
token_buffer_t *TOKEN_BUFFER;
source_location_t current_location; /* Will be initialized at runtime */

bool preproc_match;

/* Point to the first character after where the macro has been called. It is
@@ -1120,6 +1125,13 @@ void global_init(void)
SOURCE = strbuf_create(MAX_SOURCE);
FUNC_MAP = hashmap_create(DEFAULT_FUNCS_SIZE);
INCLUSION_MAP = hashmap_create(DEFAULT_INCLUSIONS_SIZE);

/* Initialize token management globals */
current_location.line = 1;
current_location.column = 1;
current_location.filename = NULL;
TOKEN_POOL = NULL;
TOKEN_BUFFER = NULL;
ALIASES_MAP = hashmap_create(MAX_ALIASES);
CONSTANTS_MAP = hashmap_create(MAX_CONSTANTS);

@@ -1195,8 +1207,8 @@ void error(char *msg)

strcpy(diagnostic + i, "^ Error occurs here");

/* TODO: figure out the corresponding C source file path and report line
* number.
/* TODO: Enhanced error reporting with location tracking will be added
* once self-hosting is stable with new token management
*/
printf("[Error]: %s\nOccurs at source location %d.\n%s\n", msg,
SOURCE->size, diagnostic);
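Note that global_init() only resets the new token-management globals (NULL pool and buffer, location 1:1); the actual allocation is deferred to the lexer's first token_pool_alloc()/token_buffer_push() call. A minimal sketch of that lazy, idempotent-init contract (illustrative only; calloc stands in for the project's arena allocator):

#include <stdlib.h>

typedef struct { int allocated_count; } pool_t;

static pool_t *POOL;          /* reset to NULL, as global_init() does for TOKEN_POOL */

static void pool_init(void)
{
    if (POOL)                 /* idempotent: repeated calls are harmless */
        return;
    POOL = calloc(1, sizeof(pool_t));
}

static pool_t *pool_use(void)
{
    if (!POOL)                /* first use triggers the real allocation */
        pool_init();
    return POOL;
}

int main(void)
{
    POOL = NULL;
    return pool_use() ? 0 : 1;
}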
193 changes: 193 additions & 0 deletions src/lexer.c
@@ -122,6 +122,142 @@ token_t lookup_keyword(char *token)
}

/* Cleanup function for lexer hashmaps */
/* Token Memory Management Functions */

/* Initialize token pool for memory reuse */
void token_pool_init(void)
{
if (TOKEN_POOL)
return;

TOKEN_POOL = arena_alloc(GENERAL_ARENA, sizeof(token_pool_t));
if (TOKEN_POOL) {
TOKEN_POOL->freelist = NULL;
TOKEN_POOL->allocated_count = 0;
TOKEN_POOL->reused_count = 0;
}
}

/* Allocate or reuse a token from the pool */
token_info_t *token_pool_alloc(void)
{
if (!TOKEN_POOL)
token_pool_init();

token_info_t *token;

if (TOKEN_POOL->freelist) {
/* Reuse from freelist */
token = TOKEN_POOL->freelist;
TOKEN_POOL->freelist = token->next;
TOKEN_POOL->reused_count++;
} else {
/* Allocate new token */
token = arena_alloc(GENERAL_ARENA, sizeof(token_info_t));
TOKEN_POOL->allocated_count++;
}

/* Clear token data */
token->type = T_eof;
token->value[0] = '\0';
/* Set location fields individually */
token->location.line = current_location.line;
token->location.column = current_location.column;
token->location.filename = current_location.filename;
token->next = NULL;

return token;
}

/* Return token to freelist for reuse */
void token_pool_free(token_info_t *token)
{
if (!token || !TOKEN_POOL)
return;

token->next = TOKEN_POOL->freelist;
TOKEN_POOL->freelist = token;
}
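
token_pool_alloc() and token_pool_free() together form a classic freelist: freed tokens are pushed onto a singly linked list through their next field and handed back out before any new arena allocation, which is what the allocated_count/reused_count counters measure. A self-contained sketch of the same pattern (not part of this patch; malloc stands in for arena_alloc):

#include <stdio.h>
#include <stdlib.h>

typedef struct node {
    struct node *next;        /* doubles as the freelist link, like token_info_t.next */
} node_t;

static node_t *freelist;
static int allocated_count, reused_count;

static node_t *node_alloc(void)
{
    node_t *n;

    if (freelist) {           /* reuse before allocating anything new */
        n = freelist;
        freelist = n->next;
        reused_count++;
    } else {
        n = malloc(sizeof(node_t));
        allocated_count++;
    }
    n->next = NULL;
    return n;
}

static void node_free(node_t *n)
{
    n->next = freelist;       /* push back for the next node_alloc() */
    freelist = n;
}

int main(void)
{
    node_t *a = node_alloc(); /* fresh allocation: allocated_count = 1 */
    node_free(a);
    node_t *b = node_alloc(); /* recycled: reused_count = 1, b == a */
    printf("allocated=%d reused=%d recycled=%s\n", allocated_count,
           reused_count, (a == b) ? "yes" : "no");
    free(b);
    return 0;
}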

/* Initialize token buffer for lookahead */
void token_buffer_init(void)
{
if (TOKEN_BUFFER)
return;

TOKEN_BUFFER = arena_alloc(GENERAL_ARENA, sizeof(token_buffer_t));
TOKEN_BUFFER->head = 0;
TOKEN_BUFFER->tail = 0;
TOKEN_BUFFER->count = 0;

for (int i = 0; i < TOKEN_BUFFER_SIZE; i++)
TOKEN_BUFFER->tokens[i] = NULL;
}

/* Add token to buffer */
void token_buffer_push(token_info_t *token)
{
if (!TOKEN_BUFFER)
token_buffer_init();

if (TOKEN_BUFFER->count >= TOKEN_BUFFER_SIZE) {
/* Buffer full, free oldest token */
token_info_t *old = TOKEN_BUFFER->tokens[TOKEN_BUFFER->head];
token_pool_free(old);
TOKEN_BUFFER->head = (TOKEN_BUFFER->head + 1) % TOKEN_BUFFER_SIZE;
TOKEN_BUFFER->count--;
}

TOKEN_BUFFER->tokens[TOKEN_BUFFER->tail] = token;
TOKEN_BUFFER->tail = (TOKEN_BUFFER->tail + 1) % TOKEN_BUFFER_SIZE;
TOKEN_BUFFER->count++;
}

/* Look ahead N tokens without consuming */
token_info_t *token_buffer_peek(int offset)
{
if (!TOKEN_BUFFER || offset >= TOKEN_BUFFER->count)
return NULL;

int idx = (TOKEN_BUFFER->head + offset) % TOKEN_BUFFER_SIZE;
return TOKEN_BUFFER->tokens[idx];
}

/* Update source location tracking */
void update_location(char c)
{
if (c == '\n') {
current_location.line++;
current_location.column = 1;
} else if (c == '\t') {
current_location.column += 4; /* Assume 4-space tabs */
} else {
current_location.column++;
}
}
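
A quick trace of the column arithmetic above, under the patch's fixed 4-column tab assumption (it adds 4 unconditionally rather than rounding to the next tab stop):

#include <stdio.h>

int main(void)
{
    int line = 1, column = 1;

    /* Same rules as update_location(), applied to the sample input "if\tx\n":
     * 'i' -> col 2, 'f' -> col 3, '\t' -> col 7, 'x' -> col 8,
     * '\n' -> line 2, col 1.
     */
    for (const char *p = "if\tx\n"; *p; p++) {
        if (*p == '\n') {
            line++;
            column = 1;
        } else if (*p == '\t') {
            column += 4;
        } else {
            column++;
        }
    }
    printf("line %d, column %d\n", line, column); /* prints: line 2, column 1 */
    return 0;
}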

/* Set current filename for error reporting */
void set_current_filename(char *filename)
{
current_location.filename = filename;
current_location.line = 1;
current_location.column = 1;
}

/* Enhanced error reporting with location */
void error_with_location(char *msg, source_location_t *loc)
{
if (loc && loc->filename) {
printf("%s:%d:%d: error: %s\n", loc->filename, loc->line, loc->column,
msg);
} else if (loc) {
printf("line %d, column %d: error: %s\n", loc->line, loc->column, msg);
} else {
printf("error: %s\n", msg);
}
abort();
}

void lexer_cleanup()
{
if (DIRECTIVE_MAP) {
@@ -140,6 +276,11 @@ void lexer_cleanup()
*/
directive_tokens_storage = NULL;
keyword_tokens_storage = NULL;

/* Token pool and buffer are also arena-allocated, no explicit free needed
*/
TOKEN_POOL = NULL;
TOKEN_BUFFER = NULL;
}

bool is_whitespace(char c)
@@ -231,6 +372,7 @@ char read_char(bool is_skip_space)
{
SOURCE->size++;
next_char = SOURCE->elements[SOURCE->size];
/* TODO: Re-enable after self-hosting: update_location(next_char); */
if (is_skip_space)
skip_whitespace();
return next_char;
@@ -807,6 +949,33 @@ token_t lex_token_internal(bool aliasing)
return T_eof;
}

/* Enhanced lex_token that returns a full token_info structure */
token_info_t *lex_token_enhanced(bool aliasing)
{
token_info_t *token = token_pool_alloc();

/* Save location at start of token */
int saved_line = current_location.line;
int saved_column = current_location.column;
char *saved_filename = current_location.filename;

/* Get the token type using existing logic */
token->type = lex_token_internal(aliasing);

/* Copy token string value */
strcpy(token->value, token_str);

/* Restore saved location fields individually */
token->location.line = saved_line;
token->location.column = saved_column;
token->location.filename = saved_filename;

/* Add to buffer for lookahead capability */
token_buffer_push(token);

return token;
}

/* Lex next token and returns its token type. To disable aliasing on next
* token, use 'lex_token_internal'.
*/
@@ -815,6 +984,30 @@ token_t lex_token(void)
return lex_token_internal(true);
}

/* Advanced lookahead functions using token buffer */
bool lex_peek_ahead(int offset, token_t expected_type)
{
token_info_t *future_token = token_buffer_peek(offset);
return future_token && future_token->type == expected_type;
}

/* Check if next N tokens match a pattern */
bool lex_match_sequence(token_t *pattern, int count)
{
for (int i = 0; i < count; i++) {
if (!lex_peek_ahead(i, pattern[i]))
return false;
}
return true;
}

/* Get token value at offset for lookahead inspection */
char *lex_peek_value(int offset)
{
token_info_t *future_token = token_buffer_peek(offset);
return future_token ? future_token->value : NULL;
}
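
Together with lex_token_enhanced(), these helpers give the parser a bounded lookahead window over the ring buffer. A hedged usage sketch follows; it assumes the declarations added in this patch plus <stdio.h>, and the specific token-type names (T_identifier, T_open_bracket) and the "identifier followed by '('" pattern are chosen only for illustration:

/* Illustrative only: prefill the lookahead window, then test token shapes
 * before committing to a parse path.
 */
void lookahead_demo(void)
{
    /* Lex a window of tokens; each call pushes into TOKEN_BUFFER. */
    for (int i = 0; i < TOKEN_BUFFER_SIZE; i++)
        lex_token_enhanced(true);

    token_t call_shape[2] = {T_identifier, T_open_bracket};

    if (lex_match_sequence(call_shape, 2)) {
        /* Offset 0 is the oldest token in the window; grab its spelling. */
        printf("call-like shape starting at '%s'\n", lex_peek_value(0));
    } else if (lex_peek_ahead(0, T_identifier)) {
        printf("plain identifier '%s'\n", lex_peek_value(0));
    }
}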

/* Skip the content. We only need the index where the macro body begins. */
void skip_macro_body(void)
{