From 75b6554f5651b6f9ea06c1aa25e271a59d0fbff2 Mon Sep 17 00:00:00 2001
From: Jim Huang
Date: Fri, 31 Oct 2025 15:41:59 +0800
Subject: [PATCH 1/3] Reduce timer checking overhead with lazy evaluation

This reduces CPU overhead from timer interrupt checking by implementing:

1. Lazy timer checking: Cache next_interrupt_at (the earliest interrupt
   time across all harts) to skip expensive checks when no interrupt is
   due.

2. Event-driven wait: Replace the fixed 1ms periodic timer with dynamic
   one-shot timers (kqueue on macOS, timerfd on Linux) that wake exactly
   when the next interrupt is due.
---
 aclint.c |  19 +++++++
 device.h |  10 ++++
 main.c   | 160 ++++++++++++++++++++++++++++++++++++++++---------------
 3 files changed, 146 insertions(+), 43 deletions(-)

diff --git a/aclint.c b/aclint.c
index 2be7d46e..53005127 100644
--- a/aclint.c
+++ b/aclint.c
@@ -4,6 +4,20 @@
 #include "riscv_private.h"
 
 /* ACLINT MTIMER */
+
+/* Recalculate the next interrupt time by finding the minimum mtimecmp
+ * across all harts. This is called whenever mtimecmp is written.
+ */
+void aclint_mtimer_recalc_next_interrupt(mtimer_state_t *mtimer)
+{
+    uint64_t min_cmp = UINT64_MAX;
+    for (uint32_t i = 0; i < mtimer->n_harts; i++) {
+        if (mtimer->mtimecmp[i] < min_cmp)
+            min_cmp = mtimer->mtimecmp[i];
+    }
+    mtimer->next_interrupt_at = min_cmp;
+}
+
 void aclint_mtimer_update_interrupts(hart_t *hart, mtimer_state_t *mtimer)
 {
     if (semu_timer_get(&mtimer->mtime) >= mtimer->mtimecmp[hart->mhartid])
@@ -63,6 +77,11 @@ static bool aclint_mtimer_reg_write(mtimer_state_t *mtimer,
             cmp_val = (cmp_val & 0xFFFFFFFF00000000ULL) | value;
 
         mtimer->mtimecmp[addr >> 3] = cmp_val;
+
+        /* Recalculate the next interrupt time when mtimecmp is updated.
+         * This is critical for the lazy timer checking optimization.
+         */
+        aclint_mtimer_recalc_next_interrupt(mtimer);
         return true;
     }
 
diff --git a/device.h b/device.h
index 963911eb..0cdcd083 100644
--- a/device.h
+++ b/device.h
@@ -242,9 +242,19 @@ typedef struct {
      */
     uint64_t *mtimecmp;
     semu_timer_t mtime;
+
+    /* Cache the earliest interrupt time to avoid checking every instruction.
+     * Updated when:
+     * 1. Any mtimecmp register is written (recalculate the min of all harts)
+     * 2. An interrupt fires (the kernel then updates mtimecmp)
+     * 3. Initialization (set to the min of all initial mtimecmp values)
+     */
+    uint64_t next_interrupt_at;
+    uint32_t n_harts; /* Number of harts, needed for the min() calculation */
 } mtimer_state_t;
 
 void aclint_mtimer_update_interrupts(hart_t *hart, mtimer_state_t *mtimer);
+void aclint_mtimer_recalc_next_interrupt(mtimer_state_t *mtimer);
 void aclint_mtimer_read(hart_t *hart,
                         mtimer_state_t *mtimer,
                         uint32_t addr,
diff --git a/main.c b/main.c
index 28484782..8934c8e8 100644
--- a/main.c
+++ b/main.c
@@ -105,9 +105,33 @@ static void emu_update_timer_interrupt(hart_t *hart)
 {
     emu_state_t *data = PRIV(hart);
 
-    /* Sync global timer with local timer */
+    /* Lazy timer checking: only check timer interrupts once the current
+     * time has reached the earliest scheduled interrupt time, avoiding the
+     * per-hart mtimecmp comparison and interrupt update on most calls.
+     *
+     * Fast path: skip if current time < next interrupt time
+     * Slow path: check this hart's timer, then recalculate the next one
+     */
+    uint64_t current_time = semu_timer_get(&data->mtimer.mtime);
+    if (current_time < data->mtimer.next_interrupt_at) {
+        /* Fast path: no timer interrupt can fire yet, so skip checking.
+         * Still sync the timer for correctness (hart->time is used by CSR
+         * reads).
+         */
+        hart->time = data->mtimer.mtime;
+        return;
+    }
+
+    /* Slow path: at least one timer might fire, so check this hart */
     hart->time = data->mtimer.mtime;
     aclint_mtimer_update_interrupts(hart, &data->mtimer);
+
+    /* Recalculate the next interrupt time after potential interrupt
+     * delivery. The kernel has likely updated mtimecmp in its handler,
+     * which already triggered a recalc, but recalculating again is cheap
+     * and keeps the cache correct if several harts share one mtimecmp.
+     */
+    aclint_mtimer_recalc_next_interrupt(&data->mtimer);
 }
 
 static void emu_update_swi_interrupt(hart_t *hart)
@@ -342,6 +366,8 @@ static inline sbi_ret_t handle_sbi_ecall_TIMER(hart_t *hart, int32_t fid)
             (((uint64_t) hart->x_regs[RV_R_A1]) << 32) |
             (uint64_t) (hart->x_regs[RV_R_A0]);
         hart->sip &= ~RV_INT_STI_BIT;
+        /* Recalculate the next interrupt time for lazy timer checking */
+        aclint_mtimer_recalc_next_interrupt(&data->mtimer);
         return (sbi_ret_t){SBI_SUCCESS, 0};
     default:
         return (sbi_ret_t){SBI_ERR_NOT_SUPPORTED, 0};
@@ -766,6 +792,11 @@ static int semu_init(emu_state_t *emu, int argc, char **argv)
     /* Set up ACLINT */
     semu_timer_init(&emu->mtimer.mtime, CLOCK_FREQ, hart_count);
     emu->mtimer.mtimecmp = calloc(vm->n_hart, sizeof(uint64_t));
+    emu->mtimer.n_harts = vm->n_hart;
+    /* mtimecmp is zero-initialized by calloc, so next_interrupt_at starts
+     * at 0. It is updated when the kernel writes mtimecmp via SBI or MMIO.
+     */
+    emu->mtimer.next_interrupt_at = 0;
     emu->mswi.msip = calloc(vm->n_hart, sizeof(uint32_t));
     emu->sswi.ssip = calloc(vm->n_hart, sizeof(uint32_t));
 #if SEMU_HAS(VIRTIOSND)
@@ -953,6 +984,34 @@ static void print_mmu_cache_stats(vm_t *vm)
 }
 #endif
 
+/* Calculate nanoseconds until the next timer interrupt.
+ * Returns 0 if an interrupt is already due; the result is capped at 100ms.
+ */
+static uint64_t calc_ns_until_next_interrupt(emu_state_t *emu)
+{
+    uint64_t current_time = semu_timer_get(&emu->mtimer.mtime);
+    uint64_t next_int = emu->mtimer.next_interrupt_at;
+
+    /* If the interrupt is already due, return immediately */
+    if (current_time >= next_int)
+        return 0;
+
+    /* Calculate ticks until the interrupt */
+    uint64_t ticks_remaining = next_int - current_time;
+
+    /* Convert RISC-V timer ticks to nanoseconds:
+     * ns = ticks * (1e9 / CLOCK_FREQ)
+     */
+    uint64_t ns = (ticks_remaining * 1000000000ULL) / emu->mtimer.mtime.freq;
+
+    /* Cap at 100ms to maintain responsiveness for UART and other events */
+    const uint64_t MAX_WAIT_NS = 100000000ULL; /* 100ms */
+    if (ns > MAX_WAIT_NS)
+        ns = MAX_WAIT_NS;
+
+    return ns;
+}
+
 static int semu_run(emu_state_t *emu)
 {
     int ret;
@@ -974,36 +1033,20 @@ static int semu_run(emu_state_t *emu)
         return -1;
     }
 
-    /* Add 1ms periodic timer */
-    struct kevent kev_timer;
-    EV_SET(&kev_timer, 1, EVFILT_TIMER, EV_ADD | EV_ENABLE, 0, 1, NULL);
-    if (kevent(kq, &kev_timer, 1, NULL, 0, NULL) < 0) {
-        perror("kevent timer setup");
-        close(kq);
-        return -1;
-    }
-
-    /* Note: UART input is polled via u8250_check_ready(), no need to
-     * monitor with kqueue. Timer events are sufficient to wake from WFI.
+    /* Note: The timer is configured dynamically in the event loop based on
+     * next_interrupt_at. UART input is polled via u8250_check_ready().
     */
 #else
-    /* Linux: create timerfd for periodic wakeup */
+    /* Linux: create a timerfd for dynamic timer wakeup */
     int wfi_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
     if (wfi_timer_fd < 0) {
        perror("timerfd_create");
        return -1;
     }
-    /* Configure 1ms periodic timer */
-    struct itimerspec its = {
-        .it_interval = {.tv_sec = 0, .tv_nsec = 1000000},
-        .it_value = {.tv_sec = 0, .tv_nsec = 1000000},
-    };
-    if (timerfd_settime(wfi_timer_fd, 0, &its, NULL) < 0) {
-        perror("timerfd_settime");
-        close(wfi_timer_fd);
-        return -1;
-    }
+    /* The timer is configured dynamically in the event loop based on
+     * next_interrupt_at to minimize unnecessary wakeups.
+     */
 #endif
 
     while (!emu->stopped) {
@@ -1025,30 +1068,61 @@ static int semu_run(emu_state_t *emu)
         }
         if (all_waiting) {
             /* All harts waiting for interrupt - use event-driven wait
-             * to reduce CPU usage while maintaining responsiveness
+             * to reduce CPU usage while maintaining responsiveness.
+             * Dynamically adjust the timer based on next_interrupt_at.
              */
+
+            /* Calculate how long to wait until the next timer interrupt */
+            uint64_t wait_ns = calc_ns_until_next_interrupt(emu);
+
+            /* If an interrupt is already due, don't wait - continue
+             * immediately
+             */
+            if (wait_ns > 0) {
 #ifdef __APPLE__
-            /* macOS: wait for kqueue events (timer or UART) */
-            struct kevent events[2];
-            int nevents = kevent(kq, NULL, 0, events, 2, NULL);
-            /* Events are automatically handled - timer fires every 1ms,
-             * UART triggers on input. No need to explicitly consume. */
-            (void) nevents;
+                /* Configure a one-shot kqueue timer with a dynamic timeout */
+                struct kevent kev_timer;
+                /* NOTE_USECONDS selects microseconds; wait_ns / 1000
+                 * converts ns to us
+                 */
+                EV_SET(&kev_timer, 1, EVFILT_TIMER,
+                       EV_ADD | EV_ENABLE | EV_ONESHOT, NOTE_USECONDS,
+                       wait_ns / 1000, NULL);
+
+                struct kevent events[2];
+                int nevents = kevent(kq, &kev_timer, 1, events, 2, NULL);
+                /* Events are automatically handled.
+                 * Wakeup occurs on:
+                 * - Timer expiration (wait_ns elapsed)
+                 * - UART input (if monitored)
+                 */
+                (void) nevents;
 #else
-            /* Linux: poll on timerfd and UART */
-            struct pollfd pfds[2];
-            pfds[0] = (struct pollfd){wfi_timer_fd, POLLIN, 0};
-            pfds[1] = (struct pollfd){emu->uart.in_fd, POLLIN, 0};
-            poll(pfds, 2, -1);
-
-            /* Consume timerfd event to prevent accumulation */
-            if (pfds[0].revents & POLLIN) {
-                uint64_t expirations;
-                ssize_t ret =
-                    read(wfi_timer_fd, &expirations, sizeof(expirations));
-                (void) ret; /* Ignore read errors - timer will retry */
-            }
+                /* Linux: configure timerfd with a dynamic one-shot timeout */
+                struct itimerspec its = {
+                    .it_interval = {0, 0}, /* One-shot, no repeat */
+                    .it_value = {wait_ns / 1000000000,
+                                 wait_ns % 1000000000},
+                };
+                if (timerfd_settime(wfi_timer_fd, 0, &its, NULL) < 0) {
+                    perror("timerfd_settime");
+                    /* Continue anyway - retry on the next iteration */
+                }
+
+                /* Poll on the timerfd and UART */
+                struct pollfd pfds[2];
+                pfds[0] = (struct pollfd){wfi_timer_fd, POLLIN, 0};
+                pfds[1] = (struct pollfd){emu->uart.in_fd, POLLIN, 0};
+                poll(pfds, 2, -1);
+
+                /* Consume the timerfd event to prevent accumulation */
+                if (pfds[0].revents & POLLIN) {
+                    uint64_t expirations;
+                    ssize_t ret = read(wfi_timer_fd, &expirations,
+                                       sizeof(expirations));
+                    (void) ret; /* Ignore read errors - the timer will retry */
+                }
 #endif
+            }
         }
     }

From 55712b6976250f2f987b1a81f5cc1283cdf3c398 Mon Sep 17 00:00:00 2001
From: Jim Huang
Date: Fri, 31 Oct 2025 16:05:43 +0800
Subject: [PATCH 2/3] Use fixed timeout during fake timer phase

The previous event-driven timer implementation caused hrtimer warnings
and boot delays on macOS CI because it calculated wait times from the
emulator's fake incremental timer but waited using host OS real-time
timers.

Root cause:
- During boot, semu_timer_get() returns fake ticks (slow linear growth)
- calc_ns_until_next_interrupt() converted these to nanoseconds
- kqueue/timerfd waited using wall clock time

Fix: Use a conservative 1ms fixed timeout during the boot phase. After
boot completes and the timer switches to real time, use the dynamic
calculation for optimal CPU efficiency.
---
 main.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/main.c b/main.c
index 8934c8e8..3e3d631a 100644
--- a/main.c
+++ b/main.c
@@ -25,6 +25,7 @@
 #include "mini-gdbstub/include/gdbstub.h"
 #include "riscv.h"
 #include "riscv_private.h"
+#include "utils.h"
 
 #define PRIV(x) ((emu_state_t *) x->priv)
 
 /* Forward declarations for coroutine support */
@@ -986,9 +987,20 @@ static void print_mmu_cache_stats(vm_t *vm)
 
 /* Calculate nanoseconds until the next timer interrupt.
  * Returns 0 if an interrupt is already due; the result is capped at 100ms.
+ *
+ * During boot (when using the fake incremental timer), use a conservative
+ * 1ms timeout to avoid a mismatch between emulator time and the host OS timer.
  */
 static uint64_t calc_ns_until_next_interrupt(emu_state_t *emu)
 {
+    /* During boot, use a fixed short timeout to avoid the fake timer /
+     * real-time mismatch. The fake timer advances slowly (incrementally),
+     * but host OS timers use wall clock time, which can cause large delays
+     * if we calculate based on fake timer values.
+     */
+    if (!boot_complete)
+        return 1000000ULL; /* 1ms - conservative but safe during boot */
+
     uint64_t current_time = semu_timer_get(&emu->mtimer.mtime);
     uint64_t next_int = emu->mtimer.next_interrupt_at;

From 90aa27de926319e45360a49c44272eb1b51fc17b Mon Sep 17 00:00:00 2001
From: Jim Huang
Date: Fri, 31 Oct 2025 16:33:21 +0800
Subject: [PATCH 3/3] Fix arithmetic overflow in timer wait calculation

When next_interrupt_at equals UINT64_MAX (disabled timer) or is very
large, the calculation '(ticks_remaining * 1000000000ULL) / freq'
overflows, resulting in wait_ns = 0. This prevents the sleep mechanism
from working, eliminating the CPU efficiency gains.
---
 main.c | 35 ++++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/main.c b/main.c
index 3e3d631a..def6fca5 100644
--- a/main.c
+++ b/main.c
@@ -993,6 +993,9 @@ static void print_mmu_cache_stats(vm_t *vm)
  */
 static uint64_t calc_ns_until_next_interrupt(emu_state_t *emu)
 {
+    /* Cap at 100ms to maintain responsiveness for UART and other events */
+    const uint64_t MAX_WAIT_NS = 100000000ULL; /* 100ms */
+
     /* During boot, use a fixed short timeout to avoid the fake timer /
      * real-time mismatch. The fake timer advances slowly (incrementally),
      * but host OS timers use wall clock time, which can cause large delays
@@ -1004,20 +1007,42 @@ static uint64_t calc_ns_until_next_interrupt(emu_state_t *emu)
     uint64_t current_time = semu_timer_get(&emu->mtimer.mtime);
     uint64_t next_int = emu->mtimer.next_interrupt_at;
 
-    /* If the interrupt is already due, return immediately */
+    /* If the timer is disabled (next_interrupt_at == UINT64_MAX), use the
+     * maximum timeout to avoid arithmetic overflow.
+     */
+    if (next_int == UINT64_MAX)
+        return MAX_WAIT_NS;
+
+    /* If the interrupt is already due, return immediately. This must be
+     * checked before any subtraction to avoid unsigned underflow.
+     */
     if (current_time >= next_int)
         return 0;
 
     /* Calculate ticks until the interrupt */
     uint64_t ticks_remaining = next_int - current_time;
 
-    /* Convert RISC-V timer ticks to nanoseconds:
+    /* If there is an unreasonably large gap, cap at the maximum timeout to
+     * avoid arithmetic overflow in the nanosecond conversion below.
+     */
+    if (ticks_remaining > UINT64_MAX / 1000)
+        return MAX_WAIT_NS;
+
+    /* Convert RISC-V timer ticks to nanoseconds using overflow-safe arithmetic:
      * ns = ticks * (1e9 / CLOCK_FREQ)
+     *
+     * To avoid overflow in (ticks_remaining * 1000000000ULL), check whether
+     * the multiplication would overflow; if it would, cap at MAX_WAIT_NS.
      */
-    uint64_t ns = (ticks_remaining * 1000000000ULL) / emu->mtimer.mtime.freq;
+    uint64_t freq = emu->mtimer.mtime.freq;
+    if (ticks_remaining > UINT64_MAX / 1000000000ULL) {
+        /* Would overflow - cap at the maximum timeout */
+        return MAX_WAIT_NS;
+    }
 
-    /* Cap at 100ms to maintain responsiveness for UART and other events */
-    const uint64_t MAX_WAIT_NS = 100000000ULL; /* 100ms */
+    uint64_t ns = (ticks_remaining * 1000000000ULL) / freq;
+
+    /* Cap at the maximum timeout */
     if (ns > MAX_WAIT_NS)
         ns = MAX_WAIT_NS;
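
Side note on the conversion guarded in PATCH 3: below is a minimal standalone
sketch of an overflow-safe tick-to-nanosecond conversion. It uses
divide-and-remainder splitting instead of the early-cap magnitude checks the
series adds; the name ticks_to_ns and the 65 MHz frequency are illustrative
assumptions, not taken from the series.

#include <stdint.h>
#include <stdio.h>

/* 100ms cap, mirroring MAX_WAIT_NS in the series */
#define MAX_WAIT_NS 100000000ULL

/* Split ticks into whole seconds plus a sub-second remainder so that the
 * remainder multiply stays below freq * 1e9, which fits in 64 bits for any
 * freq below roughly 18.4 GHz. Any whole second already exceeds the 100ms
 * cap, so that case saturates immediately.
 */
static uint64_t ticks_to_ns(uint64_t ticks, uint64_t freq)
{
    if (ticks / freq > 0) /* one full second or more: beyond the cap */
        return MAX_WAIT_NS;

    uint64_t rem = ticks % freq; /* rem < freq, so no overflow below */
    uint64_t ns = (rem * 1000000000ULL) / freq;
    return ns > MAX_WAIT_NS ? MAX_WAIT_NS : ns;
}

int main(void)
{
    const uint64_t freq = 65000000ULL; /* assumed CLOCK_FREQ, illustrative */

    /* A disabled timer (UINT64_MAX ticks away) saturates at the cap */
    printf("%llu\n", (unsigned long long) ticks_to_ns(UINT64_MAX, freq));

    /* 65000 ticks at 65 MHz is exactly 1ms (1000000 ns) */
    printf("%llu\n", (unsigned long long) ticks_to_ns(65000, freq));
    return 0;
}

Compared with the series' approach of early returns on UINT64_MAX and large
gaps, the split form trades one extra division for needing no magnitude
guards at all; both avoid the wrap that produced wait_ns = 0.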