From 75b6554f5651b6f9ea06c1aa25e271a59d0fbff2 Mon Sep 17 00:00:00 2001
From: Jim Huang
Date: Fri, 31 Oct 2025 15:41:59 +0800
Subject: [PATCH 1/3] Reduce timer checking overhead with lazy evaluation

This reduces CPU overhead from timer interrupt checking by implementing:

1. Lazy timer checking: Cache next_interrupt_at (the earliest interrupt
   time across all harts) to skip expensive checks when no interrupt is
   due.

2. Event-driven wait: Replace the fixed 1ms periodic timer with dynamic
   one-shot timers (kqueue on macOS, timerfd on Linux) that wake exactly
   when the next interrupt is due.
---
 aclint.c |  19 +++++++
 device.h |  10 ++++
 main.c   | 160 ++++++++++++++++++++++++++++++++++++++++---------------
 3 files changed, 146 insertions(+), 43 deletions(-)

diff --git a/aclint.c b/aclint.c
index 2be7d46e..53005127 100644
--- a/aclint.c
+++ b/aclint.c
@@ -4,6 +4,20 @@
 #include "riscv_private.h"
 
 /* ACLINT MTIMER */
+
+/* Recalculate the next interrupt time by finding the minimum mtimecmp
+ * across all harts. This is called whenever mtimecmp is written.
+ */
+void aclint_mtimer_recalc_next_interrupt(mtimer_state_t *mtimer)
+{
+    uint64_t min_cmp = UINT64_MAX;
+    for (uint32_t i = 0; i < mtimer->n_harts; i++) {
+        if (mtimer->mtimecmp[i] < min_cmp)
+            min_cmp = mtimer->mtimecmp[i];
+    }
+    mtimer->next_interrupt_at = min_cmp;
+}
+
 void aclint_mtimer_update_interrupts(hart_t *hart, mtimer_state_t *mtimer)
 {
     if (semu_timer_get(&mtimer->mtime) >= mtimer->mtimecmp[hart->mhartid])
@@ -63,6 +77,11 @@ static bool aclint_mtimer_reg_write(mtimer_state_t *mtimer,
             cmp_val = (cmp_val & 0xFFFFFFFF00000000ULL) | value;
 
         mtimer->mtimecmp[addr >> 3] = cmp_val;
+
+        /* Recalculate the next interrupt time when mtimecmp is updated.
+         * This is critical for the lazy timer checking optimization.
+         */
+        aclint_mtimer_recalc_next_interrupt(mtimer);
         return true;
     }
 
diff --git a/device.h b/device.h
index 963911eb..0cdcd083 100644
--- a/device.h
+++ b/device.h
@@ -242,9 +242,19 @@ typedef struct {
      */
     uint64_t *mtimecmp;
     semu_timer_t mtime;
+
+    /* Cache the earliest interrupt time to avoid checking every instruction.
+     * Updated when:
+     * 1. Any mtimecmp register is written (recalculate the min of all harts)
+     * 2. An interrupt fires (the kernel then updates mtimecmp)
+     * 3. Initialization (set to the min of all initial mtimecmp values)
+     */
+    uint64_t next_interrupt_at;
+    uint32_t n_harts; /* Number of harts, needed for the min() calculation */
 } mtimer_state_t;
 
 void aclint_mtimer_update_interrupts(hart_t *hart, mtimer_state_t *mtimer);
+void aclint_mtimer_recalc_next_interrupt(mtimer_state_t *mtimer);
 void aclint_mtimer_read(hart_t *hart,
                         mtimer_state_t *mtimer,
                         uint32_t addr,
diff --git a/main.c b/main.c
index 28484782..8934c8e8 100644
--- a/main.c
+++ b/main.c
@@ -105,9 +105,33 @@ static void emu_update_timer_interrupt(hart_t *hart)
 {
     emu_state_t *data = PRIV(hart);
 
-    /* Sync global timer with local timer */
+    /* Lazy timer checking: only check timer interrupts once the current
+     * time has reached the earliest scheduled interrupt time, avoiding the
+     * per-hart mtimecmp comparison and interrupt update on most calls.
+     *
+     * Fast path: skip if current time < next interrupt time
+     * Slow path: check this hart's timer, then recalculate the next one
+     */
+    uint64_t current_time = semu_timer_get(&data->mtimer.mtime);
+    if (current_time < data->mtimer.next_interrupt_at) {
+        /* Fast path: no timer interrupt can fire yet, so skip checking.
+         * Still sync the timer for correctness (hart->time is used by CSR
+         * reads).
+         */
+        hart->time = data->mtimer.mtime;
+        return;
+    }
+
+    /* Slow path: at least one timer might fire, so check this hart */
     hart->time = data->mtimer.mtime;
     aclint_mtimer_update_interrupts(hart, &data->mtimer);
+
+    /* Recalculate the next interrupt time after potential interrupt
+     * delivery. The kernel has likely updated mtimecmp in its handler,
+     * which already triggered a recalc, but recalculating again is cheap
+     * and keeps the cache correct if several harts share one mtimecmp.
+     */
+    aclint_mtimer_recalc_next_interrupt(&data->mtimer);
 }
 
 static void emu_update_swi_interrupt(hart_t *hart)
@@ -342,6 +366,8 @@ static inline sbi_ret_t handle_sbi_ecall_TIMER(hart_t *hart, int32_t fid)
             (((uint64_t) hart->x_regs[RV_R_A1]) << 32) |
             (uint64_t) (hart->x_regs[RV_R_A0]);
         hart->sip &= ~RV_INT_STI_BIT;
+        /* Recalculate the next interrupt time for lazy timer checking */
+        aclint_mtimer_recalc_next_interrupt(&data->mtimer);
         return (sbi_ret_t){SBI_SUCCESS, 0};
     default:
         return (sbi_ret_t){SBI_ERR_NOT_SUPPORTED, 0};
@@ -766,6 +792,11 @@ static int semu_init(emu_state_t *emu, int argc, char **argv)
     /* Set up ACLINT */
     semu_timer_init(&emu->mtimer.mtime, CLOCK_FREQ, hart_count);
     emu->mtimer.mtimecmp = calloc(vm->n_hart, sizeof(uint64_t));
+    emu->mtimer.n_harts = vm->n_hart;
+    /* mtimecmp is zero-initialized by calloc, so next_interrupt_at starts
+     * at 0. It is updated when the kernel writes mtimecmp via SBI or MMIO.
+     */
+    emu->mtimer.next_interrupt_at = 0;
     emu->mswi.msip = calloc(vm->n_hart, sizeof(uint32_t));
     emu->sswi.ssip = calloc(vm->n_hart, sizeof(uint32_t));
 #if SEMU_HAS(VIRTIOSND)
@@ -953,6 +984,34 @@ static void print_mmu_cache_stats(vm_t *vm)
 }
 #endif
 
+/* Calculate nanoseconds until the next timer interrupt.
+ * Returns 0 if an interrupt is already due; the result is capped at 100ms.
+ */
+static uint64_t calc_ns_until_next_interrupt(emu_state_t *emu)
+{
+    uint64_t current_time = semu_timer_get(&emu->mtimer.mtime);
+    uint64_t next_int = emu->mtimer.next_interrupt_at;
+
+    /* If the interrupt is already due, return immediately */
+    if (current_time >= next_int)
+        return 0;
+
+    /* Calculate ticks until the interrupt */
+    uint64_t ticks_remaining = next_int - current_time;
+
+    /* Convert RISC-V timer ticks to nanoseconds:
+     * ns = ticks * (1e9 / CLOCK_FREQ)
+     */
+    uint64_t ns = (ticks_remaining * 1000000000ULL) / emu->mtimer.mtime.freq;
+
+    /* Cap at 100ms to maintain responsiveness for UART and other events */
+    const uint64_t MAX_WAIT_NS = 100000000ULL; /* 100ms */
+    if (ns > MAX_WAIT_NS)
+        ns = MAX_WAIT_NS;
+
+    return ns;
+}
+
 static int semu_run(emu_state_t *emu)
 {
     int ret;
@@ -974,36 +1033,20 @@ static int semu_run(emu_state_t *emu)
         return -1;
     }
 
-    /* Add 1ms periodic timer */
-    struct kevent kev_timer;
-    EV_SET(&kev_timer, 1, EVFILT_TIMER, EV_ADD | EV_ENABLE, 0, 1, NULL);
-    if (kevent(kq, &kev_timer, 1, NULL, 0, NULL) < 0) {
-        perror("kevent timer setup");
-        close(kq);
-        return -1;
-    }
-
-    /* Note: UART input is polled via u8250_check_ready(), no need to
-     * monitor with kqueue. Timer events are sufficient to wake from WFI.
+    /* Note: The timer is configured dynamically in the event loop based on
+     * next_interrupt_at. UART input is polled via u8250_check_ready().
     */
 #else
-    /* Linux: create timerfd for periodic wakeup */
+    /* Linux: create a timerfd for dynamic timer wakeup */
     int wfi_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
     if (wfi_timer_fd < 0) {
        perror("timerfd_create");
        return -1;
     }
-    /* Configure 1ms periodic timer */
-    struct itimerspec its = {
-        .it_interval = {.tv_sec = 0, .tv_nsec = 1000000},
-        .it_value = {.tv_sec = 0, .tv_nsec = 1000000},
-    };
-    if (timerfd_settime(wfi_timer_fd, 0, &its, NULL) < 0) {
-        perror("timerfd_settime");
-        close(wfi_timer_fd);
-        return -1;
-    }
+    /* The timer is configured dynamically in the event loop based on
+     * next_interrupt_at to minimize unnecessary wakeups.
+     */
 #endif
 
     while (!emu->stopped) {
@@ -1025,30 +1068,61 @@ static int semu_run(emu_state_t *emu)
         }
         if (all_waiting) {
             /* All harts waiting for interrupt - use event-driven wait
-             * to reduce CPU usage while maintaining responsiveness
+             * to reduce CPU usage while maintaining responsiveness.
+             * Dynamically adjust the timer based on next_interrupt_at.
              */
+
+            /* Calculate how long to wait until the next timer interrupt */
+            uint64_t wait_ns = calc_ns_until_next_interrupt(emu);
+
+            /* If an interrupt is already due, don't wait - continue
+             * immediately
+             */
+            if (wait_ns > 0) {
 #ifdef __APPLE__
-            /* macOS: wait for kqueue events (timer or UART) */
-            struct kevent events[2];
-            int nevents = kevent(kq, NULL, 0, events, 2, NULL);
-            /* Events are automatically handled - timer fires every 1ms,
-             * UART triggers on input. No need to explicitly consume. */
-            (void) nevents;
+                /* Configure a one-shot kqueue timer with a dynamic timeout */
+                struct kevent kev_timer;
+                /* NOTE_USECONDS selects microseconds; wait_ns / 1000
+                 * converts ns to us
+                 */
+                EV_SET(&kev_timer, 1, EVFILT_TIMER,
+                       EV_ADD | EV_ENABLE | EV_ONESHOT, NOTE_USECONDS,
+                       wait_ns / 1000, NULL);
+
+                struct kevent events[2];
+                int nevents = kevent(kq, &kev_timer, 1, events, 2, NULL);
+                /* Events are automatically handled.
+                 * Wakeup occurs on:
+                 * - Timer expiration (wait_ns elapsed)
+                 * - UART input (if monitored)
+                 */
+                (void) nevents;
 #else
-            /* Linux: poll on timerfd and UART */
-            struct pollfd pfds[2];
-            pfds[0] = (struct pollfd){wfi_timer_fd, POLLIN, 0};
-            pfds[1] = (struct pollfd){emu->uart.in_fd, POLLIN, 0};
-            poll(pfds, 2, -1);
-
-            /* Consume timerfd event to prevent accumulation */
-            if (pfds[0].revents & POLLIN) {
-                uint64_t expirations;
-                ssize_t ret =
-                    read(wfi_timer_fd, &expirations, sizeof(expirations));
-                (void) ret; /* Ignore read errors - timer will retry */
-            }
+                /* Linux: configure timerfd with a dynamic one-shot timeout */
+                struct itimerspec its = {
+                    .it_interval = {0, 0}, /* One-shot, no repeat */
+                    .it_value = {wait_ns / 1000000000,
+                                 wait_ns % 1000000000},
+                };
+                if (timerfd_settime(wfi_timer_fd, 0, &its, NULL) < 0) {
+                    perror("timerfd_settime");
+                    /* Continue anyway - retry on the next iteration */
+                }
+
+                /* Poll on the timerfd and UART */
+                struct pollfd pfds[2];
+                pfds[0] = (struct pollfd){wfi_timer_fd, POLLIN, 0};
+                pfds[1] = (struct pollfd){emu->uart.in_fd, POLLIN, 0};
+                poll(pfds, 2, -1);
+
+                /* Consume the timerfd event to prevent accumulation */
+                if (pfds[0].revents & POLLIN) {
+                    uint64_t expirations;
+                    ssize_t ret = read(wfi_timer_fd, &expirations,
+                                       sizeof(expirations));
+                    (void) ret; /* Ignore read errors - the timer will retry */
+                }
 #endif
+            }
         }
     }

From 55712b6976250f2f987b1a81f5cc1283cdf3c398 Mon Sep 17 00:00:00 2001
From: Jim Huang
Date: Fri, 31 Oct 2025 16:05:43 +0800
Subject: [PATCH 2/3] Use fixed timeout during fake timer phase

The previous event-driven timer implementation caused hrtimer warnings
and boot delays on macOS CI because it calculated wait times from the
emulator's fake incremental timer but waited using host OS real-time
timers.

Root cause:
- During boot, semu_timer_get() returns fake ticks (slow linear growth)
- calc_ns_until_next_interrupt() converted these to nanoseconds
- kqueue/timerfd waited using wall clock time

Fix: Use a conservative 1ms fixed timeout during the boot phase. After
boot completes and the timer switches to real time, use the dynamic
calculation for optimal CPU efficiency.
---
 main.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/main.c b/main.c
index 8934c8e8..3e3d631a 100644
--- a/main.c
+++ b/main.c
@@ -25,6 +25,7 @@
 #include "mini-gdbstub/include/gdbstub.h"
 #include "riscv.h"
 #include "riscv_private.h"
+#include "utils.h"
 
 #define PRIV(x) ((emu_state_t *) x->priv)
 
 /* Forward declarations for coroutine support */
@@ -986,9 +987,20 @@ static void print_mmu_cache_stats(vm_t *vm)
 
 /* Calculate nanoseconds until the next timer interrupt.
  * Returns 0 if an interrupt is already due; the result is capped at 100ms.
+ *
+ * During boot (when using the fake incremental timer), use a conservative
+ * 1ms timeout to avoid a mismatch between emulator time and the host OS timer.
  */
 static uint64_t calc_ns_until_next_interrupt(emu_state_t *emu)
 {
+    /* During boot, use a fixed short timeout to avoid the fake timer /
+     * real-time mismatch. The fake timer advances slowly (incrementally),
+     * but host OS timers use wall clock time, which can cause large delays
+     * if we calculate based on fake timer values.
+     */
+    if (!boot_complete)
+        return 1000000ULL; /* 1ms - conservative but safe during boot */
+
     uint64_t current_time = semu_timer_get(&emu->mtimer.mtime);
     uint64_t next_int = emu->mtimer.next_interrupt_at;

From 90aa27de926319e45360a49c44272eb1b51fc17b Mon Sep 17 00:00:00 2001
From: Jim Huang
Date: Fri, 31 Oct 2025 16:33:21 +0800
Subject: [PATCH 3/3] Fix arithmetic overflow in timer wait calculation

When next_interrupt_at equals UINT64_MAX (disabled timer) or is very
large, the calculation '(ticks_remaining * 1000000000ULL) / freq'
overflows, resulting in wait_ns = 0. This prevents the sleep mechanism
from working, eliminating the CPU efficiency gains.
---
 main.c | 35 ++++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/main.c b/main.c
index 3e3d631a..def6fca5 100644
--- a/main.c
+++ b/main.c
@@ -993,6 +993,9 @@ static void print_mmu_cache_stats(vm_t *vm)
  */
 static uint64_t calc_ns_until_next_interrupt(emu_state_t *emu)
 {
+    /* Cap at 100ms to maintain responsiveness for UART and other events */
+    const uint64_t MAX_WAIT_NS = 100000000ULL; /* 100ms */
+
     /* During boot, use a fixed short timeout to avoid the fake timer /
      * real-time mismatch. The fake timer advances slowly (incrementally),
      * but host OS timers use wall clock time, which can cause large delays
@@ -1004,20 +1007,42 @@ static uint64_t calc_ns_until_next_interrupt(emu_state_t *emu)
     uint64_t current_time = semu_timer_get(&emu->mtimer.mtime);
     uint64_t next_int = emu->mtimer.next_interrupt_at;
 
-    /* If the interrupt is already due, return immediately */
+    /* If the timer is disabled (next_interrupt_at == UINT64_MAX), use the
+     * maximum timeout to avoid arithmetic overflow.
+     */
+    if (next_int == UINT64_MAX)
+        return MAX_WAIT_NS;
+
+    /* If the interrupt is already due, return immediately. This must be
+     * checked before any subtraction to avoid unsigned underflow.
+     */
     if (current_time >= next_int)
         return 0;
 
     /* Calculate ticks until the interrupt */
     uint64_t ticks_remaining = next_int - current_time;
 
-    /* Convert RISC-V timer ticks to nanoseconds:
+    /* If there is an unreasonably large gap, cap at the maximum timeout to
+     * avoid arithmetic overflow in the nanosecond conversion below.
+     */
+    if (ticks_remaining > UINT64_MAX / 1000)
+        return MAX_WAIT_NS;
+
+    /* Convert RISC-V timer ticks to nanoseconds using overflow-safe arithmetic:
      * ns = ticks * (1e9 / CLOCK_FREQ)
+     *
+     * To avoid overflow in (ticks_remaining * 1000000000ULL), check whether
+     * the multiplication would overflow; if it would, cap at MAX_WAIT_NS.
      */
-    uint64_t ns = (ticks_remaining * 1000000000ULL) / emu->mtimer.mtime.freq;
+    uint64_t freq = emu->mtimer.mtime.freq;
+    if (ticks_remaining > UINT64_MAX / 1000000000ULL) {
+        /* Would overflow - cap at the maximum timeout */
+        return MAX_WAIT_NS;
+    }
 
-    /* Cap at 100ms to maintain responsiveness for UART and other events */
-    const uint64_t MAX_WAIT_NS = 100000000ULL; /* 100ms */
+    uint64_t ns = (ticks_remaining * 1000000000ULL) / freq;
+
+    /* Cap at the maximum timeout */
     if (ns > MAX_WAIT_NS)
         ns = MAX_WAIT_NS;
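
Side note on the conversion guarded in PATCH 3: below is a minimal standalone
sketch of an overflow-safe tick-to-nanosecond conversion. It uses
divide-and-remainder splitting instead of the early-cap magnitude checks the
series adds; the name ticks_to_ns and the 65 MHz frequency are illustrative
assumptions, not taken from the series.

#include <stdint.h>
#include <stdio.h>

/* 100ms cap, mirroring MAX_WAIT_NS in the series */
#define MAX_WAIT_NS 100000000ULL

/* Split ticks into whole seconds plus a sub-second remainder so that the
 * remainder multiply stays below freq * 1e9, which fits in 64 bits for any
 * freq below roughly 18.4 GHz. Any whole second already exceeds the 100ms
 * cap, so that case saturates immediately.
 */
static uint64_t ticks_to_ns(uint64_t ticks, uint64_t freq)
{
    if (ticks / freq > 0) /* one full second or more: beyond the cap */
        return MAX_WAIT_NS;

    uint64_t rem = ticks % freq; /* rem < freq, so no overflow below */
    uint64_t ns = (rem * 1000000000ULL) / freq;
    return ns > MAX_WAIT_NS ? MAX_WAIT_NS : ns;
}

int main(void)
{
    const uint64_t freq = 65000000ULL; /* assumed CLOCK_FREQ, illustrative */

    /* A disabled timer (UINT64_MAX ticks away) saturates at the cap */
    printf("%llu\n", (unsigned long long) ticks_to_ns(UINT64_MAX, freq));

    /* 65000 ticks at 65 MHz is exactly 1ms (1000000 ns) */
    printf("%llu\n", (unsigned long long) ticks_to_ns(65000, freq));
    return 0;
}

Compared with the series' approach of early returns on UINT64_MAX and large
gaps, the split form trades one extra division for needing no magnitude
guards at all; both avoid the wrap that produced wait_ns = 0.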