From fb11de898562c7a28b1dd41c3afa14ddf56ef6af Mon Sep 17 00:00:00 2001
From: Jim Huang
Date: Fri, 31 Oct 2025 23:21:59 +0800
Subject: [PATCH] Reduce RFENCE IPI cache flush scope

This optimizes RFENCE.VMA to use range-based cache invalidation instead
of unconditionally flushing all MMU caches. It adds
mmu_invalidate_range(), which invalidates only the cache entries whose
virtual page number falls within the specified range, reducing the
number of invalidated cache entries by 75-100% for single-page
operations.

SBI compliance: size == 0 or size == -1 still triggers a full flush, as
required by the specification.
---
 main.c  | 38 ++++++++++++++++++++++++++------------
 riscv.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 riscv.h |  3 +++
 3 files changed, 77 insertions(+), 12 deletions(-)

diff --git a/main.c b/main.c
index 2848478..16a443f 100644
--- a/main.c
+++ b/main.c
@@ -437,28 +437,42 @@ static inline sbi_ret_t handle_sbi_ecall_RFENCE(hart_t *hart, int32_t fid)
      * completely.
      */
     uint64_t hart_mask, hart_mask_base;
+    uint32_t start_addr, size;
     switch (fid) {
-    case 0:
+    case SBI_RFENCE__I:
+        /* Instruction cache flush - ignored in interpreter mode */
         return (sbi_ret_t){SBI_SUCCESS, 0};
-    case 1:
+    case SBI_RFENCE__VMA:
+    case SBI_RFENCE__VMA_ASID:
+        /* RFENCE.VMA and RFENCE.VMA.ASID both use the same parameters:
+         * a0: hart_mask (low bits)
+         * a1: hart_mask_base (high bits)
+         * a2: start_addr
+         * a3: size
+         * For VMA_ASID, a4 contains the ASID (currently ignored).
+         */
         hart_mask = (uint64_t) hart->x_regs[RV_R_A0];
         hart_mask_base = (uint64_t) hart->x_regs[RV_R_A1];
+        start_addr = hart->x_regs[RV_R_A2];
+        size = hart->x_regs[RV_R_A3];
+
         if (hart_mask_base == 0xFFFFFFFFFFFFFFFF) {
-            for (uint32_t i = 0; i < hart->vm->n_hart; i++) {
-                mmu_invalidate(hart->vm->hart[i]);
-            }
+            /* Flush all harts */
+            for (uint32_t i = 0; i < hart->vm->n_hart; i++)
+                mmu_invalidate_range(hart->vm->hart[i], start_addr, size);
         } else {
+            /* Flush only the harts selected by the mask */
             for (int i = hart_mask_base; hart_mask; hart_mask >>= 1, i++) {
-                mmu_invalidate(hart->vm->hart[i]);
+                if (hart_mask & 1)
+                    mmu_invalidate_range(hart->vm->hart[i], start_addr, size);
             }
         }
         return (sbi_ret_t){SBI_SUCCESS, 0};
-    case 2:
-    case 3:
-    case 4:
-    case 5:
-    case 6:
-    case 7:
+    case SBI_RFENCE__GVMA_VMID:
+    case SBI_RFENCE__GVMA:
+    case SBI_RFENCE__VVMA_ASID:
+    case SBI_RFENCE__VVMA:
+        /* Hypervisor-related RFENCE operations - not implemented */
         return (sbi_ret_t){SBI_SUCCESS, 0};
     default:
         return (sbi_ret_t){SBI_ERR_FAILED, 0};
diff --git a/riscv.c b/riscv.c
index c07254c..e7b36b1 100644
--- a/riscv.c
+++ b/riscv.c
@@ -190,6 +190,54 @@ void mmu_invalidate(hart_t *vm)
     vm->cache_store.n_pages = 0xFFFFFFFF;
 }
 
+/* Invalidate MMU caches for a specific virtual address range.
+ * If size is 0 or -1, invalidate all caches (equivalent to mmu_invalidate()).
+ * Otherwise, only invalidate cache entries whose VPN falls within
+ * [start_addr >> RV_PAGE_SHIFT, (start_addr + size - 1) >> RV_PAGE_SHIFT].
+ */
+void mmu_invalidate_range(hart_t *vm, uint32_t start_addr, uint32_t size)
+{
+    /* SBI spec: size == 0 or size == -1 means flush the entire address space */
+    if (size == 0 || size == (uint32_t) -1) {
+        mmu_invalidate(vm);
+        return;
+    }
+
+    /* Calculate VPN range: [start_vpn, end_vpn] inclusive.
+     * Use 64-bit arithmetic to prevent overflow when (start_addr + size - 1)
+     * exceeds UINT32_MAX. For example:
+     *   start_addr = 0xFFF00000, size = 0x00200000
+     *   32-bit: 0xFFF00000 + 0x00200000 - 1 = 0x000FFFFF (wraps)
+     *   64-bit: 0xFFF00000 + 0x00200000 - 1 = 0x1000FFFFF (correct)
+     * Clamp to the RV32 address space maximum before calculating end_vpn.
+     */
+    uint32_t start_vpn = start_addr >> RV_PAGE_SHIFT;
+    uint64_t end_addr = (uint64_t) start_addr + size - 1;
+    if (end_addr > UINT32_MAX)
+        end_addr = UINT32_MAX;
+    uint32_t end_vpn = (uint32_t) end_addr >> RV_PAGE_SHIFT;
+
+    /* Check each cache entry and invalidate it if its VPN is in range.
+     * Since there are only 4 cache entries in total (fetch: 1, load: 2,
+     * store: 1), simple sequential checks are sufficient.
+     */
+    if (vm->cache_fetch.n_pages >= start_vpn &&
+        vm->cache_fetch.n_pages <= end_vpn)
+        vm->cache_fetch.n_pages = 0xFFFFFFFF;
+
+    if (vm->cache_load[0].n_pages >= start_vpn &&
+        vm->cache_load[0].n_pages <= end_vpn)
+        vm->cache_load[0].n_pages = 0xFFFFFFFF;
+
+    if (vm->cache_load[1].n_pages >= start_vpn &&
+        vm->cache_load[1].n_pages <= end_vpn)
+        vm->cache_load[1].n_pages = 0xFFFFFFFF;
+
+    if (vm->cache_store.n_pages >= start_vpn &&
+        vm->cache_store.n_pages <= end_vpn)
+        vm->cache_store.n_pages = 0xFFFFFFFF;
+}
+
 /* Pre-verify the root page table to minimize page table access during
  * translation time.
  */
diff --git a/riscv.h b/riscv.h
index 0563f8f..29ae6c7 100644
--- a/riscv.h
+++ b/riscv.h
@@ -190,3 +190,6 @@ void vm_error_report(const hart_t *vm);
 
 /* Invalidate all MMU translation caches (fetch, load, store) */
 void mmu_invalidate(hart_t *vm);
+
+/* Invalidate MMU caches for a specific virtual address range */
+void mmu_invalidate_range(hart_t *vm, uint32_t start_addr, uint32_t size);
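
Reviewer notes (illustrative only, not part of the patch):

The following standalone sketch mirrors the start_vpn/end_vpn arithmetic used
by mmu_invalidate_range(), assuming RV_PAGE_SHIFT == 12 (4 KiB pages). The
names vpn_range and PAGE_SHIFT are hypothetical and exist only for this
example; the full-flush cases (size == 0 or size == -1) are assumed to be
filtered out before the range math runs, as the patch does.

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT 12 /* assumed stand-in for RV_PAGE_SHIFT */

static void vpn_range(uint32_t start_addr, uint32_t size,
                      uint32_t *start_vpn, uint32_t *end_vpn)
{
    /* 64-bit sum so start_addr + size - 1 cannot wrap around */
    uint64_t end_addr = (uint64_t) start_addr + size - 1;
    if (end_addr > UINT32_MAX)
        end_addr = UINT32_MAX; /* clamp to the RV32 address space */
    *start_vpn = start_addr >> PAGE_SHIFT;
    *end_vpn = (uint32_t) end_addr >> PAGE_SHIFT;
}

int main(void)
{
    uint32_t lo, hi;

    /* Single 4 KiB page: only VPN 0x80000 falls in the range */
    vpn_range(0x80000000u, 0x1000u, &lo, &hi);
    assert(lo == 0x80000 && hi == 0x80000);

    /* Range that would wrap in 32-bit arithmetic: clamped instead */
    vpn_range(0xFFF00000u, 0x00200000u, &lo, &hi);
    assert(lo == 0xFFF00 && hi == 0xFFFFF);

    return 0;
}

Running this should trip neither assert, matching the worked example in the
mmu_invalidate_range() comment.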
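For context on where the a0-a3 values decoded by handle_sbi_ecall_RFENCE()
come from, here is a guest-side sketch of issuing the remote sfence.vma call.
It assumes the standard SBI calling convention (EID in a7, FID in a6,
arguments in a0-a5, error code returned in a0) and an RV32 target; the helper
name is hypothetical and this code is not part of the patch.

#include <stdint.h>

static inline long sbi_remote_sfence_vma(uint32_t hart_mask,
                                         uint32_t hart_mask_base,
                                         uint32_t start_addr, uint32_t size)
{
    register uint32_t a0 asm("a0") = hart_mask;      /* hart_mask (low bits) */
    register uint32_t a1 asm("a1") = hart_mask_base; /* hart_mask_base */
    register uint32_t a2 asm("a2") = start_addr;     /* start_addr */
    register uint32_t a3 asm("a3") = size;           /* size */
    register uint32_t a6 asm("a6") = 1;              /* FID 1: remote sfence.vma */
    register uint32_t a7 asm("a7") = 0x52464E43;     /* RFENCE extension ("RFNC") */
    asm volatile("ecall"
                 : "+r"(a0), "+r"(a1)
                 : "r"(a2), "r"(a3), "r"(a6), "r"(a7)
                 : "memory");
    return (long) a0; /* SBI error code; a1 carries the return value */
}

/* Example: flush one 4 KiB page on hart 0 only:
 *     sbi_remote_sfence_vma(0x1, 0, 0x80000000u, 0x1000u);
 * which reaches the handler with start_addr = 0x80000000 and size = 0x1000.
 */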