From fb11de898562c7a28b1dd41c3afa14ddf56ef6af Mon Sep 17 00:00:00 2001
From: Jim Huang
Date: Fri, 31 Oct 2025 23:21:59 +0800
Subject: [PATCH] Reduce RFENCE IPI cache flush scope

This optimizes RFENCE.VMA to use range-based cache invalidation instead
of unconditionally flushing all MMU caches. It adds
mmu_invalidate_range(), which invalidates only the cache entries whose
virtual page number falls within the specified range, reducing the
number of invalidated cache entries by 75-100% for single-page
operations.

SBI compliance: size == 0 or size == -1 still triggers a full flush, as
required by the specification.
---
 main.c  | 38 ++++++++++++++++++++++++++------------
 riscv.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 riscv.h |  3 +++
 3 files changed, 77 insertions(+), 12 deletions(-)

diff --git a/main.c b/main.c
index 2848478..16a443f 100644
--- a/main.c
+++ b/main.c
@@ -437,28 +437,42 @@ static inline sbi_ret_t handle_sbi_ecall_RFENCE(hart_t *hart, int32_t fid)
      * completely.
      */
     uint64_t hart_mask, hart_mask_base;
+    uint32_t start_addr, size;
     switch (fid) {
-    case 0:
+    case SBI_RFENCE__I:
+        /* Instruction cache flush - ignored in interpreter mode */
         return (sbi_ret_t){SBI_SUCCESS, 0};
-    case 1:
+    case SBI_RFENCE__VMA:
+    case SBI_RFENCE__VMA_ASID:
+        /* RFENCE.VMA and RFENCE.VMA.ASID both use the same parameters:
+         * a0: hart_mask (low bits)
+         * a1: hart_mask_base (high bits)
+         * a2: start_addr
+         * a3: size
+         * For VMA_ASID, a4 contains the ASID (currently ignored).
+         */
         hart_mask = (uint64_t) hart->x_regs[RV_R_A0];
         hart_mask_base = (uint64_t) hart->x_regs[RV_R_A1];
+        start_addr = hart->x_regs[RV_R_A2];
+        size = hart->x_regs[RV_R_A3];
+
         if (hart_mask_base == 0xFFFFFFFFFFFFFFFF) {
-            for (uint32_t i = 0; i < hart->vm->n_hart; i++) {
-                mmu_invalidate(hart->vm->hart[i]);
-            }
+            /* Flush all harts */
+            for (uint32_t i = 0; i < hart->vm->n_hart; i++)
+                mmu_invalidate_range(hart->vm->hart[i], start_addr, size);
         } else {
+            /* Flush only the harts selected by the mask */
             for (int i = hart_mask_base; hart_mask; hart_mask >>= 1, i++) {
-                mmu_invalidate(hart->vm->hart[i]);
+                if (hart_mask & 1)
+                    mmu_invalidate_range(hart->vm->hart[i], start_addr, size);
             }
         }
         return (sbi_ret_t){SBI_SUCCESS, 0};
-    case 2:
-    case 3:
-    case 4:
-    case 5:
-    case 6:
-    case 7:
+    case SBI_RFENCE__GVMA_VMID:
+    case SBI_RFENCE__GVMA:
+    case SBI_RFENCE__VVMA_ASID:
+    case SBI_RFENCE__VVMA:
+        /* Hypervisor-related RFENCE operations - not implemented */
         return (sbi_ret_t){SBI_SUCCESS, 0};
     default:
         return (sbi_ret_t){SBI_ERR_FAILED, 0};
diff --git a/riscv.c b/riscv.c
index c07254c..e7b36b1 100644
--- a/riscv.c
+++ b/riscv.c
@@ -190,6 +190,54 @@ void mmu_invalidate(hart_t *vm)
     vm->cache_store.n_pages = 0xFFFFFFFF;
 }
 
+/* Invalidate MMU caches for a specific virtual address range.
+ * If size is 0 or -1, invalidate all caches (equivalent to mmu_invalidate()).
+ * Otherwise, only invalidate cache entries whose VPN falls within
+ * [start_addr >> RV_PAGE_SHIFT, (start_addr + size - 1) >> RV_PAGE_SHIFT].
+ */
+void mmu_invalidate_range(hart_t *vm, uint32_t start_addr, uint32_t size)
+{
+    /* SBI spec: size == 0 or size == -1 means flush the entire address space */
+    if (size == 0 || size == (uint32_t) -1) {
+        mmu_invalidate(vm);
+        return;
+    }
+
+    /* Calculate VPN range: [start_vpn, end_vpn] inclusive.
+     * Use 64-bit arithmetic to prevent overflow when (start_addr + size - 1)
+     * exceeds UINT32_MAX. For example:
+     *   start_addr = 0xFFF00000, size = 0x00200000
+     *   32-bit: 0xFFF00000 + 0x00200000 - 1 = 0x000FFFFF (wraps)
+     *   64-bit: 0xFFF00000 + 0x00200000 - 1 = 0x1000FFFFF (correct)
+     * Clamp to the RV32 address space maximum before calculating end_vpn.
+     */
+    uint32_t start_vpn = start_addr >> RV_PAGE_SHIFT;
+    uint64_t end_addr = (uint64_t) start_addr + size - 1;
+    if (end_addr > UINT32_MAX)
+        end_addr = UINT32_MAX;
+    uint32_t end_vpn = (uint32_t) end_addr >> RV_PAGE_SHIFT;
+
+    /* Check each cache entry and invalidate it if its VPN is in range.
+     * Since there are only 4 cache entries in total (fetch: 1, load: 2,
+     * store: 1), simple sequential checks are sufficient.
+     */
+    if (vm->cache_fetch.n_pages >= start_vpn &&
+        vm->cache_fetch.n_pages <= end_vpn)
+        vm->cache_fetch.n_pages = 0xFFFFFFFF;
+
+    if (vm->cache_load[0].n_pages >= start_vpn &&
+        vm->cache_load[0].n_pages <= end_vpn)
+        vm->cache_load[0].n_pages = 0xFFFFFFFF;
+
+    if (vm->cache_load[1].n_pages >= start_vpn &&
+        vm->cache_load[1].n_pages <= end_vpn)
+        vm->cache_load[1].n_pages = 0xFFFFFFFF;
+
+    if (vm->cache_store.n_pages >= start_vpn &&
+        vm->cache_store.n_pages <= end_vpn)
+        vm->cache_store.n_pages = 0xFFFFFFFF;
+}
+
 /* Pre-verify the root page table to minimize page table access during
  * translation time.
  */
diff --git a/riscv.h b/riscv.h
index 0563f8f..29ae6c7 100644
--- a/riscv.h
+++ b/riscv.h
@@ -190,3 +190,6 @@ void vm_error_report(const hart_t *vm);
 
 /* Invalidate all MMU translation caches (fetch, load, store) */
 void mmu_invalidate(hart_t *vm);
+
+/* Invalidate MMU caches for a specific virtual address range */
+void mmu_invalidate_range(hart_t *vm, uint32_t start_addr, uint32_t size);
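
Reviewer notes (illustrative only, not part of the patch):

The following standalone sketch mirrors the start_vpn/end_vpn arithmetic used
by mmu_invalidate_range(), assuming RV_PAGE_SHIFT == 12 (4 KiB pages). The
names vpn_range and PAGE_SHIFT are hypothetical and exist only for this
example; the full-flush cases (size == 0 or size == -1) are assumed to be
filtered out before the range math runs, as the patch does.

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT 12 /* assumed stand-in for RV_PAGE_SHIFT */

static void vpn_range(uint32_t start_addr, uint32_t size,
                      uint32_t *start_vpn, uint32_t *end_vpn)
{
    /* 64-bit sum so start_addr + size - 1 cannot wrap around */
    uint64_t end_addr = (uint64_t) start_addr + size - 1;
    if (end_addr > UINT32_MAX)
        end_addr = UINT32_MAX; /* clamp to the RV32 address space */
    *start_vpn = start_addr >> PAGE_SHIFT;
    *end_vpn = (uint32_t) end_addr >> PAGE_SHIFT;
}

int main(void)
{
    uint32_t lo, hi;

    /* Single 4 KiB page: only VPN 0x80000 falls in the range */
    vpn_range(0x80000000u, 0x1000u, &lo, &hi);
    assert(lo == 0x80000 && hi == 0x80000);

    /* Range that would wrap in 32-bit arithmetic: clamped instead */
    vpn_range(0xFFF00000u, 0x00200000u, &lo, &hi);
    assert(lo == 0xFFF00 && hi == 0xFFFFF);

    return 0;
}

Running this should trip neither assert, matching the worked example in the
mmu_invalidate_range() comment.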
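For context on where the a0-a3 values decoded by handle_sbi_ecall_RFENCE()
come from, here is a guest-side sketch of issuing the remote sfence.vma call.
It assumes the standard SBI calling convention (EID in a7, FID in a6,
arguments in a0-a5, error code returned in a0) and an RV32 target; the helper
name is hypothetical and this code is not part of the patch.

#include <stdint.h>

static inline long sbi_remote_sfence_vma(uint32_t hart_mask,
                                         uint32_t hart_mask_base,
                                         uint32_t start_addr, uint32_t size)
{
    register uint32_t a0 asm("a0") = hart_mask;      /* hart_mask (low bits) */
    register uint32_t a1 asm("a1") = hart_mask_base; /* hart_mask_base */
    register uint32_t a2 asm("a2") = start_addr;     /* start_addr */
    register uint32_t a3 asm("a3") = size;           /* size */
    register uint32_t a6 asm("a6") = 1;              /* FID 1: remote sfence.vma */
    register uint32_t a7 asm("a7") = 0x52464E43;     /* RFENCE extension ("RFNC") */
    asm volatile("ecall"
                 : "+r"(a0), "+r"(a1)
                 : "r"(a2), "r"(a3), "r"(a6), "r"(a7)
                 : "memory");
    return (long) a0; /* SBI error code; a1 carries the return value */
}

/* Example: flush one 4 KiB page on hart 0 only:
 *     sbi_remote_sfence_vma(0x1, 0, 0x80000000u, 0x1000u);
 * which reaches the handler with start_addr = 0x80000000 and size = 0x1000.
 */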