diff --git a/main.c b/main.c index 97968c0..7efb8c0 100644 --- a/main.c +++ b/main.c @@ -8,6 +8,9 @@ #include #include #include +#ifdef MMU_CACHE_STATS +#include +#endif #include "device.h" #include "mini-gdbstub/include/gdbstub.h" @@ -395,11 +398,11 @@ static inline sbi_ret_t handle_sbi_ecall_RFENCE(hart_t *hart, int32_t fid) hart_mask_base = (uint64_t) hart->x_regs[RV_R_A1]; if (hart_mask_base == 0xFFFFFFFFFFFFFFFF) { for (uint32_t i = 0; i < hart->vm->n_hart; i++) { - hart->vm->hart[i]->cache_fetch.n_pages = 0xFFFFFFFF; + mmu_invalidate(hart->vm->hart[i]); } } else { for (int i = hart_mask_base; hart_mask; hart_mask >>= 1, i++) { - hart->vm->hart[i]->cache_fetch.n_pages = 0xFFFFFFFF; + mmu_invalidate(hart->vm->hart[i]); } } return (sbi_ret_t){SBI_SUCCESS, 0}; @@ -796,9 +799,57 @@ static int semu_step(emu_state_t *emu) return 0; } +#ifdef MMU_CACHE_STATS +static void print_mmu_cache_stats(vm_t *vm) +{ + fprintf(stderr, "\n=== MMU Cache Statistics ===\n"); + for (uint32_t i = 0; i < vm->n_hart; i++) { + hart_t *hart = vm->hart[i]; + uint64_t fetch_total = + hart->cache_fetch.hits + hart->cache_fetch.misses; + + /* Combine 2-way load cache statistics */ + uint64_t load_hits = + hart->cache_load[0].hits + hart->cache_load[1].hits; + uint64_t load_misses = + hart->cache_load[0].misses + hart->cache_load[1].misses; + uint64_t load_total = load_hits + load_misses; + + uint64_t store_total = + hart->cache_store.hits + hart->cache_store.misses; + + fprintf(stderr, "\nHart %u:\n", i); + fprintf(stderr, " Fetch: %12llu hits, %12llu misses", + hart->cache_fetch.hits, hart->cache_fetch.misses); + if (fetch_total > 0) + fprintf(stderr, " (%.2f%% hit rate)", + 100.0 * hart->cache_fetch.hits / fetch_total); + fprintf(stderr, "\n"); + + fprintf(stderr, " Load: %12llu hits, %12llu misses (2-way)", + load_hits, load_misses); + if (load_total > 0) + fprintf(stderr, " (%.2f%% hit rate)", + 100.0 * load_hits / load_total); + fprintf(stderr, "\n"); + + fprintf(stderr, " Store: %12llu 
hits, %12llu misses", + hart->cache_store.hits, hart->cache_store.misses); + if (store_total > 0) + fprintf(stderr, " (%.2f%% hit rate)", + 100.0 * hart->cache_store.hits / store_total); + fprintf(stderr, "\n"); + } +} +#endif + static int semu_run(emu_state_t *emu) { int ret; +#ifdef MMU_CACHE_STATS + struct timeval start_time, current_time; + gettimeofday(&start_time, NULL); +#endif /* Emulate */ while (!emu->stopped) { @@ -833,6 +884,23 @@ static int semu_run(emu_state_t *emu) ret = semu_step(emu); if (ret) return ret; +#ifdef MMU_CACHE_STATS + /* Exit after running for 15 seconds to collect statistics */ + gettimeofday(&current_time, NULL); + long elapsed_sec = current_time.tv_sec - start_time.tv_sec; + long elapsed_usec = current_time.tv_usec - start_time.tv_usec; + if (elapsed_usec < 0) { + elapsed_sec--; + elapsed_usec += 1000000; + } + long elapsed = elapsed_sec + (elapsed_usec > 0 ? 1 : 0); + if (elapsed >= 15) { + fprintf(stderr, + "\n[MMU_CACHE_STATS] Reached 15 second time limit, " + "exiting...\n"); + return 0; + } +#endif } } @@ -964,7 +1032,13 @@ int main(int argc, char **argv) return ret; if (emu.debug) - return semu_run_debug(&emu); + ret = semu_run_debug(&emu); + else + ret = semu_run(&emu); + +#ifdef MMU_CACHE_STATS + print_mmu_cache_stats(&emu.vm); +#endif - return semu_run(&emu); + return ret; } diff --git a/riscv.c b/riscv.c index bd92f1f..2baced3 100644 --- a/riscv.c +++ b/riscv.c @@ -5,6 +5,19 @@ #include "riscv.h" #include "riscv_private.h" +#if !defined(__GNUC__) && !defined(__clang__) +/* Portable parity implementation for non-GCC/Clang compilers */ +static inline unsigned int __builtin_parity(unsigned int x) +{ + x ^= x >> 16; + x ^= x >> 8; + x ^= x >> 4; + x ^= x >> 2; + x ^= x >> 1; + return x & 1; +} +#endif + /* Return the string representation of an error code identifier */ static const char *vm_error_str(vm_error_t err) { @@ -169,9 +182,12 @@ static inline uint32_t read_rs2(const hart_t *vm, uint32_t insn) /* virtual addressing */ 
-static void mmu_invalidate(hart_t *vm) +void mmu_invalidate(hart_t *vm) { vm->cache_fetch.n_pages = 0xFFFFFFFF; + vm->cache_load[0].n_pages = 0xFFFFFFFF; + vm->cache_load[1].n_pages = 0xFFFFFFFF; + vm->cache_store.n_pages = 0xFFFFFFFF; } /* Pre-verify the root page table to minimize page table access during @@ -284,6 +300,9 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value) { uint32_t vpn = addr >> RV_PAGE_SHIFT; if (unlikely(vpn != vm->cache_fetch.n_pages)) { +#ifdef MMU_CACHE_STATS + vm->cache_fetch.misses++; +#endif mmu_translate(vm, &addr, (1 << 3), (1 << 6), false, RV_EXC_FETCH_FAULT, RV_EXC_FETCH_PFAULT); if (vm->error) @@ -295,6 +314,11 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value) vm->cache_fetch.n_pages = vpn; vm->cache_fetch.page_addr = page_addr; } +#ifdef MMU_CACHE_STATS + else { + vm->cache_fetch.hits++; + } +#endif *value = vm->cache_fetch.page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)]; } @@ -304,17 +328,41 @@ static void mmu_load(hart_t *vm, uint32_t *value, bool reserved) { - mmu_translate(vm, &addr, (1 << 1) | (vm->sstatus_mxr ? (1 << 3) : 0), - (1 << 6), vm->sstatus_sum && vm->s_mode, RV_EXC_LOAD_FAULT, - RV_EXC_LOAD_PFAULT); - if (vm->error) - return; - vm->mem_load(vm, addr, width, value); + uint32_t vpn = addr >> RV_PAGE_SHIFT; + uint32_t phys_addr; + /* 2-entry direct-mapped cache: use parity hash to select entry */ + uint32_t index = __builtin_parity(vpn) & 0x1; + + if (unlikely(vpn != vm->cache_load[index].n_pages)) { + /* Cache miss: do full translation */ +#ifdef MMU_CACHE_STATS + vm->cache_load[index].misses++; +#endif + phys_addr = addr; + mmu_translate(vm, &phys_addr, + (1 << 1) | (vm->sstatus_mxr ? 
(1 << 3) : 0), (1 << 6), + vm->sstatus_sum && vm->s_mode, RV_EXC_LOAD_FAULT, + RV_EXC_LOAD_PFAULT); + if (vm->error) + return; + /* Cache physical page number (not a pointer) */ + vm->cache_load[index].n_pages = vpn; + vm->cache_load[index].phys_ppn = phys_addr >> RV_PAGE_SHIFT; + } else { + /* Cache hit: reconstruct physical address from cached PPN */ +#ifdef MMU_CACHE_STATS + vm->cache_load[index].hits++; +#endif + phys_addr = (vm->cache_load[index].phys_ppn << RV_PAGE_SHIFT) | + (addr & MASK(RV_PAGE_SHIFT)); + } + + vm->mem_load(vm, phys_addr, width, value); if (vm->error) return; if (unlikely(reserved)) - vm->lr_reservation = addr | 1; + vm->lr_reservation = phys_addr | 1; } static bool mmu_store(hart_t *vm, @@ -323,23 +371,43 @@ static bool mmu_store(hart_t *vm, uint32_t value, bool cond) { - mmu_translate(vm, &addr, (1 << 2), (1 << 6) | (1 << 7), - vm->sstatus_sum && vm->s_mode, RV_EXC_STORE_FAULT, - RV_EXC_STORE_PFAULT); - if (vm->error) - return false; + uint32_t vpn = addr >> RV_PAGE_SHIFT; + uint32_t phys_addr; + + if (unlikely(vpn != vm->cache_store.n_pages)) { + /* Cache miss: do full translation */ +#ifdef MMU_CACHE_STATS + vm->cache_store.misses++; +#endif + phys_addr = addr; + mmu_translate(vm, &phys_addr, (1 << 2), (1 << 6) | (1 << 7), + vm->sstatus_sum && vm->s_mode, RV_EXC_STORE_FAULT, + RV_EXC_STORE_PFAULT); + if (vm->error) + return false; + /* Cache physical page number (not a pointer) */ + vm->cache_store.n_pages = vpn; + vm->cache_store.phys_ppn = phys_addr >> RV_PAGE_SHIFT; + } else { + /* Cache hit: reconstruct physical address from cached PPN */ +#ifdef MMU_CACHE_STATS + vm->cache_store.hits++; +#endif + phys_addr = (vm->cache_store.phys_ppn << RV_PAGE_SHIFT) | + (addr & MASK(RV_PAGE_SHIFT)); + } if (unlikely(cond)) { - if ((vm->lr_reservation != (addr | 1))) + if ((vm->lr_reservation != (phys_addr | 1))) return false; } for (uint32_t i = 0; i < vm->vm->n_hart; i++) { if (unlikely(vm->vm->hart[i]->lr_reservation & 1) && - 
(vm->vm->hart[i]->lr_reservation & ~3) == (addr & ~3)) + (vm->vm->hart[i]->lr_reservation & ~3) == (phys_addr & ~3)) vm->vm->hart[i]->lr_reservation = 0; } - vm->mem_store(vm, addr, width, value); + vm->mem_store(vm, phys_addr, width, value); return true; } @@ -513,13 +581,19 @@ static void csr_write(hart_t *vm, uint16_t addr, uint32_t value) } switch (addr) { - case RV_CSR_SSTATUS: + case RV_CSR_SSTATUS: { + bool old_sum = vm->sstatus_sum; + bool old_mxr = vm->sstatus_mxr; vm->sstatus_sie = (value & (1 << (1))) != 0; vm->sstatus_spie = (value & (1 << (5))) != 0; vm->sstatus_spp = (value & (1 << (8))) != 0; vm->sstatus_sum = (value & (1 << (18))) != 0; vm->sstatus_mxr = (value & (1 << (19))) != 0; + /* Invalidate load/store TLB if SUM or MXR changed */ + if (vm->sstatus_sum != old_sum || vm->sstatus_mxr != old_mxr) + mmu_invalidate(vm); break; + } case RV_CSR_SIE: value &= SIE_MASK; vm->sie = value; diff --git a/riscv.h b/riscv.h index 62c9cf8..e07a087 100644 --- a/riscv.h +++ b/riscv.h @@ -31,10 +31,25 @@ typedef enum { ERR_USER, /**< user-specific error */ } vm_error_t; +/* Instruction fetch cache: stores host memory pointers for direct access */ typedef struct { uint32_t n_pages; uint32_t *page_addr; -} mmu_cache_t; +#ifdef MMU_CACHE_STATS + uint64_t hits; + uint64_t misses; +#endif +} mmu_fetch_cache_t; + +/* Load/store cache: stores physical page numbers (not pointers) */ +typedef struct { + uint32_t n_pages; + uint32_t phys_ppn; /* Physical page number */ +#ifdef MMU_CACHE_STATS + uint64_t hits; + uint64_t misses; +#endif +} mmu_addr_cache_t; /* To use the emulator, start by initializing a hart_t object with zero values, * invoke vm_init(), and set the required environment-supplied callbacks. 
You @@ -85,7 +100,10 @@ struct __hart_internal { */ uint32_t exc_cause, exc_val; - mmu_cache_t cache_fetch; + mmu_fetch_cache_t cache_fetch; + /* 2-entry direct-mapped with hash-based indexing */ + mmu_addr_cache_t cache_load[2]; + mmu_addr_cache_t cache_store; /* Supervisor state */ bool s_mode; @@ -160,3 +178,6 @@ void hart_trap(hart_t *vm); /* Return a readable description for a RISC-V exception cause */ void vm_error_report(const hart_t *vm); + +/* Invalidate all MMU translation caches (fetch, load, store) */ +void mmu_invalidate(hart_t *vm);