|
| 1 | +#include <chrono> |
| 2 | + |
| 3 | +#include "configparser.h" |
| 4 | +#include "cpu/cpu.h" |
| 5 | +#include "cpu/mem.h" |
| 6 | +#include "system/types.h" |
| 7 | +#include "tools/snprintf.h" |
| 8 | +#include "debug/ppcdis.h" |
| 9 | +#include "cpu/cpu_generic/ppc_mmu.h" |
| 10 | +#include "cpu/cpu_generic/ppc_cpu.h" |
| 11 | +#include "system/arch/sysendian.h" |
| 12 | + |
| 13 | +uint32 cs_code[] = { |
| 14 | + 0x3863FFFC, 0x7C861671, 0x41820090, 0x70600002, 0x41E2001C, 0xA0030004, |
| 15 | + 0x3884FFFE, 0x38630002, 0x5486F0BF, 0x7CA50114, 0x41820070, 0x70C60003, |
| 16 | + 0x41820014, 0x7CC903A6, 0x84030004, 0x7CA50114, 0x4200FFF8, 0x5486E13F, |
| 17 | + 0x41820050, 0x80030004, 0x7CC903A6, 0x80C30008, 0x7CA50114, 0x80E3000C, |
| 18 | + 0x7CA53114, 0x85030010, 0x7CA53914, 0x42400028, 0x80030004, 0x7CA54114, |
| 19 | + 0x80C30008, 0x7CA50114, 0x80E3000C, 0x7CA53114, 0x85030010, 0x7CA53914, |
| 20 | + 0x4200FFE0, 0x7CA54114, 0x70800002, 0x41E20010, 0xA0030004, 0x38630002, |
| 21 | + 0x7CA50114, 0x70800001, 0x41E20010, 0x88030004, 0x5400402E, 0x7CA50114, |
| 22 | + 0x7C650194, /* 0x4E800020 */ 0x00005AF0 |
| 23 | +}; |
| 24 | + |
| 25 | +constexpr uint32 test_size = 0x8000; // 0x7FFFFFFC is the max |
| 26 | +constexpr uint32 test_samples = 2000; |
| 27 | +constexpr uint32 test_iterations = 5; |
| 28 | + |
| 29 | +int ppc_bench() { |
| 30 | + gConfig = new ConfigParser(); |
| 31 | + ppc_cpu_init_config(); |
| 32 | + |
| 33 | + if (!ppc_init_physical_memory(64 * 1024 * 1024)) { // 64MB is the minimum |
| 34 | + ht_printf("cannot initialize memory.\n"); |
| 35 | + return 1; |
| 36 | + } |
| 37 | + if (!ppc_cpu_init()) { |
| 38 | + ht_printf("cpu_init failed! Out of memory?\n"); |
| 39 | + return 1; |
| 40 | + } |
| 41 | + |
| 42 | + size_t code_size = sizeof(cs_code) / sizeof(cs_code[0]); |
| 43 | + ht_printf("Loading %llu instructions:\n", code_size); |
| 44 | + |
| 45 | + /* load executable code into RAM at address 0 */ |
| 46 | + for (int i = 0; i < code_size; i++) { |
| 47 | + uint32 instr_code = cs_code[i]; |
| 48 | + uint32 instr_code_be = ppc_word_to_BE(instr_code); |
| 49 | + uint32 instr_addr = i * 4; |
| 50 | + uint32 instr_physical_addr; |
| 51 | + int r = ppc_effective_to_physical(instr_addr, PPC_MMU_WRITE | PPC_MMU_CODE, instr_physical_addr); |
| 52 | + if (r != PPC_MMU_OK) { |
| 53 | + ht_printf("MMU error when mapping instruction address: %d\n", r); |
| 54 | + return 1; |
| 55 | + } |
| 56 | + |
| 57 | + // ppc_write_physical_word gets optimized out, do its equivalent. |
| 58 | + *((uint32*)(gMemory+instr_physical_addr)) = instr_code_be; |
| 59 | + |
| 60 | + PPCDisassembler dis(PPC_MODE_32); |
| 61 | + CPU_ADDR addr; |
| 62 | + addr.addr32.offset = instr_addr; |
| 63 | + const char *instr_disasm = dis.str(dis.decode((byte*)&instr_code_be, 4, addr), 0); |
| 64 | + ht_printf("Instruction %02llu: 0x%08x %s\n", i, instr_code, instr_disasm); |
| 65 | + } |
| 66 | + |
| 67 | + srand(0xCAFEBABE); |
| 68 | + |
| 69 | + ht_printf("Test size: 0x%X\n", test_size); |
| 70 | + ht_printf("First few bytes:\n"); |
| 71 | + bool did_lf = false; |
| 72 | + for (int i = 0; i < test_size; i++) { |
| 73 | + uint8 val = rand() % 256; |
| 74 | + if (i < 64) { |
| 75 | + ht_printf("%02x", val); |
| 76 | + did_lf = false; |
| 77 | + if (i % 32 == 31) { |
| 78 | + ht_printf("\n"); |
| 79 | + did_lf = true; |
| 80 | + } |
| 81 | + } |
| 82 | + |
| 83 | + uint32 addr = 0x1000+i; |
| 84 | + uint32 physical_addr; |
| 85 | + int r = ppc_effective_to_physical(addr, PPC_MMU_WRITE, physical_addr); |
| 86 | + if (r != PPC_MMU_OK) { |
| 87 | + ht_printf("MMU error when mapping data address: %d\n", r); |
| 88 | + return 1; |
| 89 | + } |
| 90 | + |
| 91 | + gMemory[physical_addr] = val; |
| 92 | + } |
| 93 | + if (!did_lf) |
| 94 | + ht_printf("\n"); |
| 95 | + |
| 96 | + // Run once to warm up the cache cache and to get the expected checksum value. |
| 97 | + ppc_cpu_set_pc(0, 0); |
| 98 | + ppc_cpu_set_gpr(0, 3, 0x1000); // buf |
| 99 | + ppc_cpu_set_gpr(0, 4, test_size); // len |
| 100 | + ppc_cpu_set_gpr(0, 5, 0); // sum |
| 101 | + ppc_cpu_run(); |
| 102 | + uint32 checksum = ppc_cpu_get_gpr(0, 3); |
| 103 | + ht_printf("Checksum: 0x%08X\n", checksum); |
| 104 | + |
| 105 | + // Also warm up the clock |
| 106 | + uint64_t overhead = -1; |
| 107 | + for (int i = 0; i < test_samples; i++) { |
| 108 | + auto start_time = std::chrono::steady_clock::now(); |
| 109 | + auto end_time = std::chrono::steady_clock::now(); |
| 110 | + auto time_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time); |
| 111 | + if (time_elapsed.count() < overhead) { |
| 112 | + overhead = time_elapsed.count(); |
| 113 | + } |
| 114 | + } |
| 115 | + ht_printf("Overhead Time: %lld ns\n", overhead); |
| 116 | + |
| 117 | + ht_printf("Running benchmark...\n"); |
| 118 | + for (int i = 0; i < test_iterations; i++) { |
| 119 | + uint64_t best_sample = -1; |
| 120 | + for (int j = 0; j < test_samples; j++) { |
| 121 | + ppc_cpu_set_pc(0, 0); |
| 122 | + ppc_cpu_set_gpr(0, 3, 0x1000); // buf |
| 123 | + ppc_cpu_set_gpr(0, 4, test_size); // len |
| 124 | + ppc_cpu_set_gpr(0, 5, 0); // sum |
| 125 | + |
| 126 | + auto start_time = std::chrono::steady_clock::now(); |
| 127 | + ppc_cpu_run(); |
| 128 | + auto end_time = std::chrono::steady_clock::now(); |
| 129 | + auto time_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time); |
| 130 | + if (time_elapsed.count() < best_sample) |
| 131 | + best_sample = time_elapsed.count(); |
| 132 | + } |
| 133 | + uint32 iter_checksum = ppc_cpu_get_gpr(0, 3); |
| 134 | + if (iter_checksum != checksum) |
| 135 | + ht_printf("Checksum changed in iteration %d: 0x%08X\n", i, iter_checksum); |
| 136 | + best_sample -= overhead; |
| 137 | + ht_printf("(%d) %lld ns, %.4lf MiB/s\n", i+1, best_sample, 1E9 * test_size / (best_sample * 1024 * 1024)); |
| 138 | + } |
| 139 | + |
| 140 | + return 0; |
| 141 | +} |
0 commit comments