From cd8bda7a29a2ef15d939890cacab02efb84e0158 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Thu, 14 Mar 2024 11:01:40 +0100 Subject: [PATCH] Changed, again, RDTSC and Hardware counter, introducing auto calibration when hardware counter is too slow for modern standard (and removed BOX64_RDTSC env. var.) --- docs/USAGE.md | 6 --- src/dynarec/arm64/dynarec_arm64_0f.c | 6 +++ src/dynarec/rv64/dynarec_rv64_0f.c | 6 +++ src/emu/x64run0f.c | 4 ++ src/include/debug.h | 1 + src/main.c | 65 +++++++++++++++++----------- src/tools/rcfile.c | 18 +------- 7 files changed, 58 insertions(+), 48 deletions(-) diff --git a/docs/USAGE.md b/docs/USAGE.md index c12a7618f..5981a2aa9 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -114,12 +114,6 @@ What to do when a CC INT3 opcode is encounter in the code being run * 0 : Trigger a TRAP signal if a handler is present * 1 : Just skip silently the opcode -#### BOX64_RDTSC * -Will use time-based emulation for rdtsc, even if hardware counter are available. Tick rate of Hardware counter (like on Arm64) might be too low for accurate RDTSC emulation. - * 0 : Use hardware counter if available (depend on architecture basicaly) - * 1 : Use monotonic timer to emulate rdtsc - * 2 : Check frequency of the hardware time (if present), and use hardware if frequency is 1GHz or better or else use mono-tonique time. - #### BOX64_X11GLX * Force libX11's GLX extension to be present. * 0 : Do not force libX11's GLX extension to be present. 
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index 0bb9dffc2..8e8ec00e8 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -98,6 +98,9 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { MRS_cntvct_el0(x1); } + if(box64_rdtsc_shift) { + LSLx(x1, x1, box64_rdtsc_shift); + } LSRx(xRDX, x1, 32); MOVw_REG(xRAX, x1); // wipe upper part MOVw_REG(xRCX, xZR); // IA32_TSC, 0 for now @@ -499,6 +502,9 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { MRS_cntvct_el0(x1); } + if(box64_rdtsc_shift) { + LSLx(x1, x1, box64_rdtsc_shift); + } LSRx(xRDX, x1, 32); MOVw_REG(xRAX, x1); // wipe upper part break; diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index bfe29a2c9..d19ecd4ed 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -87,6 +87,9 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { CSRRS(x3, xZR, 0xC01); // RDTIME } + if(box64_rdtsc_shift) { + SLLI(x3, x3, box64_rdtsc_shift); // scale the counter up: left shift, like the ARM64 LSLx path + } SRLI(xRDX, x3, 32); AND(xRAX, x3, xMASK); // wipe upper part MV(xRCX, xZR); // IA32_TSC, 0 for now @@ -412,6 +415,9 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { CSRRS(x3, xZR, 0xC01); // RDTIME } + if(box64_rdtsc_shift) { + SLLI(x3, x3, box64_rdtsc_shift); // scale the counter up: left shift, like the ARM64 LSLx path + } SRLI(xRDX, x3, 32); AND(xRAX, x3, xMASK); // wipe upper part break; diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c index f14b9dd52..e9c2c761d 100644 --- a/src/emu/x64run0f.c +++ b/src/emu/x64run0f.c @@ -107,6 +107,8 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step) break; case 0xF9: /* RDTSCP */ tmp64u = ReadTSC(emu); + if(box64_rdtsc_shift) + tmp64u<<=box64_rdtsc_shift; R_RAX = tmp64u & 0xffffffff; R_RDX = tmp64u >> 32; R_RCX = 0; // should be low of IA32_TSC @@ -368,6 +370,8 
@@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step) case 0x31: /* RDTSC */ tmp64u = ReadTSC(emu); + if(box64_rdtsc_shift) + tmp64u<<=box64_rdtsc_shift; R_RDX = tmp64u>>32; R_RAX = tmp64u&0xFFFFFFFF; break; diff --git a/src/include/debug.h b/src/include/debug.h index a6660d7d2..422a8f6e1 100644 --- a/src/include/debug.h +++ b/src/include/debug.h @@ -14,6 +14,7 @@ extern int box64_maxcpu; extern int box64_mmap32; extern int box64_ignoreint3; extern int box64_rdtsc; +extern uint8_t box64_rdtsc_shift; #ifdef DYNAREC extern int box64_dynarec_dump; extern int box64_dynarec_trace; diff --git a/src/main.c b/src/main.c index 4d9eb6ee5..38c44df11 100644 --- a/src/main.c +++ b/src/main.c @@ -63,6 +63,7 @@ int box64_mmap32 = 0; #endif int box64_ignoreint3 = 0; int box64_rdtsc = 0; +uint8_t box64_rdtsc_shift = 0; #ifdef DYNAREC int box64_dynarec = 1; int box64_dynarec_dump = 0; @@ -1021,39 +1022,54 @@ void LoadLogEnv() if(box64_ignoreint3) printf_log(LOG_INFO, "Will silently ignore INT3 in the code\n"); } - p = getenv("BOX64_RDTSC"); - if(p) { - if(strlen(p)==1) { - if(p[0]>='0' && p[0]<='0'+2) - box64_rdtsc = p[0]-'0'; - } - if(box64_rdtsc==2) { - #if defined(ARM64) || defined(RV64) - box64_rdtsc = 0; // allow hardxare counter - uint64_t freq = ReadTSCFrequency(NULL); - printf_log(LOG_INFO, "Hardware counter measured at %d Mhz, ", freq/1000); - if(freq>1000000000) { - printf_log(LOG_INFO, "keeping it\n"); - } else { - box64_rdtsc = 1; - printf_log(LOG_INFO, "not using it\n"); - } - #else - box64_rdtsc = 1; - printf_log(LOG_INFO, "Will use time-based emulation for rdtsc, even if hardware counter are available\n"); - #endif - } else if(box64_rdtsc) - printf_log(LOG_INFO, "Will use time-based emulation for rdtsc, even if hardware counter are available\n"); - } + // grab pagesize box64_pagesize = sysconf(_SC_PAGESIZE); if(!box64_pagesize) box64_pagesize = 4096; #ifdef DYNAREC + // grab cpu extensions for dynarec usage GatherDynarecExtensions(); #endif + // grab 
cpu name int ncpu = getNCpu(); const char* cpuname = getCpuName(); printf_log(LOG_INFO, " PageSize:%zd Running on %s with %d Cores\n", box64_pagesize, cpuname, ncpu); + // grab and calibrate hardware counter + int hardware = 0; + #if defined(ARM64) || defined(RV64) + hardware = 1; + box64_rdtsc = 0; // allow hardxare counter + #else + box64_rdtsc = 1; + printf_log(LOG_INFO, "Will use time-based emulation for rdtsc, even if hardware counter are available\n"); + #endif + uint64_t freq = ReadTSCFrequency(NULL); + if(freq<1000000) { + box64_rdtsc = 1; + if(hardware) printf_log(LOG_INFO, "Hardware counter to slow (%d kHz), not using it\n", freq/1000); + hardware = 0; + freq = ReadTSCFrequency(NULL); + } + uint64_t efreq = freq; + while(efreq<500000000) { // minium 500MHz + ++box64_rdtsc_shift; + efreq = freq<=1000000000LL; + if(ghz) freq/=100000000LL; else freq/=100000; + if(ghz) printf_log(LOG_INFO, "%d.%d GHz", freq/10, freq%10); + if(!ghz & freq>=1000) printf_log(LOG_INFO, "%d MHz", freq/10); + if(!ghz & freq<1000) printf_log(LOG_INFO, "%d.%d MHz", freq/10, freq%10); + if(box64_rdtsc_shift) { + printf_log(LOG_INFO, " emulating "); + ghz = efreq>=1000000000LL; + if(ghz) efreq/=100000000LL; else efreq/=100000; + if(ghz) printf_log(LOG_INFO, "%d.%d GHz", efreq/10, efreq%10); + if(!ghz & efreq>=1000) printf_log(LOG_INFO, "%d MHz", efreq/10); + if(!ghz & efreq<1000) printf_log(LOG_INFO, "%d.%d MHz", efreq/10, efreq%10); + } + printf_log(LOG_INFO, "\n"); } EXPORTDYN @@ -1186,7 +1202,6 @@ void PrintFlags() { printf(" BOX64_ENV1='XXX=yyyy' will add XXX=yyyy env. var. and continue with BOX86_ENV2 ... 
until var doesn't exist\n"); printf(" BOX64_JITGDB with 1 to launch \"gdb\" when a segfault is trapped, attached to the offending process\n"); printf(" BOX64_MMAP32=1 to use 32bits address space mmap in priority for external mmap as soon a 32bits process are detected (default for Snapdragon build)\n"); - printf(" BOX64_RDTSC to use a monotonic timer for rdtsc even if hardware counter are available (or check if precision is >=1Ghz for 2)\n"); } void PrintHelp() { diff --git a/src/tools/rcfile.c b/src/tools/rcfile.c index 87cc3dd9e..e6c40f2e1 100644 --- a/src/tools/rcfile.c +++ b/src/tools/rcfile.c @@ -81,7 +81,7 @@ ENTRYBOOL(BOX64_SHOWSEGV, box64_showsegv) \ ENTRYBOOL(BOX64_SHOWBT, box64_showbt) \ ENTRYBOOL(BOX64_MMAP32, box64_mmap32) \ ENTRYBOOL(BOX64_IGNOREINT3, box64_ignoreint3) \ -ENTRYINT(BOX64_RDTSC, box64_rdtsc, 0, 2, 2) \ +IGNORE(BOX64_RDTSC) \ ENTRYBOOL(BOX64_X11THREADS, box64_x11threads) \ ENTRYBOOL(BOX64_X11GLX, box64_x11glx) \ ENTRYDSTRING(BOX64_LIBGL, box64_libGL) \ @@ -593,22 +593,6 @@ void ApplyParams(const char* name) my_context->bashpath = strdup(param->bash); printf_log(LOG_INFO, "Applying %s=%s\n", "BOX64_BASH", param->bash); } - if(param->is_box64_rdtsc_present && (box64_rdtsc==2)) { - #if defined(ARM64) || defined(RV64) - box64_rdtsc = 0; // allow hardxware counter - uint64_t freq = ReadTSCFrequency(NULL); - printf_log(LOG_INFO, "Applying RDTSC: Hardware counter measured at %d Mhz, ", freq/1000); - if(freq>1000000000) { - printf_log(LOG_INFO, "keeping it\n"); - } else { - box64_rdtsc = 1; - printf_log(LOG_INFO, "not using it\n"); - } - #else - box64_rdtsc = 1; - printf_log(LOG_INFO, "Applying RDTSC: Will use time-based emulation for rdtsc, even if hardware counter are available\n"); - #endif - } #ifdef HAVE_TRACE int old_x64trace = my_context->x64trace; if(param->is_trace_present) {