Skip to content

Commit

Permalink
Add paravirt support for __vdso_gettimeofday().
Browse files Browse the repository at this point in the history
Fixed two bugs in tests/time that cancelled each other out:
- loop should take 1 cycle, not 2
- though it was on split byteQ lines, and did take 2,
depending on surrounding code.

Removed the manual envp walk to get the VDSO base.

TESTED=integration

Change-Id: Ia2d66f3be222b1f11f5339cfc093c02a1110fd73
  • Loading branch information
s-kanev committed Jan 27, 2016
1 parent c6d6c45 commit cf50b19
Show file tree
Hide file tree
Showing 15 changed files with 554 additions and 163 deletions.
34 changes: 31 additions & 3 deletions scripts/run_tests.py
Expand Up @@ -54,7 +54,7 @@ def setUp(self):

def tearDown(self):
    ''' Clean up after a test.

    Removes the run directory when self.clean_run_dir is set, and kills the
    background benchmark if one was started.

    The directory is removed exactly once: shutil.rmtree() raises on a path
    that no longer exists, so a second removal of the same directory would
    make every clean-up fail.
    '''
    if self.clean_run_dir:
        shutil.rmtree(self.run_dir)
    if self.background_bmk:
        self.background_bmk.kill()

Expand Down Expand Up @@ -504,6 +504,7 @@ def setDriverParams(self):
self.xio.AddBmks(bmk_cfg)
self.xio.AddPinOptions()
self.xio.AddPintoolOptions(num_cores=1)
self.xio.AddROIOptions()
self.xio.AddZestoOptions(os.path.join(self.xio.GetTreeDir(),
"xiosim/config", "N.cfg"))

Expand All @@ -512,9 +513,36 @@ def setUp(self):
self.expected_vals.append((xs.PerfStatRE("all_insn"), 3000000.0))

# Set up expected output from the simulated program.
# 2M cycles / 3.2GHz = 625us.
# 1M cycles / 3.2GHz = 312.5us; the check below expects ~317us.
elapsed_re = "Elapsed: (%s) sec" % xs.DECIMAL_RE
self.bmk_expected_vals.append((elapsed_re, 0.000317))

def runTest(self):
self.runAndValidate()

class TimeVDSOTest(XIOSimTest):
    ''' End-to-end test for __vdso_gettimeofday(). '''

    def setDriverParams(self):
        ''' Assemble the simulator command line for a single-core run with
        ROI options and a trace file, using the N.cfg configuration. '''
        # NOTE(review): this runs the "time" benchmark, while tests/Makefile
        # has a separate dynamically linked k8/time_vdso target -- confirm
        # which binary this test is meant to exercise.
        bmk_cfg = self.writeTestBmkConfig("time")
        self.xio.AddBmks(bmk_cfg)
        self.xio.AddPinOptions()
        self.xio.AddPintoolOptions(num_cores=1)
        self.xio.AddROIOptions()
        self.xio.AddTraceFile("trace.out")
        self.xio.AddZestoOptions(os.path.join(self.xio.GetTreeDir(),
                                              "xiosim/config", "N.cfg"))

    def setUp(self):
        ''' Register the expected simulator statistics and benchmark output.
        The test is skipped when the target architecture is "piii". '''
        super(TimeVDSOTest, self).setUp()
        if self.xio.TARGET_ARCH == "piii":
            # Clean up by hand before skipping. Assuming XIOSimTest is a
            # unittest.TestCase, tearDown() is not run when setUp() raises,
            # and skipTest() raises.
            self.tearDown()
            self.skipTest("VDSO not used for gettimeofday() on i686")
        self.expected_vals.append((xs.PerfStatRE("all_insn"), 3000000.0))

        # Set up expected output from the simulated program.
        # 1M cycles / 3.2GHz = 312.5us; we expect ~317us to be reported.
        # Only one expectation is registered for the "Elapsed" line: two
        # different values for the same pattern cannot both be met.
        elapsed_re = "Elapsed: (%s) sec" % xs.DECIMAL_RE
        self.bmk_expected_vals.append((elapsed_re, 0.000317))

    def runTest(self):
        ''' Run the simulation and validate it against the expectations. '''
        self.runAndValidate()
Expand Down
3 changes: 3 additions & 0 deletions scripts/xiosim_driver.py
Expand Up @@ -78,6 +78,9 @@ def AddReplaceOptions(self, func):
def AddTraceFile(self, file):
    ''' Append a "-trace <file> " option to the command being built. '''
    trace_opt = "-trace %s " % file
    self.cmd = self.cmd + trace_opt

def DisableSpeculation(self):
    ''' Append "-speculation false " to the command being built. '''
    no_spec_opt = "-speculation false "
    self.cmd = self.cmd + no_spec_opt

def AddZestoOptions(self, cfg):
if self.bridge_dirs:
self.cmd += "-buffer_bridge_dirs %s " % self.bridge_dirs
Expand Down
6 changes: 4 additions & 2 deletions tests/Makefile
Expand Up @@ -24,7 +24,7 @@ piii/prefetch: prefetch.cpp
piii/ignore: ignore.cpp
g++ -m32 -std=c++1y -static -O0 -o $@ $<
piii/time: time.cpp
g++ -m32 -O1 -static -o $@ $<
g++ -m32 -O1 -falign-functions=64 -static -o $@ $<
piii/loop: loop.cpp
g++ -m32 -O0 -static -o $@ $<
piii/rdtsc: rdtsc.cpp
Expand All @@ -49,7 +49,9 @@ k8/prefetch: prefetch.cpp
k8/ignore: ignore.cpp
g++ -std=c++1y -static -O0 -o $@ $<
k8/time: time.cpp
g++ -O1 -static -o $@ $<
g++ -O1 -falign-functions=64 -static -o $@ $<
k8/time_vdso: time.cpp
g++ -O1 -falign-functions=64 -o $@ $<
k8/loop: loop.cpp
g++ -O0 -static -o $@ $<
k8/rdtsc: rdtsc.cpp
Expand Down
Binary file modified tests/k8/time
Binary file not shown.
Binary file added tests/k8/time_vdso
Binary file not shown.
Binary file modified tests/piii/time
Binary file not shown.
34 changes: 22 additions & 12 deletions tests/time.cpp
Expand Up @@ -2,14 +2,19 @@
*
* Simulator should return simulated time instead of host time.
*
* Compile: g++ -O1 -m32 -static -o time time.c
*/

#include <sys/time.h>
#include <sys/times.h>
#include <stdio.h>
#include <unistd.h>

/* Region-of-interest markers.
 *
 * Both functions have C linkage and are never inlined, so each remains a
 * real call to an unmangled symbol. Their bodies only contain an empty asm
 * statement with a "memory" clobber, i.e. a compiler-level memory barrier.
 * NOTE(review): presumably the simulator finds these by symbol name to
 * delimit the simulated region -- confirm against the ROI support.
 */
extern "C" {
__attribute__((noinline)) void xiosim_roi_begin();
__attribute__((noinline)) void xiosim_roi_end();
}

void xiosim_roi_begin() {
    __asm__ __volatile__("" : : : "memory");
}

void xiosim_roi_end() {
    __asm__ __volatile__("" : : : "memory");
}

const int ITER = 1000000;

double times_test() {
Expand All @@ -30,34 +35,39 @@ double times_test() {
return (end.tms_utime - start.tms_utime);
}

/* Busy loop used as the timed workload.
 *
 * Runs a three-instruction sequence (add, compare, backward branch) until
 * the counter is no longer below ITER (unsigned comparison), then returns
 * the final counter value.
 */
unsigned loop() {
unsigned counter = 0;
/* Operands: %0 is the counter as output in the a register ("=a"), %1 is the
 * same register as input ("0" ties it to operand 0), %2 is ITER as an
 * immediate ("i"). "1:" is a local numeric label; "jb 1b" jumps back to it
 * while %1 is below %2. */
__asm__ __volatile ("1: addl $1, %0;"
"cmpl %2, %1;"
"jb 1b;"
: "=a"(counter)
: "0"(counter), "i"(ITER)
: "memory");
return counter;
}

/* Times loop() with gettimeofday().
 *
 * Prints the start and end timestamps and the final loop counter, and
 * returns the elapsed time between the two gettimeofday() calls in seconds.
 * Note that the first printf() sits between the two calls, so it is part
 * of the measured interval.
 */
double gettimeofday_test() {
    struct timeval start, end;
    gettimeofday(&start, NULL);
    /* tv_sec and tv_usec are not int on every target; print them as long
     * so the format specifiers always match the arguments. */
    printf("Start time of day: %ld s %ld us\n",
           static_cast<long>(start.tv_sec), static_cast<long>(start.tv_usec));

    unsigned counter = loop();

    gettimeofday(&end, NULL);

    /* 1M iterations of a 3 instruction loop -> 3M instructions.
     * With pipelining we get 1 cycle per iteration for a total of 1M cycles.
     * Default NHM config is 3.2GHz, so the loop alone accounts for about
     * 1M / 3.2GHz = 312.5us; scripts/run_tests.py expects roughly 317us.
     */
    printf("Counter value = %u\n", counter);
    printf("End time of day: %ld s %ld us\n",
           static_cast<long>(end.tv_sec), static_cast<long>(end.tv_usec));
    return (end.tv_sec - start.tv_sec) + 1.0*(end.tv_usec - start.tv_usec)/(1e6);
}

/* Entry point: runs the gettimeofday() test inside the region of interest
 * and prints the elapsed time it reports. */
int main() {
    // times() support is not finished yet, so only gettimeofday() is exercised.
    xiosim_roi_begin();
    const double elapsed_sec = gettimeofday_test();
    xiosim_roi_end();

    printf("Elapsed: %3.8f sec\n", elapsed_sec);
    return 0;
}
4 changes: 4 additions & 0 deletions xiosim/pintool/BUILD
Expand Up @@ -39,6 +39,8 @@ cc_binary(
"ignore_ins.h",
"ildjit.cpp",
"ildjit.h",
"paravirt.cpp",
"paravirt.h",
"replace_function.cpp",
"replace_function.h",
"roi.cpp",
Expand All @@ -51,6 +53,8 @@ cc_binary(
"syscall_handling.h",
"utils.cpp",
"utils.h",
"vdso.cpp",
"vdso.h",
],
linkopts = [
"-Wl,-Bsymbolic",
Expand Down
9 changes: 8 additions & 1 deletion xiosim/pintool/feeder.h
Expand Up @@ -24,7 +24,6 @@ class handshake_container_t;

extern KNOB<BOOL> KnobILDJIT;
extern KNOB<int> KnobNumCores;
extern KNOB<BOOL> KnobTimingVirtualization;

/* A list of the threads in this feeder. */
extern list<THREADID> thread_list;
Expand All @@ -45,6 +44,9 @@ extern int asid;
/* Xed machine mode state for when we need to encode/decode things. */
extern xed_state_t dstate;

/* Host TSC values for timing virtualization. */
extern tick_t* initial_timestamps;

#define ATOMIC_ITERATE(_list, _it, _lock) \
for (lk_lock(&(_lock), 1), (_it) = (_list).begin(), lk_unlock(&(_lock)); [&] { \
lk_lock(&(_lock), 1); \
Expand Down Expand Up @@ -214,6 +216,11 @@ VOID ScheduleThread(THREADID tid);
* Check out base_allocator.h for the API. */
int AllocateCores(std::vector<double> scaling, double serial_runtime);

/* Helper to check if producer thread @tid will grab instruction at @pc.
* If return value is false, we can skip instrumentation. Can hog execution
* if producers are disabled by producer_sleep. */
BOOL CheckIgnoreConditions(THREADID tid, ADDRINT pc);

/* Insert instrumentation that we didn't add so we can skip ILDJIT compilation even faster. */
VOID doLateILDJITInstrumentation();

Expand Down

0 comments on commit cf50b19

Please sign in to comment.