Permalink
Browse files

Sample load/store misses by the PC of the instruction.

Set the sampling interval through cache_miss_sample_parameter in the
system configuration; setting it to 0 disables the stat altogether.
Then set the sample_misses parameter to true in the configuration of
each cache of interest. Only caches with sample_misses set to true
will have the stat appear in the final stat printout.

TESTED=integration

Change-Id: I21b197fbb0d69f14f8f6faff274183812167e9e5
  • Loading branch information...
xyzsam committed May 14, 2016
1 parent f79cacb commit 52f8b59acbb72dae5a9b4400efbb4d439a71924f
View
@@ -607,5 +607,40 @@ def setUp(self):
def runTest(self):
self.runAndValidate()
+class MissesSampleTest(XIOSimTest):
+ ''' End-to-end test for sampled cache miss PCs. '''
+ def setDriverParams(self):
+ bmk_cfg = self.writeTestBmkConfig("misses")
+ self.xio.AddBmks(bmk_cfg)
+
+ repl = {
+ "system_cfg.cache_miss_sample_parameter" : "100",
+ "core_cfg.exec_cfg.dcache_cfg.sample_misses": "true",
+ "uncore_cfg.llccache_cfg.sample_misses": "true",
+ }
+ test_cfg = self.writeTestConfig(os.path.join(self.xio.GetTreeDir(),
+ "xiosim/config", "H.cfg"),
+ repl)
+ self.xio.AddConfigFile(test_cfg)
+ self.xio.AddPinOptions()
+
+ def setUp(self):
+ super(MissesSampleTest, self).setUp()
+ # With 10,000 iterations, each of which should cause a DL1 load miss, and
+ # a sampling parameter of 100, we should capture 10,000/100 = 100 misses.
+ # The LLC is included because there is a separate code path for
+ # registering the stat for the LLC; its expected count of 75 is empirical,
+ # based on observations from a few simulation runs.
+ self.expected_vals.append((xs.PerfStatRE("c0.DL1.load_misses"), 10000))
+ if self.xio.TARGET_ARCH == "k8":
+ self.expected_vals.append((xs.PerfDistStatRE("c0.DL1.load_miss_pcs[0x400523]"), 100))
+ self.expected_vals.append((xs.PerfDistStatRE("LLC.load_miss_pcs[0x400523]"), 75))
+ else:
+ self.expected_vals.append((xs.PerfDistStatRE("c0.DL1.load_miss_pcs[0x804833b]"), 100))
+ self.expected_vals.append((xs.PerfDistStatRE("LLC.load_miss_pcs[0x804833b]"), 75))
+
+ def runTest(self):
+ self.runAndValidate()
+
if __name__ == "__main__":
unittest.main()
View
@@ -29,6 +29,8 @@ piii/loop: loop.cpp
g++ -m32 -O0 -static -o $@ $<
piii/rdtsc: rdtsc.cpp
g++ -m32 -O1 -static -falign-functions=16 -o $@ $<
+piii/misses: misses.cpp
+ g++ -m32 -O1 -static -o $@ $<
k8/fib: fib.cpp
g++ -std=c++1y -static -O0 -o $@ $<
@@ -58,3 +60,5 @@ k8/rdtsc: rdtsc.cpp
g++ -O1 -static -falign-functions=16 -o $@ $<
k8/atomics: atomics.cpp
g++ -std=c++1y -O1 -static -falign-loops=64 -o $@ $<
+k8/misses: misses.cpp
+ g++ -O1 -static -o $@ $<
View
Binary file not shown.
View
@@ -0,0 +1,34 @@
+// Repeatedly load from pseudorandom locations in a large buffer.
+//
+// The PC of the load should appear in the load_miss_pcs stat.
+
+#include <stdlib.h>
+#include <stdio.h>
+
+const int ITER = 10000;
+const int LOG_CACHELINE_SZ = 6;
+// A buffer of this many integers must be at least twice the size of the L1 dcache.
+const int BUF_SZ = 1024 * 1024;
+
+extern "C" void xiosim_roi_begin() __attribute__ ((noinline));  // Called just before the load loop in main(); noinline keeps it a real call.
+extern "C" void xiosim_roi_end() __attribute__ ((noinline));  // Called just after the load loop in main(); noinline keeps it a real call.
+
+void xiosim_roi_begin() { __asm__ __volatile__ ("":::"memory"); }  // Empty asm with a "memory" clobber: no instructions, only a compiler barrier.
+void xiosim_roi_end() { __asm__ __volatile__ ("":::"memory"); }  // Same compiler-only barrier as xiosim_roi_begin().
+
+int main(int args, char** argv) {
+ int val = 0;
+ int* src_buf = (int*)malloc(BUF_SZ * sizeof(int));
+ // Pseudorandom with a fixed seed, so the index sequence is the same on every run.
+ srand(0);
+
+ xiosim_roi_begin();
+ for (int i = 0; i < ITER; i++) {
+ int rand_idx = (rand() % (BUF_SZ >> LOG_CACHELINE_SZ)) << LOG_CACHELINE_SZ;  // Multiple of 2^LOG_CACHELINE_SZ elements, always below BUF_SZ.
+ val = src_buf[rand_idx];
+ }
+ xiosim_roi_end();
+
+ printf("Last value: %d\n", val);
+ return 0;
+}
View
Binary file not shown.
View
@@ -9,14 +9,15 @@
# Global settings about the system and the simulation.
system_cfg {
- seed = 1 # Random number generator seed
- num_cores = 1 # Number of cores in the system.
- heartbeat_interval = 10000 # Print out simulator heartbeat every x cycles.
- ztrace_file_prefix = "ztrace" # Zesto trace filename prefix.
- simulate_power = false # Simulate power.
- power_rtp_interval = 0 # uncore cycles between power computations.
- power_rtp_file = "" # Runtime power file.
- output_redir = "sim.out" # Redirect simulator output.
+ seed = 1 # Random number generator seed
+ num_cores = 1 # Number of cores in the system.
+ heartbeat_interval = 10000 # Print out simulator heartbeat every x cycles.
+ ztrace_file_prefix = "ztrace" # Zesto trace filename prefix.
+ simulate_power = false # Simulate power.
+ power_rtp_interval = 0 # uncore cycles between power computations.
+ cache_miss_sample_parameter = 0 # Interval between sampling cache misses.
+ power_rtp_file = "" # Runtime power file.
+ output_redir = "sim.out" # Redirect simulator output.
dvfs_cfg {
# DVFS controller configuration.
@@ -73,6 +74,8 @@ core_cfg {
# Cache coherency controller configuration.
coherency_controller = "none"
magic_hit_rate = -1
+ # Enable cache miss sampling.
+ sample_misses = false
iprefetch_cfg inst_pf {
config = {"nextline"} # 1st-level icache prefetcher configuration
@@ -195,6 +198,7 @@ core_cfg {
magic_hit_rate = -1
mshr_cmd = "RWPB"
coherency_controller = "const:75"
+ sample_misses = false
dtlb_cfg dtlb {
config = "DTLB:16:4:1:2:L:8"
@@ -230,6 +234,7 @@ core_cfg {
config = "none"
mshr_cmd = "RPWB"
coherency_controller = "none"
+ sample_misses = false
}
# RingCache settings.
@@ -346,6 +351,7 @@ uncore_cfg {
magic_hit_rate = -1
mshr_cmd = "RPWB" # MSHR configuration.
clock = 800 # Cache clock frequency (MHz).
+ sample_misses = false
llcprefetch_cfg llc_pf {
config = {"IP:256:12:13:6 stream:12:4"} # last-level cache prefetcher configuration
View
@@ -9,14 +9,15 @@
# Global settings about the system and the simulation.
system_cfg {
- seed = 1 # Random number generator seed
- num_cores = 1 # Number of cores in the system.
- heartbeat_interval = 10000 # Print out simulator heartbeat every x cycles.
- ztrace_file_prefix = "ztrace" # Zesto trace filename prefix.
- simulate_power = false # Simulate power.
- power_rtp_interval = 0 # uncore cycles between power computations.
- power_rtp_file = "" # Runtime power file.
- output_redir = "sim.out" # Redirect simulator output.
+ seed = 1 # Random number generator seed
+ num_cores = 1 # Number of cores in the system.
+ heartbeat_interval = 10000 # Print out simulator heartbeat every x cycles.
+ ztrace_file_prefix = "ztrace" # Zesto trace filename prefix.
+ simulate_power = false # Simulate power.
+ power_rtp_interval = 0 # uncore cycles between power computations.
+ cache_miss_sample_parameter = 0 # Interval between sampling cache misses.
+ power_rtp_file = "" # Runtime power file.
+ output_redir = "sim.out" # Redirect simulator output.
dvfs_cfg {
# DVFS controller configuration.
@@ -72,6 +73,8 @@ core_cfg {
config = "IL1:64:8:64:4:64:2:C:8"
# Cache coherency controller configuration.
coherency_controller = "none"
+ # Enable cache miss sampling.
+ sample_misses = false
iprefetch_cfg inst_pf {
config = {"nextline"} # 1st-level icache prefetcher configuration
@@ -197,6 +200,7 @@ core_cfg {
config = "DL1:64:8:64:8:64:2:C:W:B:16:8:C"
mshr_cmd = "RWPB"
coherency_controller = "none"
+ sample_misses = false
dtlb_cfg dtlb {
config = "DTLB:256:4:1:2:L:8"
@@ -232,6 +236,7 @@ core_cfg {
config = "DL2:512:8:64:8:64:6:C:W:B:16:8:C"
mshr_cmd = "RPWB"
coherency_controller = "const:75"
+ sample_misses = false
l2prefetch_cfg l2_pf {
config = {"IP:256:12:13:6", "nextline"}
@@ -364,6 +369,7 @@ uncore_cfg {
coherency_controller = "const:75"
mshr_cmd = "RPWB" # MSHR configuration.
clock = 1600 # Cache clock frequency (MHz).
+ sample_misses = false
llcprefetch_cfg llc_pf {
config = {"IP:256:12:13:6 stream:12:4"} # last-level cache prefetcher configuration
View
@@ -9,14 +9,15 @@
# Global settings about the system and the simulation.
system_cfg {
- seed = 1 # Random number generator seed
- num_cores = 1 # Number of cores in the system.
- heartbeat_interval = 10000 # Print out simulator heartbeat every x cycles.
- ztrace_file_prefix = "ztrace" # Zesto trace filename prefix.
- simulate_power = false # Simulate power.
- power_rtp_interval = 0 # uncore cycles between power computations.
- power_rtp_file = "" # Runtime power file.
- output_redir = "sim.out" # Redirect simulator output.
+ seed = 1 # Random number generator seed
+ num_cores = 1 # Number of cores in the system.
+ heartbeat_interval = 10000 # Print out simulator heartbeat every x cycles.
+ ztrace_file_prefix = "ztrace" # Zesto trace filename prefix.
+ simulate_power = false # Simulate power.
+ power_rtp_interval = 0 # uncore cycles between power computations.
+ cache_miss_sample_parameter = 0 # Interval between sampling cache misses.
+ power_rtp_file = "" # Runtime power file.
+ output_redir = "sim.out" # Redirect simulator output.
dvfs_cfg {
# DVFS controller configuration.
@@ -72,6 +73,8 @@ core_cfg {
config = "IL1:128:4:64:4:64:2:C:8"
# Cache coherency controller configuration.
coherency_controller = "none"
+ # Enable cache miss sampling.
+ sample_misses = false
iprefetch_cfg inst_pf {
config = {"nextline"} # 1st-level icache prefetcher configuration
@@ -195,6 +198,7 @@ core_cfg {
config = "DL1:64:8:64:8:64:2:C:W:B:16:8:C"
mshr_cmd = "RWPB"
coherency_controller = "none"
+ sample_misses = false
dtlb_cfg dtlb {
config = "DTLB:256:4:1:2:L:8"
@@ -230,6 +234,7 @@ core_cfg {
config = "DL2:512:8:64:8:64:2:C:W:B:16:8:C"
mshr_cmd = "RPWB"
coherency_controller = "const:75"
+ sample_misses = false
l2prefetch_cfg l2_pf {
config = {"IP:256:12:13:6", "nextline"}
@@ -362,6 +367,7 @@ uncore_cfg {
coherency_controller = "const:75"
mshr_cmd = "RPWB" # MSHR configuration.
clock = 1600 # Cache clock frequency (MHz).
+ sample_misses = false
llcprefetch_cfg llc_pf {
config = {"IP:256:12:13:6 stream:12:4"} # last-level cache prefetcher configuration
View
@@ -5,14 +5,15 @@
# Global settings about the system and the simulation.
system_cfg {
- seed = 1 # Random number generator seed
- num_cores = 1 # Number of cores in the system.
- heartbeat_interval = 0 # Print out simulator heartbeat every x cycles.
- ztrace_file_prefix = "ztrace" # Zesto trace filename prefix.
- simulate_power = false # Simulate power.
- power_rtp_interval = 0 # uncore cycles between power computations.
- power_rtp_file = "" # Runtime power file.
- output_redir = NULL # Redirect simulator output.
+ seed = 1 # Random number generator seed
+ num_cores = 1 # Number of cores in the system.
+ heartbeat_interval = 0 # Print out simulator heartbeat every x cycles.
+ ztrace_file_prefix = "ztrace" # Zesto trace filename prefix.
+ simulate_power = false # Simulate power.
+ power_rtp_interval = 0 # uncore cycles between power computations.
+ cache_miss_sample_parameter = 0 # Interval between sampling cache misses.
+ power_rtp_file = "" # Runtime power file.
+ output_redir = NULL # Redirect simulator output.
dvfs_cfg {
# DVFS controller configuration.
@@ -68,6 +69,8 @@ core_cfg {
config = "IL1:64:8:64:4:16:3:L:8"
# Cache coherency controller configuration.
coherency_controller = "none"
+ # Enable cache miss sampling.
+ sample_misses = false
iprefetch_cfg inst_pf {
config = {"nextline"} # 1st-level icache prefetcher configuration
@@ -191,6 +194,7 @@ core_cfg {
config = "DL1:64:8:64:8:64:2:L:W:T:8:C"
mshr_cmd = "RWPB"
coherency_controller = "none"
+ sample_misses = false
dtlb_cfg dtlb {
config = "DTLB:4:4:1:2:L:4"
@@ -226,6 +230,7 @@ core_cfg {
config = "DL2:512:8:64:8:64:9:L:W:T:8:C"
mshr_cmd = "RPWB"
coherency_controller = "none"
+ sample_misses = false
l2prefetch_cfg l2_pf {
config = {"nextline"}
@@ -355,6 +360,7 @@ uncore_cfg {
coherency_controller = "none"
mshr_cmd = "RPWB" # MSHR configuration.
clock = 800 # Cache clock frequency (MHz).
+ sample_misses = false
llcprefetch_cfg llc_pf {
config = {"none"} # last-level cache prefetcher configuration
View
@@ -6,12 +6,13 @@
# Global settings about the system and the simulation.
system_cfg {
- seed = 1 # Random number generator seed
- num_cores = 1 # Number of cores in the system.
- heartbeat_interval = 10000 # Print out simulator heartbeat every x cycles.
- ztrace_file_prefix = "ztrace" # Zesto trace filename prefix.
- simulate_power = false # Simulate power.
- output_redir = "sim.out" # Redirect simulator output.
+ seed = 1 # Random number generator seed
+ num_cores = 1 # Number of cores in the system.
+ heartbeat_interval = 10000 # Print out simulator heartbeat every x cycles.
+ cache_miss_sample_parameter = 0 # Interval between sampling cache misses.
+ ztrace_file_prefix = "ztrace" # Zesto trace filename prefix.
+ simulate_power = false # Simulate power.
+ output_redir = "sim.out" # Redirect simulator output.
# OS scheduler and core allocator.
scheduler_cfg {
@@ -52,6 +53,8 @@ core_cfg {
mshr_cmd = "RWPB"
coherency_controller = "none"
magic_hit_rate = -1.0
+ # Enable cache miss sampling
+ sample_misses = false
dtlb_cfg dtlb {
config = "DTLB:16:4:1:2:L:8"
@@ -83,6 +86,7 @@ core_cfg {
l2cache_cfg L2 {
config = "none"
coherency_controller = "none"
+ sample_misses = false
l2prefetch_cfg l2_pf {
config = {"none"}
@@ -101,6 +105,7 @@ uncore_cfg {
mshr_cmd = "RPWB" # MSHR configuration.
clock = 800 # Cache clock frequency (MHz).
magic_hit_rate = -1.0
+ sample_misses = false
llcprefetch_cfg llc_pf {
config = {"IP:256:12:13:6 stream:12:4"} # last-level cache prefetcher configuration
View
@@ -122,6 +122,11 @@ struct core_knobs_t {
const char* DTLB_controller_opt_str;
const char* DTLB2_controller_opt_str;
+ /* Enable/disable sampling of cache misses. */
+ bool IL1_sample_misses;
+ bool DL1_sample_misses;
+ bool DL2_sample_misses;
+
bool DL1_rep_req;
} memory;
@@ -138,6 +143,7 @@ struct uncore_knobs_t {
const char* LLC_opt_str;
const char* LLC_MSHR_cmd;
float LLC_magic_hit_rate;
+ bool LLC_sample_misses;
double LLC_speed;
const char* LLC_controller_str;
@@ -196,6 +202,9 @@ struct system_knobs_t {
const char* dvfs_opt_str;
int dvfs_interval;
+ /* Interval between sampled cache misses (sample the PC of one in every N misses); 0 disables sampling. */
+ unsigned long cache_miss_sample_parameter;
+
/* Prefix for profiling result files.
* Filenames are prefix.<core>.<profile_id>. */
const char* profiling_file_prefix;
Oops, something went wrong.

0 comments on commit 52f8b59

Please sign in to comment.