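# NOTE: the next seven lines are web-page navigation text that was captured
# together with this file. They are not configuration, so they are commented out.
# Permalink
# Switch branches/tags
# Nothing to show
# Find file Copy path
# Fetching contributors…
# Cannot retrieve contributors at this time
# 413 lines (366 sloc) 12.4 KB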
##############################################################################################
# This is our "best guess" knob file for a Nehalem-class (45nm) Intel Core i7
# processor. We make no claims as to the accuracy or correctness of these
# settings. There is no support for modeling SMT cores, LLC cache inclusion,
# or a variety of other microarchitectural features, so use this at your own
# risk. It is **your** responsibility to understand what you are modeling and
# simulating!
##############################################################################################
# Global settings about the system and the simulation.
system_cfg {
seed = 1 # Random number generator seed.
num_cores = 1 # Number of cores in the system.
heartbeat_interval = 10000 # Print out simulator heartbeat every x cycles.
ztrace_file_prefix = "ztrace" # Zesto trace filename prefix.
simulate_power = false # Simulate power.
# NOTE(review): power_rtp_interval and power_rtp_file presumably only matter
# when simulate_power is true - confirm against the simulator.
power_rtp_interval = 0 # Uncore cycles between power computations.
cache_miss_sample_parameter = 0 # Interval between sampling cache misses.
power_rtp_file = "" # Runtime power file.
output_redir = "sim.out" # Redirect simulator output.
# Dynamic voltage/frequency scaling (DVFS) settings.
dvfs_cfg {
# DVFS controller configuration.
config = "none"
# Re-evaluate voltage/freq choice every X cycles.
interval = 0
}
# OS scheduler and core allocator.
scheduler_cfg {
scheduler_tick = 0 # Scheduler refresh in cycles.
allocator = "gang:1" # Core allocation algorithm.
allocator_opt_target = "throughput" # Core allocation optimization target.
speedup_model = "linear" # Core allocation speedup model.
}
# Profiling settings.
profiling_cfg {
# File prefix for profiling results.
file_prefix = ""
# Symbol/instruction to start profiling (format is symbol_name(+offset)).
start = {}
# Symbol/instruction to stop profiling (if empty, exit points of @profiling_start).
stop = {}
}
# Functions to replace and instructions to ignore during simulation.
ignore_cfg {
# Names of functions to replace.
funcs = {}
# Individual instructions to ignore. Format is either an exact PC in hex or
# symbol_name(+offset), like the profiling start parameters.
pcs = {}
}
}
# Core configuration.
core_cfg {
# Pipeline model.
pipeline_model = "DPM"
# CPU clock frequency.
# NOTE(review): presumably MHz, like the LLC and FSB clock knobs later in this
# file - confirm.
core_clock = 3200.0
# Instruction fetch settings: instruction queue, icache (with its prefetcher
# and ITLB), branch prediction, byte queue, and predecode pipe.
fetch_cfg {
# Size of instruction queue (macro ops), placed between predecode and
# decode.
instruction_queue_size = 18
# Caches consist of the cache itself, a TLB, a prefetcher, and a coherency
# controller.
icache_cfg icache {
# General cache settings - size, associativity, line size, etc.
# NOTE(review): the leading fields look like name:sets:assoc:line_size, which
# would make this 128 x 4 x 64 B = 32 KB - confirm against the simulator's
# cache option parser.
config = "IL1:128:4:64:4:64:2:C:8"
# Cache coherency controller configuration.
coherency_controller = "none"
# Enable cache miss sampling.
sample_misses = false
# Instruction prefetcher attached to the icache.
iprefetch_cfg inst_pf {
config = {"nextline"} # 1st-level icache prefetcher configuration.
on_miss_only = true # icache prefetch on miss only.
fifosize = 8 # Prefetch FIFO size (TODO: units?)
buffer = 0 # Prefetch buffer size.
filter = 0 # Prefetch filter size.
filter_reset = 65536 # Prefetch filter reset interval (cycles).
# Prefetch threshold - only prefetch if MSHR occupancy is less than
# this.
threshold = 4
# Maximum instruction prefetch requests in the MSHR.
max_outstanding_requests = 2
# Sampling interval (cycles) for prefetch control. 0 = no PF controller.
watermark_sampling_interval = 100
# Minimum watermark - always prefetch if lower than this.
watermark_min = 0.1
# Maximum watermark - never prefetch if above this.
watermark_max = 0.3
}
# Instruction TLB.
itlb_cfg itlb {
# ITLB configuration.
config = "ITLB:128:4:1:2:L:5"
# Coherency controller.
coherency_controller = "none"
}
}
# Branch prediction settings.
branch_pred_cfg {
# Branch predictor configuration(s); a list, so more than one may be given.
type = {"tage:TAGE5:5:2048:512:9:6:75"}
# Fusion algorithm for hybrid 2nd-level bpred.
fusion = "none"
# Branch target buffer configuration.
btb = "btac:BTB:512:4:8:l"
# Indirect branch target buffer configuration.
ibtb = "2levbtac:iBTB:1:8:1:128:4:8:l"
# Return address stack predictor configuration.
ras = "multistack:RAS:8:8"
# Additional latency from branch-exec to jeclear.
jump_exec_delay = 1
}
# Byte queue settings.
byte_queue_cfg {
# Number of entries.
size = 3
# Bytes per line.
line_size = 16
}
# Predecode pipeline settings.
predecode_cfg {
# Number of stages in the predecode pipe.
depth = 2
# Width of predecode pipeline (macro-ops).
width = 6
}
}
# Decode stage settings, including the uop queue and uop fusion rules.
decode_cfg {
# Pipeline depth in stages.
depth = 2
# Width of pipeline in macro-ops.
width = 4
# Stage of branch agen ("targetstage").
branch_agen_stage = 1
# Maximum branches decoded per cycle.
branch_decode_limit = 1
# Maximum uops generated for each decoder (e.g., {4, 1, 1}).
decoder_max_uops = {4, 1, 1, 1}
# Latency to access micro-code sequencer.
ucode_sequencer_latency = 0
# Number of entries in uop queue.
uop_queue_size = 24
# Enable/disable uop fusion rules.
uop_fusion_cfg {
# Fuse the load op with the next computation op.
load_comp_op = true
# Fuse the load op with the next fp op.
fpload_comp_op = true
# Store address generate - store op.
sta_std = true
# Load-store op fusion.
load_op_store = false
}
}
# Alloc stage settings (alloc = dispatch).
alloc_cfg {
# Pipeline depth (stages).
depth = 1
# Pipeline width (uops).
width = 4
# Use drain-flush after misprediction.
use_drain_flush = true
}
exec_cfg {
# Maximum issues from RS per cycle (equal to num exec ports). The exeu
# port_binding lists in this section use ports 0 through 5, i.e. six ports.
width = 6
# Number of cycles for payload RAM access (schedule to exec delay).
payload_depth = 2
# Enable heuristic tornado breaker.
enable_tornado_breaker = true
# Enable load issue throttling on partial matches.
enable_partial_throttle = true
# Latency to forward results to FP cluster (cycles).
fp_forward_penalty = 0
# Memory dependence predictor configuration.
mem_dep_pred_config = "lwt:LWT:8192:999999"
# Number of reservation station entries.
rs_size = 36
# Number of load queue entries.
loadq_size = 36
# Number of store queue entries.
storeq_size = 24
# 1st-level data cache, with its TLBs and prefetcher.
dcache_cfg dcache {
# General cache settings.
# NOTE(review): the leading fields look like name:sets:assoc:line_size, which
# would make this 64 x 8 x 64 B = 32 KB - confirm against the simulator's
# cache option parser.
config = "DL1:64:8:64:8:64:2:C:W:B:16:8:C"
# MSHR configuration.
mshr_cmd = "RWPB"
# Cache coherency controller configuration.
coherency_controller = "none"
# Enable cache miss sampling.
sample_misses = false
# Data TLB.
dtlb_cfg dtlb {
config = "DTLB:256:4:1:2:L:8"
coherency_controller = "none"
}
# NOTE(review): presumably a second-level data TLB, left unconfigured ("none")
# here - confirm.
d2tlb_cfg d2tlb {
config = "none"
coherency_controller = "none"
}
# Data prefetcher attached to the dcache.
dprefetch_cfg data_pf {
# 1st-level dcache prefetcher configuration; one list element per prefetcher.
config = {"IP:256:12:13:6", "nextline"}
on_miss_only = true # dcache prefetch on miss only.
fifosize = 8 # Prefetch FIFO size (TODO: units?)
buffer = 0 # Prefetch buffer size.
filter = 0 # Prefetch filter size.
filter_reset = 65536 # Prefetch filter reset interval (cycles).
threshold = 4 # Prefetch threshold.
# Maximum data prefetch requests in the MSHR.
max_outstanding_requests = 2
# Sampling interval (cycles) for prefetch control. 0 = no PF controller.
watermark_sampling_interval = 100
# Minimum watermark - always prefetch if lower than this.
watermark_min = 0.1
# Maximum watermark - never prefetch if above this.
watermark_max = 0.3
}
}
# 2nd-level cache, with its prefetcher.
l2cache_cfg L2 {
# General cache settings.
# NOTE(review): the leading fields look like name:sets:assoc:line_size, which
# would make this 512 x 8 x 64 B = 256 KB - confirm against the simulator's
# cache option parser.
config = "DL2:512:8:64:8:64:2:C:W:B:16:8:C"
# MSHR configuration.
mshr_cmd = "RPWB"
# Cache coherency controller configuration.
coherency_controller = "const:75"
# Enable cache miss sampling.
sample_misses = false
# Prefetcher attached to the L2.
l2prefetch_cfg l2_pf {
# L2 prefetcher configuration; one list element per prefetcher.
config = {"IP:256:12:13:6", "nextline"}
on_miss_only = true # L2 prefetch on miss only.
fifosize = 8 # Prefetch FIFO size (TODO: units?)
buffer = 0 # Prefetch buffer size.
filter = 0 # Prefetch filter size.
filter_reset = 65536 # Prefetch filter reset interval (cycles).
threshold = 4 # Prefetch threshold.
# Maximum prefetch requests in the MSHR.
max_outstanding_requests = 2
# Sampling interval (cycles) for prefetch control. 0 = no PF controller.
watermark_sampling_interval = 100
# Minimum watermark - always prefetch if lower than this.
watermark_min = 0.1
# Maximum watermark - never prefetch if above this.
watermark_max = 0.3
}
}
# RingCache (repeater) settings.
repeater_cfg {
# RingCache configuration (originally in zesto-repeater).
config = "none"
# Send requests to the L1 in parallel with the repeater.
request_dl1 = false
}
# Execution units. Every exeu section sets the same three keys: the unit's
# execution latency, its issue rate, and the exec ports it is bound to.
# NOTE(review): latency and rate are presumably in core cycles - confirm.
exeu int_alu {
latency = 1 # Execution latency.
rate = 1 # Issue rate.
port_binding = {0, 1, 5} # Port bindings.
}
exeu jump {
latency = 1
rate = 1
port_binding = {5}
}
exeu int_mul {
latency = 3
rate = 1
port_binding = {1}
}
exeu int_div {
latency = 24
rate = 16
port_binding = {0}
}
exeu shift {
latency = 1
rate = 1
port_binding = {0, 5}
}
exeu fp_alu {
latency = 3
rate = 1
port_binding = {1}
}
exeu fp_mul {
latency = 5
rate = 2
port_binding = {0}
}
exeu fp_div {
latency = 32
rate = 32
port_binding = {0}
}
exeu fp_cplx {
latency = 58
rate = 58
port_binding = {0}
}
# Loads, store address generation, and store data each get a port of their
# own (2, 3, and 4); no other unit in this list is bound to those ports.
exeu ld {
latency = 1
rate = 1
port_binding = {2}
}
exeu st_agen {
latency = 1
rate = 1
port_binding = {3}
}
exeu st_data {
latency = 1
rate = 1
port_binding = {4}
}
# LEA = load effective address.
exeu lea {
latency = 1
rate = 1
port_binding = {1}
}
exeu magic {
latency = 1
rate = 1
port_binding = {0}
}
}
# Commit stage.
commit_cfg {
rob_size = 128 # Number of ROB entries.
commit_width = 4 # Maximum uops committed per cycle.
# NOTE(review): 0 presumably means "no limit" rather than zero branches per
# cycle - confirm against the simulator.
commit_branches = 0 # Maximum branches committed per cycle.
}
} # End of core cfg.
# Last level cache, FSB, DRAM, etc.
uncore_cfg {
# Last-level cache (LLC), with its prefetcher.
llccache_cfg llc {
# General cache settings - size, associativity, line size, etc.
config = "LLC:8192:16:64:16:64:9:L:W:B:16:1:8:C"
# Cache coherency controller configuration.
coherency_controller = "const:75"
mshr_cmd = "RPWB" # MSHR configuration.
clock = 1600 # Cache clock frequency (MHz).
# Enable cache miss sampling.
sample_misses = false
llcprefetch_cfg llc_pf {
# Last-level cache prefetcher configuration. Each prefetcher is its own list
# element, as in the dcache and L2 prefetcher lists in this file. Both specs
# used to be packed into a single string ("IP:256:12:13:6 stream:12:4").
# NOTE(review): this changes which prefetchers are modeled - re-validate
# results that were calibrated against the old single-string form.
config = {"IP:256:12:13:6", "stream:12:4"}
on_miss_only = false # LLC prefetch on miss only.
fifosize = 8 # Prefetch FIFO size (TODO: units?)
buffer = 0 # Prefetch buffer size.
filter = 0 # Prefetch filter size.
filter_reset = 65536 # Prefetch filter reset interval (cycles).
# Prefetch threshold - only prefetch if MSHR occupancy is less than
# this.
threshold = 4
# Maximum prefetch requests in the MSHR.
max_outstanding_requests = 2
# Sampling interval (cycles) for prefetch control. 0 = no PF controller.
watermark_sampling_interval = 2000
# Minimum watermark - always prefetch if lower than this.
watermark_min = 0.1
# Maximum watermark - never prefetch if above this.
watermark_max = 0.4
}
}
# Front-side bus (FSB) settings.
fsb_cfg {
width = 8 # FSB bus width (Bytes).
ddr = true # FSB double pumped data.
clock = 800.0 # FSB bus clock frequency (MHz).
magic = false # FSB unlimited bandwidth.
}
# Memory controller and DRAM settings.
dram_cfg {
memory_controller_config = "simple:16:1"
dram_config = "simplesdram:4:4:35:11.25:11.25:11.25:11.25:64"
# Based on Samsung K4B510446E-ZCH0
# 512-Mb, DDR3-1600 9-9-9
#
# t_RAS = 45.0ns
# t_RCD = 15.0ns
# t_CAS = 15.0ns
# t_WR = 15.0ns
# t_RP = 15.0ns
#
# NOTE(review): the timings listed above (45.0 / 15.0 ns) do not match the
# numbers in dram_config (35 / 11.25). 11.25 ns would be 9 cycles at the
# 1.25 ns clock period of DDR3-1600, which fits the "9-9-9" grade - confirm
# which set of values is intended and update the other.
}
} # End of uncore configs.