OS-2456 sd timeout/retry settings are absurd

OS-2457 mptsas timeout subsystem needs finer granularity
Keith M Wesolowski committed Sep 4, 2013
1 parent 794fcf5 commit e07b2d7fd21c58c9a366d7a5ae040a212f850af5
* automatically. See: usr/src/uts/common/io/sata/impl/sata.c:97
set sata:sata_auto_online=1

# We want to limit the time spent in any one I/O to 10 seconds for targets
# that are not optical. This is still a very long time; our queue depth is
# typically 10 or less, and disks will usually fail a command after 2-3s.
# So we'd have to have multiple reads of bad sectors queued up to have any
# chance of timing out. In practice, timeouts occur because of problems with
# disk controllers or firmware, not media errors, and in those cases it will
# not help at all to wait longer.
set sd:sd_io_time=10
# Command/target timeout checking should be done at a 1-second granularity.
# the controller will switch to write-through mode, and ensure that any
# underlying drive cache is off. In this case, it should still be safe to
# dispense with cache flush commands. Controllers for which this is not the
# case should not be added here unless data loss and corruption are acceptable.
# case should not have cache-nonvolatile set unless data loss and corruption
# are acceptable.
# In addition, *all* devices have their retries capped at 1. There are an
# additional 2 retries for "victim" IOs if a reset is needed. Retrying is
# very rarely successful, and it is preferable to let ZFS do it where needed.
"", "retries-timeout:1,retries-busy:1,retries-reset:1,retries-victim:2",
"DELL PERC H710", "cache-nonvolatile:true",
"DELL PERC H700", "cache-nonvolatile:true",
"DELL PERC/6i", "cache-nonvolatile:true";
f etc/resolv.conf 0644 netadm netadm
d kernel/drv 0755 root sys
f kernel/drv/cpqary3.conf 0644 root sys
f kernel/drv/mpt_sas.conf 0644 root sys
f kernel/drv/sd.conf 0644 root sys
f kernel/drv/amd64/cpqary3 0755 root sys
f kernel/drv/amd64/bnx 0755 root sys

