Permalink
Browse files

OS-2456 sd timeout/retry settings are absurd

OS-2457 mptsas timeout subsystem needs finer granularity
  • Loading branch information...
wesolows committed Aug 29, 2013
1 parent 794fcf5 commit e07b2d7fd21c58c9a366d7a5ae040a212f850af5
View
@@ -144,3 +144,14 @@ set dump_metrics_on=1
* automatically. See: usr/src/uts/common/io/sata/impl/sata.c:97
*
set sata:sata_auto_online=1
+
+#
+# We want to limit the time spent in any one I/O to 10 seconds for targets
+# that are not optical. This is still a very long time; our queue depth is
+# typically 10 or less, and disks will usually fail a command after 2-3s.
+# So we'd have to have multiple reads of bad sectors queued up to have any
+# chance of timing out. In practice, timeouts occur because of problems with
+# disk controllers or firmware, not media errors, and in those cases it will
+# not help at all to wait longer.
+#
+set sd:sd_io_time=10
@@ -0,0 +1,7 @@
+ddi-vhci-class="scsi_vhci";
+mpxio-disable="no";
+
+#
+# Command/target timeout checking should be done at a 1-second granularity.
+#
+scsi-watchdog-tick=1;
@@ -60,9 +60,15 @@ ddi-devid-registrant=1;
# the controller will switch to write-through mode, and ensure that any
# underlying drive cache is off. In this case, it should still be safe to
# dispense with cache flush commands. Controllers for which this is not the
-# case should not be added here unless data loss and corruption are acceptable.
+# case should not have cache-nonvolatile set unless data loss and corruption
+# are acceptable.
+#
+# In addition, *all* devices have their retries capped at 1. There are an
+# additional 2 retries for "victim" IOs if a reset is needed. Retrying is
+# very rarely successful, and it is preferable to let ZFS do it where needed.
#
sd-config-list=
+ "", "retries-timeout:1,retries-busy:1,retries-reset:1,retries-victim:2",
"DELL PERC H710", "cache-nonvolatile:true",
"DELL PERC H700", "cache-nonvolatile:true",
"DELL PERC/6i", "cache-nonvolatile:true";
View
@@ -60,6 +60,7 @@ f etc/zones/SUNWdefault.xml 0444 root bin
f etc/resolv.conf 0644 netadm netadm
d kernel/drv 0755 root sys
f kernel/drv/cpqary3.conf 0644 root sys
+f kernel/drv/mpt_sas.conf 0644 root sys
f kernel/drv/sd.conf 0644 root sys
f kernel/drv/amd64/cpqary3 0755 root sys
f kernel/drv/amd64/bnx 0755 root sys

0 comments on commit e07b2d7

Please sign in to comment.