Permalink
Browse files

Merge branch 'devel'

* devel: (33 commits)
  edac i5000, i5400: fix pointer math in i5000_get_mc_regs()
  edac: allow specifying the error count with fake_inject
  edac: add support for Calxeda highbank L2 cache ecc
  edac: add support for Calxeda highbank memory controller
  edac: create top-level debugfs directory
  sb_edac: properly handle error count
  i7core_edac: properly handle error count
  edac: edac_mc_handle_error(): add an error_count parameter
  edac: remove arch-specific parameter for the error handler
  amd64_edac: Don't pass driver name as an error parameter
  edac_mc: check for allocation failure in edac_mc_alloc()
  edac: Increase version to 3.0.0
  edac_mc: Cleanup per-dimm_info debug messages
  edac: Convert debugfX to edac_dbg(X,
  edac: Use more normal debugging macro style
  edac: Don't add __func__ or __FILE__ for debugf[0-9] msgs
  Edac: Add ABI Documentation for the new device nodes
  edac: move documentation ABI to ABI/testing/sysfs-devices-edac
  i7core_edac: change the mem allocation scheme to make Documentation/kobject.txt happy
  edac: change the mem allocation scheme to make Documentation/kobject.txt happy
  ...
  • Loading branch information...
2 parents 73bcc49 + f58d0de commit c2078e4c9120e7b38b1a02cd9fc6dd4f792110bf Mauro Carvalho Chehab committed Jul 30, 2012
Showing with 3,471 additions and 2,555 deletions.
  1. +140 −0 Documentation/ABI/testing/sysfs-devices-edac
  2. +15 −0 Documentation/devicetree/bindings/arm/calxeda/l2ecc.txt
  3. +14 −0 Documentation/devicetree/bindings/arm/calxeda/mem-ctrlr.txt
  4. +8 −104 Documentation/edac.txt
  5. +12 −0 arch/arm/boot/dts/highbank.dts
  6. +23 −1 drivers/edac/Kconfig
  7. +3 −0 drivers/edac/Makefile
  8. +187 −189 drivers/edac/amd64_edac.c
  9. +22 −7 drivers/edac/amd64_edac.h
  10. +45 −44 drivers/edac/amd64_edac_dbg.c
  11. +78 −56 drivers/edac/amd64_edac_inj.c
  12. +17 −17 drivers/edac/amd76x_edac.c
  13. +14 −14 drivers/edac/cell_edac.c
  14. +47 −49 drivers/edac/cpc925_edac.c
  15. +46 −46 drivers/edac/e752x_edac.c
  16. +45 −44 drivers/edac/e7xxx_edac.c
  17. +17 −22 drivers/edac/edac_core.h
  18. +22 −25 drivers/edac/edac_device.c
  19. +32 −39 drivers/edac/edac_device_sysfs.c
  20. +241 −154 drivers/edac/edac_mc.c
  21. +712 −643 drivers/edac/edac_mc_sysfs.c
  22. +8 −12 drivers/edac/edac_module.c
  23. +22 −4 drivers/edac/edac_module.h
  24. +13 −13 drivers/edac/edac_pci.c
  25. +24 −25 drivers/edac/edac_pci_sysfs.c
  26. +149 −0 drivers/edac/highbank_l2_edac.c
  27. +264 −0 drivers/edac/highbank_mc_edac.c
  28. +23 −24 drivers/edac/i3000_edac.c
  29. +24 −24 drivers/edac/i3200_edac.c
  30. +100 −107 drivers/edac/i5000_edac.c
  31. +7 −7 drivers/edac/i5100_edac.c
  32. +97 −104 drivers/edac/i5400_edac.c
  33. +79 −94 drivers/edac/i7300_edac.c
  34. +297 −223 drivers/edac/i7core_edac.c
  35. +24 −27 drivers/edac/i82443bxgx_edac.c
  36. +22 −23 drivers/edac/i82860_edac.c
  37. +24 −29 drivers/edac/i82875p_edac.c
  38. +27 −28 drivers/edac/i82975x_edac.c
  39. +74 −57 drivers/edac/mpc85xx_edac.c
  40. +20 −20 drivers/edac/mv64x60_edac.c
  41. +11 −11 drivers/edac/pasemi_edac.c
  42. +8 −8 drivers/edac/ppc4xx_edac.c
  43. +23 −25 drivers/edac/r82600_edac.c
  44. +126 −131 drivers/edac/sb_edac.c
  45. +6 −6 drivers/edac/tile_edac.c
  46. +24 −24 drivers/edac/x38_edac.c
  47. +133 −75 include/linux/edac.h
  48. +102 −0 include/ras/ras_event.h
@@ -0,0 +1,140 @@
+What: /sys/devices/system/edac/mc/mc*/reset_counters
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This write-only control file will zero all the statistical
+ counters for UE and CE errors on the given memory controller.
+ Zeroing the counters will also reset the timer indicating how
+ long since the last counter were reset. This is useful for
+ computing errors/time. Since the counters are always reset
+ at driver initialization time, no module/kernel parameter
+ is available.
+
+What: /sys/devices/system/edac/mc/mc*/seconds_since_reset
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This attribute file displays how many seconds have elapsed
+ since the last counter reset. This can be used with the error
+ counters to measure error rates.
+
+What: /sys/devices/system/edac/mc/mc*/mc_name
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This attribute file displays the type of memory controller
+ that is being utilized.
+
+What: /sys/devices/system/edac/mc/mc*/size_mb
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This attribute file displays, in count of megabytes, of memory
+ that this memory controller manages.
+
+What: /sys/devices/system/edac/mc/mc*/ue_count
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This attribute file displays the total count of uncorrectable
+ errors that have occurred on this memory controller. If
+ panic_on_ue is set, this counter will not have a chance to
+ increment, since EDAC will panic the system
+
+What: /sys/devices/system/edac/mc/mc*/ue_noinfo_count
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This attribute file displays the number of UEs that have
+ occurred on this memory controller with no information as to
+ which DIMM slot is having errors.
+
+What: /sys/devices/system/edac/mc/mc*/ce_count
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This attribute file displays the total count of correctable
+ errors that have occurred on this memory controller. This
+ count is very important to examine. CEs provide early
+ indications that a DIMM is beginning to fail. This count
+ field should be monitored for non-zero values and report
+ such information to the system administrator.
+
+What: /sys/devices/system/edac/mc/mc*/ce_noinfo_count
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This attribute file displays the number of CEs that
+ have occurred on this memory controller wherewith no
+ information as to which DIMM slot is having errors. Memory is
+ handicapped, but operational, yet no information is available
+ to indicate which slot the failing memory is in. This count
+ field should be also be monitored for non-zero values.
+
+What: /sys/devices/system/edac/mc/mc*/sdram_scrub_rate
+Date: February 2007
+Contact: linux-edac@vger.kernel.org
+Description: Read/Write attribute file that controls memory scrubbing.
+ The scrubbing rate used by the memory controller is set by
+ writing a minimum bandwidth in bytes/sec to the attribute file.
+ The rate will be translated to an internal value that gives at
+ least the specified rate.
+ Reading the file will return the actual scrubbing rate employed.
+ If configuration fails or memory scrubbing is not implemented,
+ the value of the attribute file will be -1.
+
+What: /sys/devices/system/edac/mc/mc*/max_location
+Date: April 2012
+Contact: Mauro Carvalho Chehab <mchehab@redhat.com>
+ linux-edac@vger.kernel.org
+Description: This attribute file displays the information about the last
+ available memory slot in this memory controller. It is used by
+ userspace tools in order to display the memory filling layout.
+
+What: /sys/devices/system/edac/mc/mc*/(dimm|rank)*/size
+Date: April 2012
+Contact: Mauro Carvalho Chehab <mchehab@redhat.com>
+ linux-edac@vger.kernel.org
+Description: This attribute file will display the size of dimm or rank.
+ For dimm*/size, this is the size, in MB of the DIMM memory
+ stick. For rank*/size, this is the size, in MB for one rank
+ of the DIMM memory stick. On single rank memories (1R), this
+ is also the total size of the dimm. On dual rank (2R) memories,
+ this is half the size of the total DIMM memories.
+
+What: /sys/devices/system/edac/mc/mc*/(dimm|rank)*/dimm_dev_type
+Date: April 2012
+Contact: Mauro Carvalho Chehab <mchehab@redhat.com>
+ linux-edac@vger.kernel.org
+Description: This attribute file will display what type of DRAM device is
+ being utilized on this DIMM (x1, x2, x4, x8, ...).
+
+What: /sys/devices/system/edac/mc/mc*/(dimm|rank)*/dimm_edac_mode
+Date: April 2012
+Contact: Mauro Carvalho Chehab <mchehab@redhat.com>
+ linux-edac@vger.kernel.org
+Description: This attribute file will display what type of Error detection
+ and correction is being utilized. For example: S4ECD4ED would
+ mean a Chipkill with x4 DRAM.
+
+What: /sys/devices/system/edac/mc/mc*/(dimm|rank)*/dimm_label
+Date: April 2012
+Contact: Mauro Carvalho Chehab <mchehab@redhat.com>
+ linux-edac@vger.kernel.org
+Description: This control file allows this DIMM to have a label assigned
+ to it. With this label in the module, when errors occur
+ the output can provide the DIMM label in the system log.
+ This becomes vital for panic events to isolate the
+ cause of the UE event.
+ DIMM Labels must be assigned after booting, with information
+ that correctly identifies the physical slot with its
+ silk screen label. This information is currently very
+ motherboard specific and determination of this information
+ must occur in userland at this time.
+
+What: /sys/devices/system/edac/mc/mc*/(dimm|rank)*/dimm_location
+Date: April 2012
+Contact: Mauro Carvalho Chehab <mchehab@redhat.com>
+ linux-edac@vger.kernel.org
+Description: This attribute file will display the location (csrow/channel,
+ branch/channel/slot or channel/slot) of the dimm or rank.
+
+What: /sys/devices/system/edac/mc/mc*/(dimm|rank)*/dimm_mem_type
+Date: April 2012
+Contact: Mauro Carvalho Chehab <mchehab@redhat.com>
+ linux-edac@vger.kernel.org
+Description: This attribute file will display what type of memory is
+ currently on this csrow. Normally, either buffered or
+ unbuffered memory (for example, Unbuffered-DDR3).
@@ -0,0 +1,15 @@
+Calxeda Highbank L2 cache ECC
+
+Properties:
+- compatible : Should be "calxeda,hb-sregs-l2-ecc"
+- reg : Address and size for ECC error interrupt clear registers.
+- interrupts : Should be single bit error interrupt, then double bit error
+ interrupt.
+
+Example:
+
+ sregs@fff3c200 {
+ compatible = "calxeda,hb-sregs-l2-ecc";
+ reg = <0xfff3c200 0x100>;
+ interrupts = <0 71 4 0 72 4>;
+ };
@@ -0,0 +1,14 @@
+Calxeda DDR memory controller
+
+Properties:
+- compatible : Should be "calxeda,hb-ddr-ctrl"
+- reg : Address and size for DDR controller registers.
+- interrupts : Interrupt for DDR controller.
+
+Example:
+
+ memory-controller@fff00000 {
+ compatible = "calxeda,hb-ddr-ctrl";
+ reg = <0xfff00000 0x1000>;
+ interrupts = <0 91 4>;
+ };
View
@@ -232,116 +232,20 @@ EDAC control and attribute files.
In 'mcX' directories are EDAC control and attribute files for
-this 'X' instance of the memory controllers:
-
-
-Counter reset control file:
-
- 'reset_counters'
-
- This write-only control file will zero all the statistical counters
- for UE and CE errors. Zeroing the counters will also reset the timer
- indicating how long since the last counter zero. This is useful
- for computing errors/time. Since the counters are always reset at
- driver initialization time, no module/kernel parameter is available.
-
- RUN TIME: echo "anything" >/sys/devices/system/edac/mc/mc0/counter_reset
-
- This resets the counters on memory controller 0
-
-
-Seconds since last counter reset control file:
-
- 'seconds_since_reset'
-
- This attribute file displays how many seconds have elapsed since the
- last counter reset. This can be used with the error counters to
- measure error rates.
-
-
-
-Memory Controller name attribute file:
-
- 'mc_name'
-
- This attribute file displays the type of memory controller
- that is being utilized.
-
-
-Total memory managed by this memory controller attribute file:
-
- 'size_mb'
-
- This attribute file displays, in count of megabytes, of memory
- that this instance of memory controller manages.
-
-
-Total Uncorrectable Errors count attribute file:
-
- 'ue_count'
-
- This attribute file displays the total count of uncorrectable
- errors that have occurred on this memory controller. If panic_on_ue
- is set this counter will not have a chance to increment,
- since EDAC will panic the system.
-
-
-Total UE count that had no information attribute fileY:
-
- 'ue_noinfo_count'
-
- This attribute file displays the number of UEs that have occurred
- with no information as to which DIMM slot is having errors.
-
-
-Total Correctable Errors count attribute file:
-
- 'ce_count'
-
- This attribute file displays the total count of correctable
- errors that have occurred on this memory controller. This
- count is very important to examine. CEs provide early
- indications that a DIMM is beginning to fail. This count
- field should be monitored for non-zero values and report
- such information to the system administrator.
-
-
-Total Correctable Errors count attribute file:
-
- 'ce_noinfo_count'
-
- This attribute file displays the number of CEs that
- have occurred wherewith no information as to which DIMM slot
- is having errors. Memory is handicapped, but operational,
- yet no information is available to indicate which slot
- the failing memory is in. This count field should be also
- be monitored for non-zero values.
-
-Device Symlink:
-
- 'device'
-
- Symlink to the memory controller device.
-
-Sdram memory scrubbing rate:
-
- 'sdram_scrub_rate'
-
- Read/Write attribute file that controls memory scrubbing. The scrubbing
- rate is set by writing a minimum bandwidth in bytes/sec to the attribute
- file. The rate will be translated to an internal value that gives at
- least the specified rate.
-
- Reading the file will return the actual scrubbing rate employed.
-
- If configuration fails or memory scrubbing is not implemented, accessing
- that attribute will fail.
+this 'X' instance of the memory controllers.
+For a description of the sysfs API, please see:
+ Documentation/ABI/testing/sysfs/devices-edac
============================================================================
'csrowX' DIRECTORIES
+When CONFIG_EDAC_LEGACY_SYSFS is enabled, the sysfs will contain the
+csrowX directories. As this API doesn't work properly for Rambus, FB-DIMMs
+and modern Intel Memory Controllers, this is being deprecated in favor
+of dimmX directories.
+
In the 'csrowX' directories are EDAC control and attribute files for
this 'X' instance of csrow:
@@ -118,6 +118,12 @@
interrupts = <0 90 4>;
};
+ memory-controller@fff00000 {
+ compatible = "calxeda,hb-ddr-ctrl";
+ reg = <0xfff00000 0x1000>;
+ interrupts = <0 91 4>;
+ };
+
ipc@fff20000 {
compatible = "arm,pl320", "arm,primecell";
reg = <0xfff20000 0x1000>;
@@ -188,6 +194,12 @@
reg = <0xfff3c000 0x1000>;
};
+ sregs@fff3c200 {
+ compatible = "calxeda,hb-sregs-l2-ecc";
+ reg = <0xfff3c200 0x100>;
+ interrupts = <0 71 4 0 72 4>;
+ };
+
dma@fff3d000 {
compatible = "arm,pl330", "arm,primecell";
reg = <0xfff3d000 0x1000>;
View
@@ -7,7 +7,7 @@
menuconfig EDAC
bool "EDAC (Error Detection And Correction) reporting"
depends on HAS_IOMEM
- depends on X86 || PPC || TILE
+ depends on X86 || PPC || TILE || ARM
help
EDAC is designed to report errors in the core system.
These are low-level errors that are reported in the CPU or
@@ -31,6 +31,14 @@ if EDAC
comment "Reporting subsystems"
+config EDAC_LEGACY_SYSFS
+ bool "EDAC legacy sysfs"
+ default y
+ help
+ Enable the compatibility sysfs nodes.
+ Use 'Y' if your edac utilities aren't ported to work with the newer
+ structures.
+
config EDAC_DEBUG
bool "Debugging"
help
@@ -294,4 +302,18 @@ config EDAC_TILE
Support for error detection and correction on the
Tilera memory controller.
+config EDAC_HIGHBANK_MC
+ tristate "Highbank Memory Controller"
+ depends on EDAC_MM_EDAC && ARCH_HIGHBANK
+ help
+ Support for error detection and correction on the
+ Calxeda Highbank memory controller.
+
+config EDAC_HIGHBANK_L2
+ tristate "Highbank L2 Cache"
+ depends on EDAC_MM_EDAC && ARCH_HIGHBANK
+ help
+ Support for error detection and correction on the
+ Calxeda Highbank memory controller.
+
endif # EDAC
View
@@ -55,3 +55,6 @@ obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o
obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o
obj-$(CONFIG_EDAC_TILE) += tile_edac.o
+
+obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o
+obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o
Oops, something went wrong.

0 comments on commit c2078e4

Please sign in to comment.